@valescoagency/runway 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -10
- package/dist/commands/doctor.js +203 -2
- package/dist/commands/run.js +70 -15
- package/dist/config.js +53 -61
- package/dist/git.js +43 -29
- package/dist/github.js +136 -21
- package/dist/linear.js +295 -63
- package/dist/orchestrator.js +407 -115
- package/dist/policy.js +76 -0
- package/dist/prompts.js +44 -1
- package/dist/subprocess.js +40 -0
- package/dist/telemetry.js +31 -0
- package/package.json +10 -1
- package/prompts/implement.md +46 -2
- package/templates/Dockerfile.claude-code.base +24 -0
package/dist/orchestrator.js
CHANGED
|
@@ -2,10 +2,27 @@ import { existsSync } from "node:fs";
|
|
|
2
2
|
import { join } from "node:path";
|
|
3
3
|
import { run, claudeCode } from "@ai-hero/sandcastle";
|
|
4
4
|
import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
|
|
5
|
-
import {
|
|
6
|
-
import { implementVars, loadImplementPrompt, loadReviewPrompt, renderPrompt, reviewVars, } from "./prompts.js";
|
|
5
|
+
import { Effect, Redacted } from "effect";
|
|
6
|
+
import { buildIterationSummary, implementVars, loadImplementPrompt, loadReviewPrompt, renderPrompt, reviewVars, tailOfMessage, } from "./prompts.js";
|
|
7
7
|
import { detectBaseBranch } from "./git.js";
|
|
8
|
-
|
|
8
|
+
import { loadPolicy } from "./policy.js";
|
|
9
|
+
import { runExecaScoped } from "./subprocess.js";
|
|
10
|
+
// VA-353: review verdict marker. Global flag because sandcastle
// appends wrapper output ("Agent stopped", "Capturing session",
// "Reached max iterations (1).", "Run complete: …") AFTER the agent's
// final message — so the marker is rarely the last line. We scan
// every line-start match and keep the LAST one, which is the most
// recent agent verdict. Standalone-line: ^…$ with /m anchors prevent
// mid-prose matches like "the reviewer should output REVIEW: APPROVED
// when…".
//
// `[^\S\r\n]*` before `$` tolerates trailing spaces/tabs (horizontal
// whitespace only) after a bare marker. Without it, "REVIEW: APPROVED "
// fails the `$` anchor and the verdict is silently reported as missing.
// Capture groups: [1] = verdict keyword, [2] = optional reason after `—`.
const REVIEW_VERDICT_RE = /^REVIEW:\s*(APPROVED|REJECTED)(?:\s+—\s+(.*))?[^\S\r\n]*$/gm;
// VA-350: impl-pass termination contract. Last `IMPL:` marker line in
// the agent's output wins (most recent iteration's verdict). DONE →
// proceed to review; BLOCKED → HITL with reason; CONTINUE or missing →
// fall through (back-compat). The trailing reason after `—` is
// captured for BLOCKED. Trailing horizontal whitespace is tolerated
// for the same reason as in REVIEW_VERDICT_RE.
const IMPL_VERDICT_RE = /^IMPL:\s*(DONE|BLOCKED|CONTINUE)(?:\s+—\s+(.*))?[^\S\r\n]*$/gm;
// Substrings handed to sandcastle as `completionSignal` for the
// implementation pass (terminal verdicts only; CONTINUE is not one).
const IMPL_COMPLETION_SIGNALS = ["IMPL: DONE", "IMPL: BLOCKED"];
|
|
9
26
|
/**
|
|
10
27
|
* Confirms the cwd looks like a sandcastle-initialised repo. If not,
|
|
11
28
|
* we error early with a clear message rather than letting Sandcastle
|
|
@@ -18,173 +35,438 @@ export function assertSandcastleInitialised(cwd) {
|
|
|
18
35
|
}
|
|
19
36
|
}
|
|
20
37
|
/**
 * VA-358: drains the Linear queue as a single Effect program with
 * spans at every pipeline stage and structured-field logs (issue id,
 * branch, stage, duration). Externally still appears to "do the
 * loop" — `Effect.runPromise` at the cli.ts boundary turns it back
 * into a Promise<OrchestratorResult>.
 *
 * Error channel is `never` because the function folds every per-issue
 * failure into an `IssueOutcome` (revert / errored / HITL) rather than
 * propagating. The whole drain only halts when the queue is empty or
 * `--max` is reached. Two failures are deliberately fatal and kill the
 * fiber via `Effect.orDie`: base-branch detection and fetching the
 * ready queue.
 *
 * @param deps Orchestrator dependencies. This function reads `config`,
 *   `linear` and `cwd`; the whole object (plus the resolved base branch
 *   and the loaded policy) is forwarded to the per-issue pipeline.
 * @param opts Optional `max` (cap on issues attempted; unlimited when
 *   omitted) and `allowPaths` (passed to `loadPolicy` as
 *   `allowPathsOverride`).
 * @returns Effect yielding `{ attempts, opened, hitl, errored, outcomes }`.
 */
export const drainQueue = (deps, opts = {}) => Effect.gen(function* () {
    const { config, linear } = deps;
    const max = opts.max ?? Number.POSITIVE_INFINITY;
    // Resolve the base branch once at startup so every issue in the
    // drain sees the same answer (and so a misconfigured repo fails
    // fast, before we touch any Linear state).
    const baseBranchResolved = yield* (config.baseBranch
        ? Effect.succeed(config.baseBranch)
        : detectBaseBranch(deps.cwd)).pipe(Effect.catchAll((err) => Effect.fail({
        _tag: "BaseBranchDetectionFailed",
        message: err.message,
    })),
    // If base-branch detection fails, the whole drain is dead in the
    // water (we'd diff against an undefined branch). Surface it as a
    // top-level defect; drainQueue's `never` error type is preserved
    // because the unrecoverable failure dies the fiber, not the
    // error channel.
    Effect.orDie);
    yield* Effect.logInfo("base branch resolved").pipe(Effect.annotateLogs({ baseBranch: baseBranchResolved }));
    const policy = loadPolicy(deps.cwd, { allowPathsOverride: opts.allowPaths });
    yield* Effect.logInfo(`policy source: ${policy.source}`);
    const runDeps = {
        ...deps,
        baseBranch: baseBranchResolved,
        policy,
    };
    // VA-344: never re-pick an issue in the same invocation, even if
    // VA-342 reverted it to `Todo`. Without this, a deterministic
    // startup failure (broken .env.schema, missing image, expired
    // token) would loop on the same issue until --max was exhausted.
    const seen = new Set();
    const outcomes = [];
    let attempts = 0;
    let opened = 0;
    let hitl = 0;
    let errored = 0;
    while (attempts < max) {
        const queue = yield* linear.fetchReady().pipe(
        // Failure to fetch the queue is fatal to the drain (we can't
        // pick the next issue); die rather than infinite-loop on the
        // same error.
        Effect.orDie);
        const issue = queue.find((i) => !seen.has(i.id));
        if (!issue)
            break;
        seen.add(issue.id);
        attempts += 1;
        const branch = `agent/${issue.identifier.toLowerCase()}`;
        // `Effect.either` turns a per-issue failure into a value so one
        // bad issue cannot abort the rest of the drain.
        const processed = yield* processIssue(issue, runDeps).pipe(Effect.either, Effect.withSpan("processIssue", {
            attributes: {
                "runway.issue.identifier": issue.identifier,
                "runway.issue.id": issue.id,
                "runway.branch": branch,
            },
        }), Effect.annotateLogs({
            issue: issue.identifier,
            branch,
        }));
        if (processed._tag === "Right") {
            const result = processed.right;
            if (result.kind === "opened")
                opened += 1;
            if (result.kind === "hitl")
                hitl += 1;
            outcomes.push({
                identifier: issue.identifier,
                kind: result.kind,
                detail: result.detail,
            });
        }
        else {
            errored += 1;
            const errDetail = errMsg(processed.left);
            yield* Effect.logError(`error on ${issue.identifier}`).pipe(Effect.annotateLogs({
                issue: issue.identifier,
                error: errDetail,
            }));
            // If the agent crashed before producing any commits (missing
            // image, varlock validation, container failed to boot, etc.),
            // it's an infrastructure failure — not a HITL. Revert the
            // issue to the ready state and skip the HITL label so the
            // next run can pick it up cleanly. `In Progress` is reserved
            // for "agent has committed to the branch".
            const startedRealWork = yield* hasCommits(deps.cwd, baseBranchResolved, branch).pipe(Effect.catchAll(() => Effect.succeed(false)));
            if (!startedRealWork) {
                yield* runSwallow(linear.transition(issue.id, config.readyStatus), `${issue.identifier}: revert transition to ${config.readyStatus}`);
                yield* runSwallow(linear.comment(issue.id, `Runway hit a startup failure before the agent produced any commits — reverting to \`${config.readyStatus}\` for retry:\n\n\`\`\`\n${errDetail}\n\`\`\``), `${issue.identifier}: revert-comment`);
                outcomes.push({
                    identifier: issue.identifier,
                    kind: "reverted",
                    detail: errDetail,
                });
            }
            else {
                // VA-355: comment first with the substantive reason, label
                // second (best-effort). If we labeled first and the label
                // didn't exist (Flightplan workspaces hitting the
                // `needs-human` default — see VA-354), the orchestrator's
                // catch would never get to the reason and the operator
                // would see an infrastructure error in Linear with no clue
                // what the agent actually found.
                //
                // flagHitl prefixes the comment with "Runway flagged for
                // human review:", so the reason must not repeat that phrase.
                yield* flagHitl(issue, runDeps, `Runway hit an unrecoverable error: ${errDetail}`);
                outcomes.push({
                    identifier: issue.identifier,
                    kind: "errored",
                    detail: errDetail,
                });
            }
        }
    }
    yield* printExitSummary(outcomes, config.readyStatus);
    return {
        attempts,
        opened,
        hitl,
        errored,
        outcomes,
    };
}).pipe(Effect.withSpan("drainQueue"));
|
|
168
|
+
/**
 * Runs the full pipeline for one issue: mark it in progress, run the
 * implementation agent (up to `config.maxIterations` single-iteration
 * sandcastle runs), run the review agent on the resulting diff, then
 * push the branch and open a PR.
 *
 * @param issue Linear issue; `id` and `identifier` are read here.
 * @param deps  Run dependencies: `config`, `linear`, `github`, `cwd`,
 *   `baseBranch` and `policy`.
 * @returns Effect yielding `{ kind: "opened", detail: prUrl }` or
 *   `{ kind: "hitl", detail: reason }`. Failures of the Linear, GitHub,
 *   git or sandcastle steps that are yielded directly are left on the
 *   error channel for the caller to fold.
 */
const processIssue = (issue, deps) => Effect.gen(function* () {
    const { config, linear, github, cwd, baseBranch } = deps;
    const branch = `agent/${issue.identifier.toLowerCase()}`;
    yield* linear.transition(issue.id, config.inProgressStatus);
    yield* linear.comment(issue.id, `Runway picked up this issue. Branch: \`${branch}\`.`);
    // 1. Implementation pass.
    //
    // VA-349 + VA-350: run iterations one at a time so we can (a)
    // inject a summary of the previous iteration into the next prompt
    // — no more "I'll start by understanding the current state of the
    // repository" 5x per issue — and (b) break early on IMPL:
    // DONE/BLOCKED parsed from our own code rather than relying on
    // sandcastle's substring completionSignal.
    const implementTemplate = yield* Effect.promise(() => loadImplementPrompt());
    const maxIters = Math.max(1, config.maxIterations);
    let prevSummary = "";
    let implementResult;
    let implVerdict = { kind: "missing" };
    // Each sandcastle run below covers exactly one iteration, so a
    // single result's `commits` presumably lists only that run's
    // commits — TODO confirm against sandcastle's RunResult docs.
    // Remember whether ANY iteration committed; checking only the last
    // result would send an issue to HITL as "no commits" when the work
    // landed in an earlier iteration and the last one merely verified
    // and emitted IMPL: DONE.
    let producedCommits = false;
    for (let iter = 1; iter <= maxIters; iter += 1) {
        const implementPrompt = renderPrompt(implementTemplate, implementVars(issue, {
            previousIterations: prevSummary,
            policy: deps.policy,
        }));
        implementResult = yield* runSandcastle({
            agent: claudeCode("claude-opus-4-6"),
            sandbox: docker({ env: dockerEnv(config) }),
            cwd,
            prompt: implementPrompt,
            branchStrategy: { type: "branch", branch },
            maxIterations: 1,
            completionSignal: [...IMPL_COMPLETION_SIGNALS],
            name: `impl-${issue.identifier}-iter-${iter}`,
        }).pipe(Effect.withSpan(`impl-iter-${iter}`, {
            attributes: {
                "runway.iteration": iter,
                "runway.iteration.max": maxIters,
            },
        }));
        if ((implementResult.commits?.length ?? 0) > 0)
            producedCommits = true;
        implVerdict = parseImplVerdict(implementResult);
        if (implVerdict.kind === "done" || implVerdict.kind === "blocked")
            break;
        // CONTINUE / missing — build the summary the NEXT iteration
        // will see at the top of its prompt. A failing `git log` only
        // degrades the summary, so it is mapped to an empty string.
        const commits = yield* captureCommitLog(cwd, baseBranch, branch).pipe(Effect.catchAll(() => Effect.succeed("")));
        prevSummary = buildIterationSummary({
            iterationsRun: iter,
            commits,
            finalMessageTail: tailOfMessage(implementResult.stdout ?? ""),
        });
    }
    // VA-350: BLOCKED short-circuits straight to HITL — no reviewer
    // pass for a self-declared blocker.
    if (implVerdict.kind === "blocked") {
        const reason = `Implementation pass blocked: ${implVerdict.reason}`;
        yield* flagHitl(issue, deps, reason);
        return { kind: "hitl", detail: reason };
    }
    if (implVerdict.kind === "missing") {
        yield* Effect.logWarning(`impl agent ended without an IMPL: marker after ${maxIters} iteration(s); proceeding to review for back-compat`);
    }
    if (!producedCommits) {
        const reason = "Agent produced no commits — the issue may need clarification or human input.";
        yield* flagHitl(issue, deps, reason);
        return { kind: "hitl", detail: reason };
    }
    // 2. Review pass — read-only-ish, just looking at the diff.
    const diff = yield* captureDiff(cwd, baseBranch, branch);
    const commitLog = yield* captureCommitLog(cwd, baseBranch, branch);
    const reviewTemplate = yield* Effect.promise(() => loadReviewPrompt());
    const reviewPrompt = renderPrompt(reviewTemplate, reviewVars({ issue, diff, commits: commitLog }));
    const reviewResult = yield* runSandcastle({
        agent: claudeCode("claude-opus-4-6"),
        sandbox: docker({ env: dockerEnv(config) }),
        cwd,
        prompt: reviewPrompt,
        branchStrategy: { type: "head" },
        maxIterations: 1,
        name: `review-${issue.identifier}`,
    }).pipe(Effect.withSpan("review"));
    const verdict = parseReviewVerdict(reviewResult);
    if (verdict.kind === "rejected") {
        const reason = `Sub-agent review rejected: ${verdict.reason}`;
        yield* flagHitl(issue, deps, reason);
        return { kind: "hitl", detail: reason };
    }
    if (verdict.kind === "missing") {
        // VA-360: a review pass that didn't emit any marker is
        // untrustworthy — usually a crash, OOM, or context-window
        // truncation. Route to HITL with a reason that distinguishes
        // this from a real rejection so the operator knows to look at
        // the agent log instead of arguing with the verdict.
        const reason = `Review pass ended without a REVIEW: marker (likely crashed or truncated): ${verdict.reason}`;
        yield* flagHitl(issue, deps, reason);
        return { kind: "hitl", detail: reason };
    }
    // 3. Push + PR.
    yield* github.pushBranch(cwd, branch).pipe(Effect.withSpan("pushBranch"));
    const prBody = buildPrBody(issue);
    const prUrl = yield* github
        .openPullRequest({
        repoPath: cwd,
        branch,
        base: baseBranch,
        issue,
        body: prBody,
    })
        .pipe(Effect.withSpan("openPullRequest"));
    yield* linear.transition(issue.id, config.inReviewStatus);
    yield* linear.comment(issue.id, `Runway opened a PR for review: ${prUrl}`);
    return { kind: "opened", detail: prUrl };
});
|
|
282
|
+
/**
 * VA-355: hands an issue to a human. The explanatory comment is the
 * part that matters, so it is posted before the label is applied; a
 * label failure (missing `needs-human` label in Flightplan workspaces,
 * transient Linear errors, …) then cannot hide the reason. When even
 * the comment could not be posted, the reason is written to stderr
 * inside a banner so it is at least visible in the terminal.
 *
 * Neither Linear call can fail this effect: both outcomes are captured
 * with `Effect.either` and only logged.
 *
 * @param issue  Issue being flagged (`id`, `identifier` are read).
 * @param deps   Provides `config.hitlLabel` and the `linear` client.
 * @param reason Human-readable explanation appended to the comment.
 */
const flagHitl = (issue, deps, reason) => Effect.gen(function* () {
    const { config, linear } = deps;
    // Step 1: the substantive comment.
    const commentOutcome = yield* Effect.either(linear.comment(issue.id, `Runway flagged for human review: ${reason}`));
    const reasonIsOnIssue = commentOutcome._tag === "Right";
    if (commentOutcome._tag === "Left") {
        yield* Effect.logError(`${issue.identifier}: failed to post HITL comment`).pipe(Effect.annotateLogs({
            issue: issue.identifier,
            error: errMsg(commentOutcome.left),
        }));
    }
    // Step 2: the label, attempted regardless of how step 1 went.
    const labelOutcome = yield* Effect.either(linear.applyLabel(issue.id, config.hitlLabel));
    if (labelOutcome._tag === "Left") {
        const labelFailure = errMsg(labelOutcome.left);
        yield* Effect.logError(`${issue.identifier}: failed to apply HITL label "${config.hitlLabel}"`).pipe(Effect.annotateLogs({
            issue: issue.identifier,
            label: config.hitlLabel,
            error: labelFailure,
        }));
        if (reasonIsOnIssue) {
            // The real reason already landed with the first comment, so
            // this follow-up is purely a courtesy and may fail quietly.
            yield* runSwallow(linear.comment(issue.id, `Note: could not apply \`${config.hitlLabel}\` label — please apply it manually. (${labelFailure})`), `${issue.identifier}: HITL follow-up note`);
        }
    }
    if (reasonIsOnIssue)
        return;
    // Step 3 (fallback): nothing reached Linear, so surface the reason
    // on the operator's terminal instead.
    const banner = [
        "",
        `===== REJECTION REASON FOLLOWS (${issue.identifier}) =====`,
        reason,
        "===== END REJECTION REASON =====",
        "",
        "",
    ];
    yield* Effect.sync(() => {
        process.stderr.write(banner.join("\n"));
    });
});
|
|
142
332
|
/**
 * VA-356: explicit-by-intent "swallow this failure" runner for
 * best-effort Linear calls (revert paths, HITL follow-up notes). The
 * pre-Effect code used `.catch(() => undefined)`, which made the
 * decision-to-ignore invisible. Here we log a one-liner so a failed
 * label apply or transition leaves a trail without aborting the
 * drain.
 *
 * The failure text goes through `errMsg`, so a multi-line message is
 * cut to a single line and a failure without a string `message` is
 * still rendered instead of printing "undefined". A missing `_tag`
 * is reported as "UnknownError".
 *
 * @param effect Effect whose failure should be logged and discarded.
 * @param label  Prefix identifying the call in the warning log.
 * @returns Effect that never fails and yields `void`.
 */
const runSwallow = (effect, label) => effect.pipe(Effect.catchAll((err) => Effect.logWarning(`${label}: best-effort call failed (${err?._tag ?? "UnknownError"}): ${errMsg(err)}`)), Effect.asVoid);
|
|
341
|
+
/**
 * Condenses any thrown/failed value into a single line suitable for a
 * log field or a Linear comment.
 *
 * For an object with a string `message`, returns the first non-blank
 * line of that message with trailing whitespace removed (so a leading
 * empty line or a CRLF line ending does not produce an empty or
 * `\r`-terminated result). A message with no non-blank line yields the
 * message itself, trimmed at the end. Anything else is passed through
 * `String()`.
 *
 * @param err Any value: Error, tagged failure object, string, null, …
 * @returns A one-line description of `err`.
 */
const errMsg = (err) => {
    if (err && typeof err === "object" && "message" in err) {
        const m = err.message;
        if (typeof m === "string") {
            const firstLine = m
                .split(/\r?\n/)
                .find((line) => line.trim().length > 0);
            return (firstLine ?? m).trimEnd();
        }
    }
    return String(err);
};
|
|
349
|
+
/**
 * VA-355: render a per-issue verdict trail at the end of the drain so
 * the operator can scan results without opening Linear. Skipped when
 * no issues were attempted.
 *
 * @param outcomes    List of `{ identifier, kind, detail }` records.
 *   `kind` values "opened", "hitl" and "reverted" get dedicated labels;
 *   any other kind is shown as INFRA_ERROR.
 * @param readyStatus Name of the status a reverted issue was moved back
 *   to, shown in the "REVERTED → …" label. Defaults to "Todo", which
 *   keeps the output unchanged for callers that do not pass it.
 * @returns Effect that writes the summary with `console.log`.
 */
const printExitSummary = (outcomes, readyStatus = "Todo") => Effect.sync(() => {
    if (outcomes.length === 0)
        return;
    console.log("\n[runway] per-issue outcomes:");
    for (const o of outcomes) {
        const tag = o.kind === "opened"
            ? "APPROVED → PR opened"
            : o.kind === "hitl"
                ? "HITL"
                : o.kind === "reverted"
                    ? `REVERTED → ${readyStatus}`
                    : "INFRA_ERROR";
        console.log(` ${o.identifier} ${tag} ${o.detail}`);
    }
});
|
|
369
|
+
/**
 * VA-358: reports whether `branch` carries at least one commit that is
 * not on `base`, by counting `git rev-list --count base..branch`.
 *
 * The drain loop uses the answer to tell "agent crashed after doing
 * real work" (→ HITL) apart from "agent crashed during startup"
 * (→ revert to Todo). git runs with `reject: false`, and output that
 * is missing or not a number counts as "no commits", so the caller
 * reverts the issue rather than stranding it.
 *
 * @param repoPath Working directory for the git command.
 * @param base     Base branch name.
 * @param branch   Agent branch name.
 * @returns Effect yielding a boolean; failures are mapped to
 *   `{ message }` objects.
 */
const hasCommits = (repoPath, base, branch) => {
    const toFailure = (cause) => ({
        message: cause instanceof Error ? cause.message : String(cause),
    });
    const revList = runExecaScoped("git", ["rev-list", "--count", `${base}..${branch}`], { cwd: repoPath, reject: false }, toFailure);
    return revList.pipe(Effect.map((res) => {
        const printed = res.stdout;
        if (typeof printed !== "string")
            return false;
        // NaN (empty or non-numeric output) compares false here.
        const ahead = Number.parseInt(printed.trim(), 10);
        return ahead > 0;
    }));
};
|
|
384
|
+
/**
 * Produces the `git diff base...branch` text that is fed to the review
 * prompt, capped at 60,000 characters with a "…(truncated)" marker so
 * the prompt stays within the model's context budget.
 *
 * @param repoPath Working directory for the git command.
 * @param base     Base branch name.
 * @param branch   Agent branch name.
 * @returns Effect yielding the (possibly truncated) diff text, or ""
 *   when git printed nothing usable; failures are mapped to
 *   `{ message }` objects.
 */
const captureDiff = (repoPath, base, branch) => {
    const describeFailure = (cause) => ({
        message: cause instanceof Error ? cause.message : String(cause),
    });
    const gitDiff = runExecaScoped("git", ["diff", `${base}...${branch}`], { cwd: repoPath }, describeFailure);
    return gitDiff.pipe(Effect.map((res) => {
        const printed = res.stdout;
        const patch = typeof printed === "string" ? printed : "";
        if (patch.length > 60_000) {
            return `${patch.slice(0, 60_000)}\n…(truncated)`;
        }
        return patch;
    }));
};
|
|
393
|
+
/**
 * Lists the commits on `branch` that are not on `base`, one per line,
 * using `git log --oneline base..branch`.
 *
 * @param repoPath Working directory for the git command.
 * @param base     Base branch name.
 * @param branch   Agent branch name.
 * @returns Effect yielding git's stdout, or "" when it is not a
 *   string; failures are mapped to `{ message }` objects.
 */
const captureCommitLog = (repoPath, base, branch) => {
    const describeFailure = (cause) => ({
        message: cause instanceof Error ? cause.message : String(cause),
    });
    const gitLog = runExecaScoped("git", ["log", "--oneline", `${base}..${branch}`], { cwd: repoPath }, describeFailure);
    return gitLog.pipe(Effect.map((res) => {
        const printed = res.stdout;
        if (typeof printed === "string")
            return printed;
        return "";
    }));
};
|
|
399
|
+
/**
 * VA-358: minimal Effect adapter over `sandcastle.run`.
 *
 * The agent executes inside Docker, and sandcastle does not (yet)
 * hand back a kill handle that could be wired into
 * `Effect.acquireRelease`. Interrupting a long agent pass therefore
 * abandons the Promise without tearing the container down. Step 3's
 * acceptance records this as a sandcastle-side limitation. git / gh
 * subprocesses (the usual source of orphans today) are different:
 * they go through `runExecaScoped` and DO get a proper SIGKILL on
 * interrupt.
 *
 * @param args Options object passed to sandcastle's `run` unchanged.
 * @returns Effect yielding whatever `run` resolves to; a rejection is
 *   mapped to a `{ message }` failure.
 */
const runSandcastle = (args) => {
    const launch = () => run(args);
    const toFailure = (cause) => {
        if (cause instanceof Error) {
            return { message: cause.message };
        }
        return { message: String(cause) };
    };
    return Effect.tryPromise({ try: launch, catch: toFailure });
};
|
|
415
|
+
/**
 * Extracts the implementation agent's verdict from a run result by
 * finding the final `IMPL:` marker line in its output.
 *
 * The orchestrator relies on this to tell apart a clean finish (DONE),
 * a self-declared blocker (BLOCKED — reason) and a "more work needed"
 * signal (CONTINUE). When no marker is present the verdict is
 * `missing`, which the caller treats like CONTINUE (with a warning)
 * for back-compat.
 *
 * @param result Run result (or raw text) as accepted by `stringifyResult`.
 * @returns `{ kind: "missing" | "done" | "continue" }` or
 *   `{ kind: "blocked", reason }`.
 */
export function parseImplVerdict(result) {
    // Walk every marker and remember only the newest one: a later
    // marker supersedes earlier ones emitted in the same output.
    let newest;
    for (const hit of stringifyResult(result).matchAll(IMPL_VERDICT_RE)) {
        newest = hit;
    }
    if (newest === undefined) {
        return { kind: "missing" };
    }
    const [, keyword, trailingReason] = newest;
    switch (keyword) {
        case "DONE":
            return { kind: "done" };
        case "CONTINUE":
            return { kind: "continue" };
        default:
            return {
                kind: "blocked",
                reason: trailingReason?.trim() || "no reason given",
            };
    }
}
|
|
169
439
|
/**
 * VA-353: reads the reviewer's `REVIEW: APPROVED` /
 * `REVIEW: REJECTED — <reason>` marker out of a run result. Every
 * marker line in the output is considered and the LAST one wins,
 * because sandcastle appends its own wrapper output ("Agent stopped",
 * "Capturing session", "Reached max iterations (N).", "Run complete:
 * …") after the agent's final message.
 *
 * VA-360: output with no marker at all yields the distinct kind
 * `missing` instead of a rejection, so callers can separate "reviewed
 * and rejected" from "never produced a verdict (crash, truncation,
 * OOM)". A pass that did not terminate cleanly is not trustworthy
 * either way.
 *
 * @param result Run result (or raw text) as accepted by `stringifyResult`.
 * @returns `{ kind: "approved" | "rejected" | "missing", reason }`.
 */
export function parseReviewVerdict(result) {
    const hits = Array.from(stringifyResult(result).matchAll(REVIEW_VERDICT_RE));
    if (hits.length === 0) {
        return {
            kind: "missing",
            reason: "review output did not contain a REVIEW: verdict line",
        };
    }
    const [, keyword, trailingReason] = hits[hits.length - 1];
    if (keyword !== "APPROVED") {
        return {
            kind: "rejected",
            reason: trailingReason?.trim() || "no reason given",
        };
    }
    return { kind: "approved", reason: "" };
}
|
|
190
472
|
function stringifyResult(result) {
|
|
@@ -192,6 +474,16 @@ function stringifyResult(result) {
|
|
|
192
474
|
return result;
|
|
193
475
|
if (result && typeof result === "object") {
|
|
194
476
|
const r = result;
|
|
477
|
+
// VA-353: sandcastle's RunResult carries the combined agent
|
|
478
|
+
// output on `stdout`. Prefer it — falling through to
|
|
479
|
+
// JSON.stringify (the old behavior) replaces real newlines with
|
|
480
|
+
// `\n` escapes and breaks `^…$/m` line anchoring, which is the
|
|
481
|
+
// exact reason the reviewer's verdict was being silently dropped
|
|
482
|
+
// for issues like VA-312 tonight. The iterations/output
|
|
483
|
+
// fallbacks remain for back-compat with older shapes and inline
|
|
484
|
+
// test fixtures.
|
|
485
|
+
if (typeof r.stdout === "string" && r.stdout.length > 0)
|
|
486
|
+
return r.stdout;
|
|
195
487
|
if (r.iterations?.length) {
|
|
196
488
|
return r.iterations
|
|
197
489
|
.map((i) => i.output ?? i.text ?? "")
|
|
@@ -214,7 +506,7 @@ function stringifyResult(result) {
|
|
|
214
506
|
function dockerEnv(config) {
    // Builds the environment object passed to the Docker sandbox.
    // Without a service-account token the container gets no extra
    // variables; with one, the Redacted wrapper is opened here so the
    // plain value is what gets forwarded.
    const token = config.opServiceAccountToken;
    if (!token) {
        return {};
    }
    return { OP_SERVICE_ACCOUNT_TOKEN: Redacted.value(token) };
}
|