@valescoagency/runway 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,10 +2,27 @@ import { existsSync } from "node:fs";
2
2
  import { join } from "node:path";
3
3
  import { run, claudeCode } from "@ai-hero/sandcastle";
4
4
  import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
5
- import { execa } from "execa";
6
- import { implementVars, loadImplementPrompt, loadReviewPrompt, renderPrompt, reviewVars, } from "./prompts.js";
5
+ import { Effect, Redacted } from "effect";
6
+ import { buildIterationSummary, implementVars, loadImplementPrompt, loadReviewPrompt, renderPrompt, reviewVars, tailOfMessage, } from "./prompts.js";
7
7
  import { detectBaseBranch } from "./git.js";
8
- const REVIEW_VERDICT_RE = /^REVIEW:\s*(APPROVED|REJECTED)(?:\s+—\s+(.*))?$/m;
8
+ import { loadPolicy } from "./policy.js";
9
+ import { runExecaScoped } from "./subprocess.js";
10
+ // VA-353: review verdict marker. Global flag because sandcastle
11
+ // appends wrapper output ("Agent stopped", "Capturing session",
12
+ // "Reached max iterations (1).", "Run complete: …") AFTER the agent's
13
+ // final message — so the marker is rarely the last line. We scan
14
+ // every line-start match and keep the LAST one, which is the most
15
+ // recent agent verdict. Standalone-line: ^…$ with /m anchors prevent
16
+ // mid-prose matches like "the reviewer should output REVIEW: APPROVED
17
+ // when…".
18
+ const REVIEW_VERDICT_RE = /^REVIEW:\s*(APPROVED|REJECTED)(?:\s+—\s+(.*))?$/gm;
19
+ // VA-350: impl-pass termination contract. Last `IMPL:` marker line in
20
+ // the agent's output wins (most recent iteration's verdict). DONE →
21
+ // proceed to review; BLOCKED → HITL with reason; CONTINUE or missing →
22
+ // fall through (back-compat). The trailing reason after `—` is
23
+ // captured for BLOCKED.
24
+ const IMPL_VERDICT_RE = /^IMPL:\s*(DONE|BLOCKED|CONTINUE)(?:\s+—\s+(.*))?$/gm;
25
+ const IMPL_COMPLETION_SIGNALS = ["IMPL: DONE", "IMPL: BLOCKED"];
9
26
  /**
10
27
  * Confirms the cwd looks like a sandcastle-initialised repo. If not,
11
28
  * we error early with a clear message rather than letting Sandcastle
@@ -18,173 +35,438 @@ export function assertSandcastleInitialised(cwd) {
18
35
  }
19
36
  }
20
37
  /**
21
- * Drains the Linear queue until empty (or until --max is hit). One
22
- * issue at a time in v1; parallel runs are a follow-up.
38
+ * VA-358: drains the Linear queue as a single Effect program with
39
+ * spans at every pipeline stage and structured-field logs (issue id,
40
+ * branch, stage, duration). Externally still appears to "do the
41
+ * loop" — `Effect.runPromise` at the cli.ts boundary turns it back
42
+ * into a Promise<OrchestratorResult>.
43
+ *
44
+ * Error channel is `never` because the function folds every per-issue
45
+ * failure into an `IssueOutcome` (revert / errored / HITL) rather than
46
+ * propagating. The whole drain only halts when the queue is empty or
47
+ * `--max` is reached.
23
48
  */
24
- export async function drainQueue(deps, opts = {}) {
49
+ export const drainQueue = (deps, opts = {}) => Effect.gen(function* () {
25
50
  const { config, linear } = deps;
26
51
  const max = opts.max ?? Number.POSITIVE_INFINITY;
27
- let processed = 0;
28
- let opened = 0;
29
- let hitl = 0;
30
- let errored = 0;
31
52
  // Resolve the base branch once at startup so every issue in the
32
53
  // drain sees the same answer (and so a misconfigured repo fails
33
54
  // fast, before we touch any Linear state).
34
- const baseBranch = config.baseBranch ?? (await detectBaseBranch(deps.cwd));
35
- console.log(`[runway] base branch resolved to "${baseBranch}"`);
36
- const runDeps = { ...deps, baseBranch };
37
- while (processed < max) {
38
- const queue = await linear.fetchReady();
39
- if (queue.length === 0)
55
+ const baseBranchResolved = yield* (config.baseBranch
56
+ ? Effect.succeed(config.baseBranch)
57
+ : detectBaseBranch(deps.cwd)).pipe(Effect.catchAll((err) => Effect.fail({
58
+ _tag: "BaseBranchDetectionFailed",
59
+ message: err.message,
60
+ })),
61
+ // If base-branch detection fails, the whole drain is dead in the
62
+ // water (we'd diff against an undefined branch). Surface it as a
63
+ // top-level defect; drainQueue's `never` error type is preserved
64
+ // because the unrecoverable failure dies the fiber, not the
65
+ // error channel.
66
+ Effect.orDie);
67
+ yield* Effect.logInfo("base branch resolved").pipe(Effect.annotateLogs({ baseBranch: baseBranchResolved }));
68
+ const policy = loadPolicy(deps.cwd, { allowPathsOverride: opts.allowPaths });
69
+ yield* Effect.logInfo(`policy source: ${policy.source}`);
70
+ const runDeps = {
71
+ ...deps,
72
+ baseBranch: baseBranchResolved,
73
+ policy,
74
+ };
75
+ // VA-344: never re-pick an issue in the same invocation, even if
76
+ // VA-342 reverted it to `Todo`. Without this, a deterministic
77
+ // startup failure (broken .env.schema, missing image, expired
78
+ // token) would loop on the same issue until --max was exhausted.
79
+ const seen = new Set();
80
+ const outcomes = [];
81
+ let attempts = 0;
82
+ let opened = 0;
83
+ let hitl = 0;
84
+ let errored = 0;
85
+ while (attempts < max) {
86
+ const queue = yield* linear.fetchReady().pipe(
87
+ // Failure to fetch the queue is fatal to the drain (we can't
88
+ // pick the next issue); die rather than infinite-loop on the
89
+ // same error.
90
+ Effect.orDie);
91
+ const issue = queue.find((i) => !seen.has(i.id));
92
+ if (!issue)
40
93
  break;
41
- const issue = queue[0];
42
- try {
43
- const verdict = await processIssue(issue, runDeps);
44
- processed += 1;
45
- if (verdict === "opened")
94
+ seen.add(issue.id);
95
+ attempts += 1;
96
+ const branch = `agent/${issue.identifier.toLowerCase()}`;
97
+ const processed = yield* processIssue(issue, runDeps).pipe(Effect.either, Effect.withSpan("processIssue", {
98
+ attributes: {
99
+ "runway.issue.identifier": issue.identifier,
100
+ "runway.issue.id": issue.id,
101
+ "runway.branch": branch,
102
+ },
103
+ }), Effect.annotateLogs({
104
+ issue: issue.identifier,
105
+ branch,
106
+ }));
107
+ if (processed._tag === "Right") {
108
+ const result = processed.right;
109
+ if (result.kind === "opened")
46
110
  opened += 1;
47
- if (verdict === "hitl")
111
+ if (result.kind === "hitl")
48
112
  hitl += 1;
113
+ outcomes.push({
114
+ identifier: issue.identifier,
115
+ kind: result.kind,
116
+ detail: result.detail,
117
+ });
49
118
  }
50
- catch (err) {
119
+ else {
51
120
  errored += 1;
52
- console.error(`[runway] error on ${issue.identifier}:`, err);
121
+ const errDetail = errMsg(processed.left);
122
+ yield* Effect.logError(`error on ${issue.identifier}`).pipe(Effect.annotateLogs({
123
+ issue: issue.identifier,
124
+ error: errDetail,
125
+ }));
53
126
  // If the agent crashed before producing any commits (missing
54
127
  // image, varlock validation, container failed to boot, etc.),
55
- // it's an infrastructure failure — not a HITL. Revert the issue
56
- // to `Todo` and skip the `needs-human` label so the next run
57
- // can pick it up cleanly. `In Progress` is reserved for "agent
58
- // has committed to the branch".
59
- const branch = `agent/${issue.identifier.toLowerCase()}`;
60
- const startedRealWork = await hasCommits(deps.cwd, baseBranch, branch);
128
+ // it's an infrastructure failure — not a HITL. Revert the
129
+ // issue to the ready state and skip the HITL label so the
130
+ // next run can pick it up cleanly. `In Progress` is reserved
131
+ // for "agent has committed to the branch".
132
+ const startedRealWork = yield* hasCommits(deps.cwd, baseBranchResolved, branch).pipe(Effect.catchAll(() => Effect.succeed(false)));
61
133
  if (!startedRealWork) {
62
- await linear
63
- .transition(issue.id, config.readyStatus)
64
- .catch(() => undefined);
65
- await linear
66
- .comment(issue.id, `Runway hit a startup failure before the agent produced any commits — reverting to \`${config.readyStatus}\` for retry:\n\n\`\`\`\n${err instanceof Error ? err.message : String(err)}\n\`\`\``)
67
- .catch(() => undefined);
134
+ yield* runSwallow(linear.transition(issue.id, config.readyStatus), `${issue.identifier}: revert transition to ${config.readyStatus}`);
135
+ yield* runSwallow(linear.comment(issue.id, `Runway hit a startup failure before the agent produced any commits — reverting to \`${config.readyStatus}\` for retry:\n\n\`\`\`\n${errDetail}\n\`\`\``), `${issue.identifier}: revert-comment`);
136
+ outcomes.push({
137
+ identifier: issue.identifier,
138
+ kind: "reverted",
139
+ detail: errDetail,
140
+ });
68
141
  }
69
142
  else {
70
- await linear
71
- .applyLabel(issue.id, config.hitlLabel)
72
- .catch(() => undefined);
73
- await linear
74
- .comment(issue.id, `Runway hit an unrecoverable error and flagged for human review:\n\n\`\`\`\n${err instanceof Error ? err.message : String(err)}\n\`\`\``)
75
- .catch(() => undefined);
143
+ // VA-355: comment first with the substantive reason, label
144
+ // second (best-effort). If we labeled first and the label
145
+ // didn't exist (Flightplan workspaces hitting the
146
+ // `needs-human` default — see VA-354), the orchestrator's
147
+ // catch would never get to the reason and the operator
148
+ // would see an infrastructure error in Linear with no clue
149
+ // what the agent actually found.
150
+ yield* flagHitl(issue, runDeps, `Runway hit an unrecoverable error and flagged for human review: ${errDetail}`);
151
+ outcomes.push({
152
+ identifier: issue.identifier,
153
+ kind: "errored",
154
+ detail: errDetail,
155
+ });
76
156
  }
77
157
  }
78
158
  }
79
- return { processed, opened, hitl, errored };
80
- }
81
- async function processIssue(issue, deps) {
159
+ yield* printExitSummary(outcomes);
160
+ return {
161
+ attempts,
162
+ opened,
163
+ hitl,
164
+ errored,
165
+ outcomes,
166
+ };
167
+ }).pipe(Effect.withSpan("drainQueue"));
168
+ const processIssue = (issue, deps) => Effect.gen(function* () {
82
169
  const { config, linear, github, cwd, baseBranch } = deps;
83
170
  const branch = `agent/${issue.identifier.toLowerCase()}`;
84
- await linear.transition(issue.id, config.inProgressStatus);
85
- await linear.comment(issue.id, `Runway picked up this issue. Branch: \`${branch}\`.`);
171
+ yield* linear.transition(issue.id, config.inProgressStatus);
172
+ yield* linear.comment(issue.id, `Runway picked up this issue. Branch: \`${branch}\`.`);
86
173
  // 1. Implementation pass.
87
- const implementPrompt = renderPrompt(await loadImplementPrompt(), implementVars(issue));
88
- const implementResult = await run({
89
- agent: claudeCode("claude-opus-4-6"),
90
- sandbox: docker({
91
- env: dockerEnv(config),
92
- }),
93
- cwd,
94
- prompt: implementPrompt,
95
- branchStrategy: { type: "branch", branch },
96
- maxIterations: config.maxIterations,
97
- name: `impl-${issue.identifier}`,
98
- });
99
- if (implementResult.commits.length === 0) {
100
- await flagHitl(issue, deps, "Agent produced no commits — the issue may need clarification or human input.");
101
- return "hitl";
174
+ //
175
+ // VA-349 + VA-350: run iterations one at a time so we can (a)
176
+ // inject a summary of the previous iteration into the next prompt
177
+ // — no more "I'll start by understanding the current state of the
178
+ // repository" 5x per issue — and (b) break early on IMPL:
179
+ // DONE/BLOCKED parsed from our own code rather than relying on
180
+ // sandcastle's substring completionSignal.
181
+ const implementTemplate = yield* Effect.promise(() => loadImplementPrompt());
182
+ const maxIters = Math.max(1, config.maxIterations);
183
+ let prevSummary = "";
184
+ let implementResult;
185
+ let implVerdict = { kind: "missing" };
186
+ for (let iter = 1; iter <= maxIters; iter += 1) {
187
+ const implementPrompt = renderPrompt(implementTemplate, implementVars(issue, {
188
+ previousIterations: prevSummary,
189
+ policy: deps.policy,
190
+ }));
191
+ implementResult = yield* runSandcastle({
192
+ agent: claudeCode("claude-opus-4-6"),
193
+ sandbox: docker({ env: dockerEnv(config) }),
194
+ cwd,
195
+ prompt: implementPrompt,
196
+ branchStrategy: { type: "branch", branch },
197
+ maxIterations: 1,
198
+ completionSignal: [...IMPL_COMPLETION_SIGNALS],
199
+ name: `impl-${issue.identifier}-iter-${iter}`,
200
+ }).pipe(Effect.withSpan(`impl-iter-${iter}`, {
201
+ attributes: {
202
+ "runway.iteration": iter,
203
+ "runway.iteration.max": maxIters,
204
+ },
205
+ }));
206
+ implVerdict = parseImplVerdict(implementResult);
207
+ if (implVerdict.kind === "done" || implVerdict.kind === "blocked")
208
+ break;
209
+ // CONTINUE / missing — build the summary the NEXT iteration
210
+ // will see at the top of its prompt.
211
+ const commits = yield* captureCommitLog(cwd, baseBranch, branch).pipe(Effect.catchAll(() => Effect.succeed("")));
212
+ prevSummary = buildIterationSummary({
213
+ iterationsRun: iter,
214
+ commits,
215
+ finalMessageTail: tailOfMessage(implementResult.stdout ?? ""),
216
+ });
217
+ }
218
+ // implementResult is always set here: the loop body runs at least
219
+ // once because maxIters >= 1.
220
+ const finalResult = implementResult;
221
+ // VA-350: BLOCKED short-circuits straight to HITL — no reviewer
222
+ // pass for a self-declared blocker.
223
+ if (implVerdict.kind === "blocked") {
224
+ const reason = `Implementation pass blocked: ${implVerdict.reason}`;
225
+ yield* flagHitl(issue, deps, reason);
226
+ return { kind: "hitl", detail: reason };
227
+ }
228
+ if (implVerdict.kind === "missing") {
229
+ yield* Effect.logWarning(`impl agent ended without an IMPL: marker after ${maxIters} iteration(s); proceeding to review for back-compat`);
230
+ }
231
+ if (finalResult.commits.length === 0) {
232
+ const reason = "Agent produced no commits — the issue may need clarification or human input.";
233
+ yield* flagHitl(issue, deps, reason);
234
+ return { kind: "hitl", detail: reason };
102
235
  }
103
236
  // 2. Review pass — read-only-ish, just looking at the diff.
104
- const diff = await captureDiff(cwd, baseBranch, branch);
105
- const commitLog = await captureCommitLog(cwd, baseBranch, branch);
106
- const reviewPrompt = renderPrompt(await loadReviewPrompt(), reviewVars({ issue, diff, commits: commitLog }));
107
- const reviewResult = await run({
237
+ const diff = yield* captureDiff(cwd, baseBranch, branch);
238
+ const commitLog = yield* captureCommitLog(cwd, baseBranch, branch);
239
+ const reviewTemplate = yield* Effect.promise(() => loadReviewPrompt());
240
+ const reviewPrompt = renderPrompt(reviewTemplate, reviewVars({ issue, diff, commits: commitLog }));
241
+ const reviewResult = yield* runSandcastle({
108
242
  agent: claudeCode("claude-opus-4-6"),
109
- sandbox: docker({
110
- env: dockerEnv(config),
111
- }),
243
+ sandbox: docker({ env: dockerEnv(config) }),
112
244
  cwd,
113
245
  prompt: reviewPrompt,
114
246
  branchStrategy: { type: "head" },
115
247
  maxIterations: 1,
116
248
  name: `review-${issue.identifier}`,
117
- });
249
+ }).pipe(Effect.withSpan("review"));
118
250
  const verdict = parseReviewVerdict(reviewResult);
119
251
  if (verdict.kind === "rejected") {
120
- await flagHitl(issue, deps, `Sub-agent review rejected: ${verdict.reason}`);
121
- return "hitl";
252
+ const reason = `Sub-agent review rejected: ${verdict.reason}`;
253
+ yield* flagHitl(issue, deps, reason);
254
+ return { kind: "hitl", detail: reason };
255
+ }
256
+ if (verdict.kind === "missing") {
257
+ // VA-360: a review pass that didn't emit any marker is
258
+ // untrustworthy — usually a crash, OOM, or context-window
259
+ // truncation. Route to HITL with a reason that distinguishes
260
+ // this from a real rejection so the operator knows to look at
261
+ // the agent log instead of arguing with the verdict.
262
+ const reason = `Review pass ended without a REVIEW: marker (likely crashed or truncated): ${verdict.reason}`;
263
+ yield* flagHitl(issue, deps, reason);
264
+ return { kind: "hitl", detail: reason };
122
265
  }
123
266
  // 3. Push + PR.
124
- await github.pushBranch(cwd, branch);
267
+ yield* github.pushBranch(cwd, branch).pipe(Effect.withSpan("pushBranch"));
125
268
  const prBody = buildPrBody(issue);
126
- const prUrl = await github.openPullRequest({
269
+ const prUrl = yield* github
270
+ .openPullRequest({
127
271
  repoPath: cwd,
128
272
  branch,
129
273
  base: baseBranch,
130
274
  issue,
131
275
  body: prBody,
132
- });
133
- await linear.transition(issue.id, config.inReviewStatus);
134
- await linear.comment(issue.id, `Runway opened a PR for review: ${prUrl}`);
135
- return "opened";
136
- }
137
- async function flagHitl(issue, deps, reason) {
276
+ })
277
+ .pipe(Effect.withSpan("openPullRequest"));
278
+ yield* linear.transition(issue.id, config.inReviewStatus);
279
+ yield* linear.comment(issue.id, `Runway opened a PR for review: ${prUrl}`);
280
+ return { kind: "opened", detail: prUrl };
281
+ });
282
+ /**
283
+ * VA-355: comment is the load-bearing artifact, label is metadata.
284
+ * Post the comment FIRST so the substantive reason lands on the
285
+ * issue even if the label apply later fails (Flightplan workspaces
286
+ * hitting the `needs-human` default, transient Linear errors, etc.).
287
+ * On full failure (comment didn't even post), dump the reason to
288
+ * stderr with a clear banner so the operator sees it terminal-side.
289
+ */
290
+ const flagHitl = (issue, deps, reason) => Effect.gen(function* () {
138
291
  const { config, linear } = deps;
139
- await linear.applyLabel(issue.id, config.hitlLabel);
140
- await linear.comment(issue.id, `Runway flagged for human review: ${reason}`);
141
- }
292
+ const body = `Runway flagged for human review: ${reason}`;
293
+ const commentResult = yield* linear.comment(issue.id, body).pipe(Effect.either);
294
+ const commentPosted = commentResult._tag === "Right";
295
+ if (!commentPosted) {
296
+ yield* Effect.logError(`${issue.identifier}: failed to post HITL comment`).pipe(Effect.annotateLogs({
297
+ issue: issue.identifier,
298
+ error: errMsg(commentResult.left),
299
+ }));
300
+ }
301
+ const labelResult = yield* linear
302
+ .applyLabel(issue.id, config.hitlLabel)
303
+ .pipe(Effect.either);
304
+ if (labelResult._tag === "Left") {
305
+ const detail = errMsg(labelResult.left);
306
+ yield* Effect.logError(`${issue.identifier}: failed to apply HITL label "${config.hitlLabel}"`).pipe(Effect.annotateLogs({
307
+ issue: issue.identifier,
308
+ label: config.hitlLabel,
309
+ error: detail,
310
+ }));
311
+ if (commentPosted) {
312
+ // Best-effort follow-up note; the real reason is already on
313
+ // the issue from the first comment.
314
+ yield* runSwallow(linear.comment(issue.id, `Note: could not apply \`${config.hitlLabel}\` label — please apply it manually. (${detail})`), `${issue.identifier}: HITL follow-up note`);
315
+ }
316
+ }
317
+ if (!commentPosted) {
318
+ // Last resort: the operator at least sees the reason in their
319
+ // terminal, even with Linear entirely unreachable.
320
+ yield* Effect.sync(() => {
321
+ process.stderr.write([
322
+ "",
323
+ `===== REJECTION REASON FOLLOWS (${issue.identifier}) =====`,
324
+ reason,
325
+ "===== END REJECTION REASON =====",
326
+ "",
327
+ "",
328
+ ].join("\n"));
329
+ });
330
+ }
331
+ });
142
332
  /**
143
- * Whether the agent branch has any commits beyond `base`. Used by the
144
- * drain loop to distinguish "agent crashed mid-run, after producing
145
- * real work" (→ HITL) from "agent crashed during startup, no work
146
- * done" (→ revert to Todo). If the branch doesn't exist or git fails,
147
- * treat as "no commits" so we revert rather than strand the issue.
333
+ * VA-356: explicit-by-intent "swallow this failure" runner for
334
+ * best-effort Linear calls (revert paths, HITL follow-up notes). The
335
+ * pre-Effect code used `.catch(() => undefined)`, which made the
336
+ * decision-to-ignore invisible. Here we log a one-liner so a failed
337
+ * label apply or transition leaves a trail without aborting the
338
+ * drain.
148
339
  */
149
- async function hasCommits(repoPath, base, branch) {
150
- try {
151
- const { stdout } = await execa("git", ["rev-list", "--count", `${base}..${branch}`], { cwd: repoPath, reject: false });
152
- return Number.parseInt(stdout.trim(), 10) > 0;
340
+ const runSwallow = (effect, label) => effect.pipe(Effect.catchAll((err) => Effect.logWarning(`${label}: best-effort call failed (${err._tag}): ${err.message}`)), Effect.asVoid);
341
+ const errMsg = (err) => {
342
+ if (err && typeof err === "object" && "message" in err) {
343
+ const m = err.message;
344
+ if (typeof m === "string")
345
+ return m.split("\n")[0] ?? m;
153
346
  }
154
- catch {
155
- return false;
347
+ return String(err);
348
+ };
349
+ /**
350
+ * VA-355: render a per-issue verdict trail at the end of the drain so
351
+ * the operator can scan results without opening Linear. Skipped when
352
+ * no issues were attempted.
353
+ */
354
+ const printExitSummary = (outcomes) => Effect.sync(() => {
355
+ if (outcomes.length === 0)
356
+ return;
357
+ console.log("\n[runway] per-issue outcomes:");
358
+ for (const o of outcomes) {
359
+ const tag = o.kind === "opened"
360
+ ? "APPROVED → PR opened"
361
+ : o.kind === "hitl"
362
+ ? "HITL"
363
+ : o.kind === "reverted"
364
+ ? "REVERTED → Todo"
365
+ : "INFRA_ERROR";
366
+ console.log(` ${o.identifier} ${tag} ${o.detail}`);
156
367
  }
157
- }
158
- async function captureDiff(repoPath, base, branch) {
159
- const { stdout } = await execa("git", ["diff", `${base}...${branch}`], {
160
- cwd: repoPath,
161
- });
162
- // Truncate to keep the review prompt under the model's context budget.
163
- return stdout.length > 60_000 ? `${stdout.slice(0, 60_000)}\n…(truncated)` : stdout;
164
- }
165
- async function captureCommitLog(repoPath, base, branch) {
166
- const { stdout } = await execa("git", ["log", "--oneline", `${base}..${branch}`], { cwd: repoPath });
167
- return stdout;
368
+ });
369
+ /**
370
+ * VA-358: Whether the agent branch has any commits beyond `base`.
371
+ * Used by the drain loop to distinguish "agent crashed mid-run, after
372
+ * producing real work" (→ HITL) from "agent crashed during startup,
373
+ * no work done" (→ revert to Todo). If the branch doesn't exist or
374
+ * git fails, treat as "no commits" so we revert rather than strand
375
+ * the issue.
376
+ */
377
+ const hasCommits = (repoPath, base, branch) => runExecaScoped("git", ["rev-list", "--count", `${base}..${branch}`], { cwd: repoPath, reject: false }, (err) => ({
378
+ message: err instanceof Error ? err.message : String(err),
379
+ })).pipe(Effect.map((res) => {
380
+ const raw = res.stdout;
381
+ const out = typeof raw === "string" ? raw : "";
382
+ return Number.parseInt(out.trim(), 10) > 0;
383
+ }));
384
+ const captureDiff = (repoPath, base, branch) => runExecaScoped("git", ["diff", `${base}...${branch}`], { cwd: repoPath }, (err) => ({
385
+ message: err instanceof Error ? err.message : String(err),
386
+ })).pipe(Effect.map((res) => {
387
+ const raw = res.stdout;
388
+ const out = typeof raw === "string" ? raw : "";
389
+ // Truncate to keep the review prompt under the model's context
390
+ // budget.
391
+ return out.length > 60_000 ? `${out.slice(0, 60_000)}\n…(truncated)` : out;
392
+ }));
393
+ const captureCommitLog = (repoPath, base, branch) => runExecaScoped("git", ["log", "--oneline", `${base}..${branch}`], { cwd: repoPath }, (err) => ({
394
+ message: err instanceof Error ? err.message : String(err),
395
+ })).pipe(Effect.map((res) => {
396
+ const raw = res.stdout;
397
+ return typeof raw === "string" ? raw : "";
398
+ }));
399
+ /**
400
+ * VA-358: thin Effect wrapper around `sandcastle.run`. The agent run
401
+ * happens inside Docker — sandcastle doesn't (yet) expose a kill
402
+ * handle that we can pipe through `Effect.acquireRelease`, so an
403
+ * interrupt during a long agent pass abandons the Promise but doesn't
404
+ * tear down the container. Step 3's acceptance documents this as a
405
+ * sandcastle-side limitation; for git / gh subprocesses (the common
406
+ * orphan source today) we DO get proper SIGKILL on interrupt via
407
+ * `runExecaScoped`.
408
+ */
409
+ const runSandcastle = (args) => Effect.tryPromise({
410
+ try: () => run(args),
411
+ catch: (err) => ({
412
+ message: err instanceof Error ? err.message : String(err),
413
+ }),
414
+ });
415
+ /**
416
+ * Pulls the last `IMPL:` marker line out of the agent's output. The
417
+ * orchestrator uses this to distinguish a clean completion (DONE)
418
+ * from a self-declared block (BLOCKED — reason) from a multi-
419
+ * iteration in-progress signal (CONTINUE). A missing marker is
420
+ * treated as CONTINUE-with-warning for back-compat.
421
+ */
422
+ export function parseImplVerdict(result) {
423
+ const text = stringifyResult(result);
424
+ // Take the LAST match — later iterations override earlier ones if
425
+ // the agent emitted multiple markers across an iteration loop.
426
+ const matches = [...text.matchAll(IMPL_VERDICT_RE)];
427
+ const last = matches[matches.length - 1];
428
+ if (!last)
429
+ return { kind: "missing" };
430
+ if (last[1] === "DONE")
431
+ return { kind: "done" };
432
+ if (last[1] === "CONTINUE")
433
+ return { kind: "continue" };
434
+ return {
435
+ kind: "blocked",
436
+ reason: last[2]?.trim() || "no reason given",
437
+ };
168
438
  }
169
439
  /**
170
- * Sandcastle's `RunResult` shape varies by version; defensively dig out
171
- * the last assistant message text. We only need to match the
172
- * `REVIEW: APPROVED` / `REVIEW: REJECTED …` line at the tail.
440
+ * VA-353: parse the reviewer's final `REVIEW: APPROVED` /
441
+ * `REVIEW: REJECTED <reason>` marker. Scans the agent's combined
442
+ * stdout for *all* matches and returns the LAST one, since
443
+ * sandcastle appends its own wrapper output ("Agent stopped",
444
+ * "Capturing session", "Reached max iterations (N).", "Run complete:
445
+ * …") after the agent's final message. A missing marker is itself a
446
+ * rejection — a reviewer pass that didn't terminate cleanly is not
447
+ * trustworthy.
173
448
  */
174
- function parseReviewVerdict(result) {
449
+ export function parseReviewVerdict(result) {
175
450
  const text = stringifyResult(result);
176
- const match = text.match(REVIEW_VERDICT_RE);
177
- if (!match) {
451
+ const matches = [...text.matchAll(REVIEW_VERDICT_RE)];
452
+ const last = matches[matches.length - 1];
453
+ // VA-360: explicit `missing` kind. Pre-VA-360 this returned a
454
+ // rejection with the message "review output did not contain a
455
+ // REVIEW: verdict line" — which conflated "agent reviewed and
456
+ // rejected" with "agent never emitted a verdict (crash, truncation,
457
+ // OOM)". `processIssue` now routes the two to HITL with distinct
458
+ // reason lines so the operator can tell them apart.
459
+ if (!last) {
178
460
  return {
179
- kind: "rejected",
461
+ kind: "missing",
180
462
  reason: "review output did not contain a REVIEW: verdict line",
181
463
  };
182
464
  }
183
- if (match[1] === "APPROVED")
465
+ if (last[1] === "APPROVED")
184
466
  return { kind: "approved", reason: "" };
185
467
  return {
186
468
  kind: "rejected",
187
- reason: match[2]?.trim() || "no reason given",
469
+ reason: last[2]?.trim() || "no reason given",
188
470
  };
189
471
  }
190
472
  function stringifyResult(result) {
@@ -192,6 +474,16 @@ function stringifyResult(result) {
192
474
  return result;
193
475
  if (result && typeof result === "object") {
194
476
  const r = result;
477
+ // VA-353: sandcastle's RunResult carries the combined agent
478
+ // output on `stdout`. Prefer it — falling through to
479
+ // JSON.stringify (the old behavior) replaces real newlines with
480
+ // `\n` escapes and breaks `^…$/m` line anchoring, which is the
481
+ // exact reason the reviewer's verdict was being silently dropped
482
+ // for issues like VA-312 tonight. The iterations/output
483
+ // fallbacks remain for back-compat with older shapes and inline
484
+ // test fixtures.
485
+ if (typeof r.stdout === "string" && r.stdout.length > 0)
486
+ return r.stdout;
195
487
  if (r.iterations?.length) {
196
488
  return r.iterations
197
489
  .map((i) => i.output ?? i.text ?? "")
@@ -214,7 +506,7 @@ function stringifyResult(result) {
214
506
  function dockerEnv(config) {
215
507
  const env = {};
216
508
  if (config.opServiceAccountToken) {
217
- env.OP_SERVICE_ACCOUNT_TOKEN = config.opServiceAccountToken;
509
+ env.OP_SERVICE_ACCOUNT_TOKEN = Redacted.value(config.opServiceAccountToken);
218
510
  }
219
511
  return env;
220
512
  }