@valescoagency/runway 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/review.js CHANGED
@@ -4,15 +4,21 @@ import { Effect } from "effect";
4
4
  import { captureCommitLog, captureDiff } from "./git.js";
5
5
  import { renderReviewPrompt } from "./prompts.js";
6
6
  import { dockerEnv, runSandcastle, stringifyResult, } from "./sandcastle.js";
7
- // VA-353: review verdict marker. Global flag because sandcastle
8
- // appends wrapper output ("Agent stopped", "Capturing session",
9
- // "Reached max iterations (1).", "Run complete: …") AFTER the agent's
10
- // final message — so the marker is rarely the last line. We scan
11
- // every line-start match and keep the LAST one, which is the most
12
- // recent agent verdict. Standalone-line: ^…$ with /m anchors prevent
13
- // mid-prose matches like "the reviewer should output REVIEW: APPROVED
14
- // when…".
15
- const REVIEW_VERDICT_RE = /^REVIEW:\s*(APPROVED|REJECTED)(?:\s+—\s+(.*))?$/gm;
7
+ // VA-353 / VA-418: review verdict marker. Global flag because
8
+ // sandcastle appends wrapper output ("Agent stopped", "Capturing
9
+ // session", "Reached max iterations (1).", "Run complete: …") AFTER
10
+ // the agent's final message — so the marker is rarely the last line.
11
+ // We scan every line-start match and keep the LAST one, which is the
12
+ // most recent agent verdict. Standalone-line: ^…$ with /m anchors
13
+ // prevent mid-prose matches like "the reviewer should output REVIEW:
14
+ // APPROVED when…".
15
+ //
16
+ // VA-418 extended the verdict alphabet from {APPROVED, REJECTED} to
17
+ // {APPROVED, REJECTED, REJECTED-RETRY, REJECTED-HITL}. Plain
18
+ // `REJECTED` (the legacy marker) still parses and maps to
19
+ // `rejected-retry` so existing prompts continue to drive the retry
20
+ // flow without an update.
21
+ const REVIEW_VERDICT_RE = /^REVIEW:\s*(APPROVED|REJECTED-RETRY|REJECTED-HITL|REJECTED)(?:\s+—\s+(.*))?$/gm;
16
22
  /**
17
23
  * VA-353: parse the reviewer's final `REVIEW: APPROVED` /
18
24
  * `REVIEW: REJECTED — <reason>` marker. Scans the agent's combined
@@ -41,10 +47,14 @@ export function parseReviewVerdict(result) {
41
47
  }
42
48
  if (last[1] === "APPROVED")
43
49
  return { kind: "approved", reason: "" };
44
- return {
45
- kind: "rejected",
46
- reason: last[2]?.trim() || "no reason given",
47
- };
50
+ const reason = last[2]?.trim() || "no reason given";
51
+ // VA-418: REJECTED-HITL escalates immediately; REJECTED-RETRY (and
52
+ // the legacy bare REJECTED, for graceful degradation) flows into
53
+ // the bounded retry loop. Default to the more forgiving path on
54
+ // ambiguity — the retry budget caps the blast radius.
55
+ if (last[1] === "REJECTED-HITL")
56
+ return { kind: "rejected-hitl", reason };
57
+ return { kind: "rejected-retry", reason };
48
58
  }
49
59
  export const runReviewPass = (issue, deps, branch) => Effect.gen(function* () {
50
60
  const { config, cwd, baseBranch } = deps;
@@ -61,11 +71,15 @@ export const runReviewPass = (issue, deps, branch) => Effect.gen(function* () {
61
71
  name: `review-${issue.identifier}`,
62
72
  }).pipe(Effect.withSpan("review"));
63
73
  const verdict = parseReviewVerdict(reviewResult);
64
- if (verdict.kind === "rejected") {
65
- return {
66
- kind: "hitl",
67
- reason: `Sub-agent review rejected: ${verdict.reason}`,
68
- };
74
+ // VA-418: split rejection into two outcomes so the composer can
75
+ // route mechanically-fixable rejections back to the impl agent
76
+ // (within the configured retry budget) and only escalate to HITL
77
+ // when the reviewer explicitly asks for human judgment.
78
+ if (verdict.kind === "rejected-retry") {
79
+ return { kind: "rejected-retry", reason: verdict.reason };
80
+ }
81
+ if (verdict.kind === "rejected-hitl") {
82
+ return { kind: "rejected-hitl", reason: verdict.reason };
69
83
  }
70
84
  if (verdict.kind === "missing") {
71
85
  // VA-360: a review pass that didn't emit any marker is
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@valescoagency/runway",
3
- "version": "0.9.0",
3
+ "version": "0.10.0",
4
4
  "description": "Linear-driven orchestrator + scaffolder for coding agents on Sandcastle. `runway init` scaffolds a target repo (sandcastle + varlock + 1Password); `runway run` drains a Linear queue against it; `runway doctor`, `runway upgrade`, `runway upgrade-repo` round out the lifecycle.",
5
5
  "license": "MIT",
6
6
  "author": {
@@ -8,6 +8,8 @@ You are an autonomous coding agent working on a single Linear issue.
8
8
 
9
9
  {{PRIOR_REVIEW_FEEDBACK}}
10
10
 
11
+ {{IN_RUN_REVIEWER_FEEDBACK}}
12
+
11
13
  {{PREVIOUS_ITERATIONS}}
12
14
 
13
15
  # Repository context
@@ -22,6 +24,15 @@ that addresses every blocker in that feedback, replacing the existing
22
24
  commits if they would re-introduce a rejected design. Do not treat
23
25
  pre-existing commits as authoritative.
24
26
 
27
+ If a section titled `Reviewer feedback from this run` is present
28
+ above, the previous iteration of this drain produced commits that
29
+ review rejected. Every blocker in that section is from review of YOUR
30
+ work this run — treat it as the highest-priority signal and address
31
+ each one before signaling `IMPL: DONE` again. In-run feedback ranks
32
+ above cross-drain *Review feedback from prior attempts*; the latter
33
+ is guidance from earlier runs, the former is the reviewer's verdict
34
+ on the diff you just produced.
35
+
25
36
  # What done looks like
26
37
 
27
38
  1. Code changes that satisfy the issue body.
package/prompts/review.md CHANGED
@@ -34,12 +34,54 @@ Score the change against these axes. For each, give a brief verdict
34
34
 
35
35
  # Output format
36
36
 
37
- End your response with EXACTLY one of these two lines, alone, no other
38
- text on the line:
37
+ End your response with EXACTLY one of these three lines, alone, no
38
+ other text on the line:
39
39
 
40
40
  REVIEW: APPROVED
41
- REVIEW: REJECTED — <one-line reason>
41
+ REVIEW: REJECTED-RETRY — <one-line, actionable fix>
42
+ REVIEW: REJECTED-HITL — <one-line, needs human judgment>
42
43
 
43
- If you output `REVIEW: REJECTED`, the agent will get one more iteration
44
- to address your concerns. Be specific about what to fix. Don't reject
45
- for nits.
44
+ Pick the rejection marker that fits how the fix should be applied:
45
+
46
+ ## REVIEW: REJECTED-RETRY — mechanically fixable
47
+
48
+ Use this when the rejection is something the impl agent can address
49
+ in another iteration without human judgment. The fix is concrete:
50
+ add code, delete code, swap a function, normalize a value, gate a
51
+ URL scheme, tighten a regex, update a doc to match the implementation,
52
+ add the missing test, etc. Your one-line reason should
53
+ **name the fix**, not just the symptom. Runway will hand your reason
54
+ back to the impl agent and re-run review.
55
+
56
+ Examples:
57
+
58
+ REVIEW: REJECTED-RETRY — add a URL scheme guard before rendering `<a href={url}>` so `javascript:` URIs cannot be injected
59
+ REVIEW: REJECTED-RETRY — normalize both timestamps to SQLite ISO-8601 format before the lexical comparison in `isFresh()`
60
+ REVIEW: REJECTED-RETRY — remove the back-compat shim for `RUNWAY_LEGACY_LABEL`; the env var was removed in 0.7.0
61
+ REVIEW: REJECTED-RETRY — update `docs/api.md` to match the new `processIssue` return shape (`{ kind, detail }`, not `{ status, message }`)
62
+ REVIEW: REJECTED-RETRY — add a test covering the empty-comment-list branch of `formatPriorFeedback`
63
+
64
+ ## REVIEW: REJECTED-HITL — needs human judgment
65
+
66
+ Use this when the rejection requires a decision the agent cannot
67
+ reasonably make on its own. Wrong architectural direction;
68
+ contradiction between the issue body and a spec the diff is supposed
69
+ to follow; ambiguity about which of two valid behaviors is wanted;
70
+ the diff implements a different feature than the issue describes;
71
+ the issue itself is underspecified in a way that no rewrite can
72
+ satisfy. Runway will escalate the issue to a human and not run any
73
+ more impl attempts in this drain.
74
+
75
+ Examples:
76
+
77
+ REVIEW: REJECTED-HITL — diff replaces `LinearGateway` with a REST shim, but the issue scope is read-only telemetry; this needs a product call on whether to keep the SDK
78
+ REVIEW: REJECTED-HITL — issue body says "default to `Todo`", `.runway/policy.yml` says "default to `Backlog`"; the two contradict each other and the resolution isn't obvious from the codebase
79
+ REVIEW: REJECTED-HITL — the AC for "atomic" claim/transition is ambiguous between optimistic-locking and a single GraphQL mutation; please pick one before retrying
80
+ REVIEW: REJECTED-HITL — diff adds a new `ConfigTag` layer with no migration path for downstream consumers; needs a deprecation policy call
81
+ REVIEW: REJECTED-HITL — the change deletes the rate limiter without a replacement; this affects every Linear caller and needs explicit operator sign-off
82
+
83
+ If your output ends with `REVIEW: REJECTED — <reason>` (the legacy
84
+ bare marker, no suffix), Runway treats it as `REJECTED-RETRY` so the
85
+ retry loop fires. Prefer the explicit suffix.
86
+
87
+ Be specific about what to fix. Don't reject for nits.