npm - @valescoagency/runway - Versions diffs - 0.9.0 → 0.10.0 - Mend

@valescoagency/runway 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/README.md +1 -1
package/dist/cli.js +1 -0
package/dist/commands/run.js +47 -0
package/dist/config.js +8 -0
package/dist/dashboard/otlp.js +16 -2
package/dist/dashboard/projector.js +12 -0
package/dist/dashboard/server.js +60 -4
package/dist/dashboard/storage.js +233 -17
package/dist/dashboard/views.js +18 -1
package/dist/finalize.js +34 -2
package/dist/git.js +170 -22
package/dist/implement.js +6 -0
package/dist/linear.js +35 -9
package/dist/orchestrator.js +99 -18
package/dist/prompts.js +40 -0
package/dist/review.js +32 -18
package/package.json +1 -1
package/prompts/implement.md +11 -0
package/prompts/review.md +48 -6

package/dist/review.js CHANGED Viewed

@@ -4,15 +4,21 @@ import { Effect } from "effect";
 import { captureCommitLog, captureDiff } from "./git.js";
 import { renderReviewPrompt } from "./prompts.js";
 import { dockerEnv, runSandcastle, stringifyResult, } from "./sandcastle.js";
-// VA-353: review verdict marker. Global flag because sandcastle
-// appends wrapper output ("Agent stopped", "Capturing session",
-// "Reached max iterations (1).", "Run complete: …") AFTER the agent's
-// final message — so the marker is rarely the last line. We scan
-// every line-start match and keep the LAST one, which is the most
-// recent agent verdict. Standalone-line: ^…$ with /m anchors prevent
-// mid-prose matches like "the reviewer should output REVIEW: APPROVED
-// when…".
-const REVIEW_VERDICT_RE = /^REVIEW:\s*(APPROVED|REJECTED)(?:\s+—\s+(.*))?$/gm;
+// VA-353 / VA-418: review verdict marker. Global flag because
+// sandcastle appends wrapper output ("Agent stopped", "Capturing
+// session", "Reached max iterations (1).", "Run complete: …") AFTER
+// the agent's final message — so the marker is rarely the last line.
+// We scan every line-start match and keep the LAST one, which is the
+// most recent agent verdict. Standalone-line: ^…$ with /m anchors
+// prevent mid-prose matches like "the reviewer should output REVIEW:
+// APPROVED when…".
+//
+// VA-418 extended the verdict alphabet from {APPROVED, REJECTED} to
+// {APPROVED, REJECTED, REJECTED-RETRY, REJECTED-HITL}. Plain
+// `REJECTED` (the legacy marker) still parses and maps to
+// `rejected-retry` so existing prompts continue to drive the retry
+// flow without an update.
+const REVIEW_VERDICT_RE = /^REVIEW:\s*(APPROVED|REJECTED-RETRY|REJECTED-HITL|REJECTED)(?:\s+—\s+(.*))?$/gm;
 /**
  * VA-353: parse the reviewer's final `REVIEW: APPROVED` /
  * `REVIEW: REJECTED — <reason>` marker. Scans the agent's combined
@@ -41,10 +47,14 @@ export function parseReviewVerdict(result) {
     }
     if (last[1] === "APPROVED")
         return { kind: "approved", reason: "" };
-    return {
-        kind: "rejected",
-        reason: last[2]?.trim() || "no reason given",
-    };
+    const reason = last[2]?.trim() || "no reason given";
+    // VA-418: REJECTED-HITL escalates immediately; REJECTED-RETRY (and
+    // the legacy bare REJECTED, for graceful degradation) flows into
+    // the bounded retry loop. Default to the more forgiving path on
+    // ambiguity — the retry budget caps the blast radius.
+    if (last[1] === "REJECTED-HITL")
+        return { kind: "rejected-hitl", reason };
+    return { kind: "rejected-retry", reason };
 }
 export const runReviewPass = (issue, deps, branch) => Effect.gen(function* () {
     const { config, cwd, baseBranch } = deps;
@@ -61,11 +71,15 @@ export const runReviewPass = (issue, deps, branch) => Effect.gen(function* () {
         name: `review-${issue.identifier}`,
     }).pipe(Effect.withSpan("review"));
     const verdict = parseReviewVerdict(reviewResult);
-    if (verdict.kind === "rejected") {
-        return {
-            kind: "hitl",
-            reason: `Sub-agent review rejected: ${verdict.reason}`,
-        };
+    // VA-418: split rejection into two outcomes so the composer can
+    // route mechanically-fixable rejections back to the impl agent
+    // (within the configured retry budget) and only escalate to HITL
+    // when the reviewer explicitly asks for human judgment.
+    if (verdict.kind === "rejected-retry") {
+        return { kind: "rejected-retry", reason: verdict.reason };
+    }
+    if (verdict.kind === "rejected-hitl") {
+        return { kind: "rejected-hitl", reason: verdict.reason };
     }
     if (verdict.kind === "missing") {
         // VA-360: a review pass that didn't emit any marker is

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@valescoagency/runway",
-  "version": "0.9.0",
+  "version": "0.10.0",
   "description": "Linear-driven orchestrator + scaffolder for coding agents on Sandcastle. `runway init` scaffolds a target repo (sandcastle + varlock + 1Password); `runway run` drains a Linear queue against it; `runway doctor`, `runway upgrade`, `runway upgrade-repo` round out the lifecycle.",
   "license": "MIT",
   "author": {

package/prompts/implement.md CHANGED Viewed

@@ -8,6 +8,8 @@ You are an autonomous coding agent working on a single Linear issue.
 {{PRIOR_REVIEW_FEEDBACK}}
+{{IN_RUN_REVIEWER_FEEDBACK}}
 {{PREVIOUS_ITERATIONS}}
 # Repository context
@@ -22,6 +24,15 @@ that addresses every blocker in that feedback, replacing the existing
 commits if they would re-introduce a rejected design. Do not treat
 pre-existing commits as authoritative.
+If a section titled `Reviewer feedback from this run` is present
+above, the previous iteration of this drain produced commits that
+review rejected. Every blocker in that section is from review of YOUR
+work this run — treat it as the highest-priority signal and address
+each one before signaling `IMPL: DONE` again. In-run feedback ranks
+above cross-drain *Review feedback from prior attempts*; the latter
+is guidance from earlier runs, the former is the reviewer's verdict
+on the diff you just produced.
 # What done looks like
 1. Code changes that satisfy the issue body.

package/prompts/review.md CHANGED Viewed

@@ -34,12 +34,54 @@ Score the change against these axes. For each, give a brief verdict
 # Output format
-End your response with EXACTLY one of these two lines, alone, no other
-text on the line:
+End your response with EXACTLY one of these three lines, alone, no
+other text on the line:
     REVIEW: APPROVED
-    REVIEW: REJECTED — <one-line reason>
+    REVIEW: REJECTED-RETRY — <one-line, actionable fix>
+    REVIEW: REJECTED-HITL  — <one-line, needs human judgment>
-If you output `REVIEW: REJECTED`, the agent will get one more iteration
-to address your concerns. Be specific about what to fix. Don't reject
-for nits.
+Pick the rejection marker that fits how the fix should be applied:
+## REVIEW: REJECTED-RETRY — mechanically fixable
+Use this when the rejection is something the impl agent can address
+in another iteration without human judgment. The fix is concrete:
+add code, delete code, swap a function, normalize a value, gate a
+URL scheme, tighten a regex, update a doc to match the implementation,
+add the missing test, etc. Your one-line reason should
+**name the fix**, not just the symptom. Runway will hand your reason
+back to the impl agent and re-run review.
+Examples:
+    REVIEW: REJECTED-RETRY — add a URL scheme guard before rendering `<a href={url}>` so `javascript:` URIs cannot be injected
+    REVIEW: REJECTED-RETRY — normalize both timestamps to SQLite ISO-8601 format before the lexical comparison in `isFresh()`
+    REVIEW: REJECTED-RETRY — remove the back-compat shim for `RUNWAY_LEGACY_LABEL`; the env var was removed in 0.7.0
+    REVIEW: REJECTED-RETRY — update `docs/api.md` to match the new `processIssue` return shape (`{ kind, detail }`, not `{ status, message }`)
+    REVIEW: REJECTED-RETRY — add a test covering the empty-comment-list branch of `formatPriorFeedback`
+## REVIEW: REJECTED-HITL — needs human judgment
+Use this when the rejection requires a decision the agent cannot
+reasonably make on its own. Wrong architectural direction;
+contradiction between the issue body and a spec the diff is supposed
+to follow; ambiguity about which of two valid behaviors is wanted;
+the diff implements a different feature than the issue describes;
+the issue itself is underspecified in a way that no rewrite can
+satisfy. Runway will escalate the issue to a human and not run any
+more impl attempts in this drain.
+Examples:
+    REVIEW: REJECTED-HITL — diff replaces `LinearGateway` with a REST shim, but the issue scope is read-only telemetry; this needs a product call on whether to keep the SDK
+    REVIEW: REJECTED-HITL — issue body says "default to `Todo`", `.runway/policy.yml` says "default to `Backlog`"; the two contradict each other and the resolution isn't obvious from the codebase
+    REVIEW: REJECTED-HITL — the AC for "atomic" claim/transition is ambiguous between optimistic-locking and a single GraphQL mutation; please pick one before retrying
+    REVIEW: REJECTED-HITL — diff adds a new `ConfigTag` layer with no migration path for downstream consumers; needs a deprecation policy call
+    REVIEW: REJECTED-HITL — the change deletes the rate limiter without a replacement; this affects every Linear caller and needs explicit operator sign-off
+If your output ends with `REVIEW: REJECTED — <reason>` (the legacy
+bare marker, no suffix), runway treats it as `REJECTED-RETRY` so the
+retry loop fires. Prefer the explicit suffix.
+Be specific about what to fix. Don't reject for nits.