npm - @valescoagency/runway - Versions diffs - 0.6.0 → 0.7.1 - Mend

@valescoagency/runway 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md CHANGED Viewed

@@ -172,6 +172,14 @@ export LINEAR_API_KEY=lin_api_...
 # export RUNWAY_IN_REVIEW_STATUS="In Review"
 # export RUNWAY_HITL_LABEL="ready-for-human"
 # export RUNWAY_MAX_ITERATIONS=5
+# export RUNWAY_COMMENT_AUTHOR_ALLOWLIST="Reviewer Bot,Jane Reviewer"
+#   optional, comma-separated Linear user names whose comments on a
+#   re-queued issue surface as "Review feedback from prior attempts"
+#   in the implement prompt. Defaults to the Linear user the API key
+#   authenticates as (which is both the reviewer-agent's identity
+#   and, in the solo-dev case, the repo owner). Set this only when
+#   the reviewer agent and the repo owner are split across distinct
+#   Linear identities.
 ```
 `RUNWAY_HITL_LABEL` defaults to `ready-for-human`, matching the
@@ -384,7 +392,7 @@ These are tractable, just not v1.
 ## Status
-0.6.0 — production-shaped and dogfooded against live Linear queues.
+0.7.1 — production-shaped and dogfooded against live Linear queues.
 The end-to-end pipeline (init → run → review → PR) is stable; surface
 may still shift as the orchestrator's policy and iteration mechanics
 mature. See [CHANGELOG.md](./CHANGELOG.md) for per-release detail.

package/dist/config.js CHANGED Viewed

@@ -20,6 +20,7 @@ const configEffect = EConfig.all({
         message: "RUNWAY_MAX_ITERATIONS must be a positive integer",
         validation: (n) => n > 0,
     })),
+    commentAuthorAllowlist: EConfig.option(EConfig.string("RUNWAY_COMMENT_AUTHOR_ALLOWLIST")),
 }).pipe(Effect.map((raw) => ({
     linearApiKey: raw.linearApiKey,
     opServiceAccountToken: Option.getOrUndefined(raw.opServiceAccountToken),
@@ -31,6 +32,10 @@ const configEffect = EConfig.all({
     inReviewStatus: raw.inReviewStatus,
     hitlLabel: raw.hitlLabel,
     maxIterations: raw.maxIterations,
+    commentAuthorAllowlist: Option.getOrUndefined(raw.commentAuthorAllowlist)
+        ?.split(",")
+        .map((s) => s.trim())
+        .filter(Boolean),
 })));
 /**
  * VA-359: Context tag for the resolved RunwayConfig. Provided by

package/dist/implement.js CHANGED Viewed

@@ -44,7 +44,7 @@ export function parseImplVerdict(result) {
  * completionSignal.
  */
 export const runImplementLoop = (issue, deps, branch) => Effect.gen(function* () {
-    const { config, cwd, baseBranch, policy } = deps;
+    const { config, cwd, baseBranch, policy, priorFeedback } = deps;
     const maxIters = Math.max(1, config.maxIterations);
     let prevSummary = "";
     let implementResult;
@@ -54,6 +54,12 @@ export const runImplementLoop = (issue, deps, branch) => Effect.gen(function* ()
             issue,
             policy,
             previousIterations: prevSummary,
+            // VA-383: surface prior-attempt review feedback on every
+            // iteration of this run — the rejection blockers that put
+            // the issue back to Todo are equally relevant on iteration 1
+            // and iteration N+1, since the implementer keeps drifting
+            // toward the same code paths until corrected.
+            priorReviewFeedback: priorFeedback,
         }));
         implementResult = yield* runSandcastle({
             agent: claudeCode("claude-opus-4-7"),

package/dist/linear.js CHANGED Viewed

@@ -26,11 +26,92 @@ const IssueLabelNodeSchema = Schema.Struct({
 const ProjectNodeSchema = Schema.Struct({
     id: Schema.String,
 });
+// VA-394: minimal shape needed to ask "is this blocker still active?"
+// — we only read the relation's `type` (filtering for `blocks`) and
+// the related issue's workflow state `type`. Schemas stay narrow so
+// the validation cost per candidate stays small.
+const IssueRelationNodeSchema = Schema.Struct({
+    id: Schema.String,
+    type: Schema.String,
+});
+const WorkflowStateTypeNodeSchema = Schema.Struct({
+    type: Schema.String,
+});
+// VA-383: comments fetched for surfacing prior-attempt review feedback
+// to the implementer. `user` is nullable on Linear's side (system /
+// integration comments can lack an author); we map a missing user to
+// the empty string so downstream filters can match by name without a
+// null-guard at every call site.
+const CommentNodeSchema = Schema.Struct({
+    id: Schema.String,
+    body: Schema.String,
+    createdAt: Schema.Union(Schema.String, Schema.DateFromSelf),
+    user: Schema.NullOr(Schema.Struct({
+        id: Schema.String,
+        name: Schema.String,
+    })),
+});
+const ViewerSchema = Schema.Struct({
+    id: Schema.String,
+    name: Schema.String,
+});
 const decodeIssueNode = Schema.decodeUnknownSync(IssueNodeSchema);
 const decodeWorkflowStateNode = Schema.decodeUnknownSync(WorkflowStateNodeSchema);
 const decodeTeamNode = Schema.decodeUnknownSync(TeamNodeSchema);
 const decodeIssueLabelNode = Schema.decodeUnknownSync(IssueLabelNodeSchema);
 const decodeProjectNode = Schema.decodeUnknownSync(ProjectNodeSchema);
+const decodeCommentNode = Schema.decodeUnknownSync(CommentNodeSchema);
+const decodeViewer = Schema.decodeUnknownSync(ViewerSchema);
+const decodeIssueRelationNode = Schema.decodeUnknownSync(IssueRelationNodeSchema);
+const decodeWorkflowStateTypeNode = Schema.decodeUnknownSync(WorkflowStateTypeNodeSchema);
+// VA-394: Linear workflow state `type` values that mean "the blocker
+// has resolved." Anything else (`triage`, `backlog`, `unstarted`,
+// `started`) still gates the blocked issue from runway pickup.
+const TERMINAL_STATE_TYPES = new Set(["completed", "canceled"]);
+const isTerminalStateType = (type) => TERMINAL_STATE_TYPES.has(type);
+/**
+ * VA-394: reports whether `issue` is tagged with the label named
+ * `hitlLabel`. That label means "do not pick up": runway writes it
+ * on HITL escapes and triage may apply it to human-only work. Labels
+ * are loaded through the SDK's chained `labels()` accessor so the
+ * candidate fetch and the `IssueNode` schema need not carry them.
+ * Each inspected label goes through `decodeIssueLabelNode`, which
+ * throws on a node that does not match its schema.
+ */
+async function hasHitlLabel(issue, hitlLabel) {
+    const { nodes } = await issue.labels();
+    // `some` stops at the first hit, so labels after a match are
+    // never decoded.
+    const matchesHitl = (node) => decodeIssueLabelNode(node).name === hitlLabel;
+    return nodes.some(matchesHitl);
+}
+/**
+ * VA-394: reports whether any still-open issue blocks `issue`. Walks
+ * the issue's `inverseRelations` and ignores every relation whose
+ * type is not `blocks` (`duplicate`, `related` never gate pickup). A
+ * `blocks` relation whose blocker issue or workflow state resolves
+ * to nothing is skipped, so one dangling relation cannot stall the
+ * queue. Relation or state nodes that fail schema decoding throw;
+ * `fetchReady` hands such errors to `classifyLinearError`.
+ */
+async function hasActiveBlocker(issue) {
+    const { nodes } = await issue.inverseRelations();
+    for (const relation of nodes) {
+        const { type: relationType } = decodeIssueRelationNode(relation);
+        if (relationType === "blocks") {
+            // Resolve lazily and one at a time so the scan can stop at
+            // the first blocker that is still open.
+            const blocker = await relation.issue;
+            const state = blocker ? await blocker.state : undefined;
+            if (state) {
+                const { type: stateType } = decodeWorkflowStateTypeNode(state);
+                if (!isTerminalStateType(stateType))
+                    return true;
+            }
+        }
+    }
+    return false;
+}
 export class LinearNotFound extends Data.TaggedError("LinearNotFound") {
 }
 export class LinearUnauthorized extends Data.TaggedError("LinearUnauthorized") {
@@ -219,15 +300,30 @@ export function createLinearGateway(config, limiter = null) {
                     // VA-360: validate every issue node through the schema —
                     // a single drifted issue surfaces as `LinearSchemaError`
                     // instead of a downstream `cannot read property X`.
-                    return issues.nodes.map((raw) => {
+                    //
+                    // VA-394: per-candidate eligibility — skip issues carrying
+                    // `config.hitlLabel` (triage's "human-only" marker, also
+                    // applied by runway itself on prior HITL escapes), and
+                    // skip issues with at least one active `blocks` relation
+                    // pointing at them. The candidate set is the Todo queue,
+                    // typically a handful of issues; the N+1 SDK calls per
+                    // candidate go through the rate limiter and the retry
+                    // policy alongside everything else.
+                    const eligible = [];
+                    for (const raw of issues.nodes) {
                         const i = decodeIssueNode(raw);
-                        return {
+                        if (await hasHitlLabel(raw, config.hitlLabel))
+                            continue;
+                        if (await hasActiveBlocker(raw))
+                            continue;
+                        eligible.push({
                             id: i.id,
                             identifier: i.identifier,
                             title: i.title,
                             description: i.description ?? "",
-                        };
-                    });
+                        });
+                    }
+                    return eligible;
                 },
                 catch: (err) => classifyLinearError(err, "fetchReady"),
             }), { call: "fetchReady" }));
@@ -293,6 +389,42 @@ export function createLinearGateway(config, limiter = null) {
                 catch: (err) => classifyLinearError(err, "comment"),
             }), { call: "comment" }));
         },
+        fetchComments(issueId) {
+            // VA-383: loads the comments on `issueId` as plain
+            // `{ id, author, body, createdAt }` records, oldest first,
+            // so the impl agent reads feedback in the order it was
+            // given. Only the SDK's first page is read: Runway's HITL +
+            // reviewer flow rarely leaves more than a handful of
+            // comments per attempt. Fold in pagination if issues start
+            // blowing past the page limit.
+            const loadComments = async () => {
+                const issue = await client.issue(issueId);
+                const { nodes } = await issue.comments();
+                const records = nodes.map((node) => {
+                    const { id, body, user, createdAt: rawDate } = decodeCommentNode(node);
+                    // The schema admits a Date or a string timestamp;
+                    // normalize to Date. An author-less comment gets "".
+                    const createdAt = rawDate instanceof Date ? rawDate : new Date(rawDate);
+                    return { id, author: user?.name ?? "", body, createdAt };
+                });
+                // `map` already produced a fresh array, so sorting it
+                // in place leaks no mutation to the SDK's node list.
+                return records.sort((a, b) => a.createdAt.getTime() - b.createdAt.getTime());
+            };
+            return gate(applyLinearPolicy(Effect.tryPromise({
+                try: loadComments,
+                catch: (err) => classifyLinearError(err, "fetchComments"),
+            }), { call: "fetchComments" }));
+        },
+        viewer() {
+            // Resolves `client.viewer`, reduced to a schema-validated id/name pair.
+            const loadViewer = async () => {
+                const { id, name } = await client.viewer;
+                return decodeViewer({ id, name });
+            };
+            const attempt = Effect.tryPromise({ try: loadViewer, catch: (err) => classifyLinearError(err, "viewer") });
+            return gate(applyLinearPolicy(attempt, { call: "viewer" }));
+        },
     };
 }
 export async function validateLinearConfig(config) {

package/dist/orchestrator.js CHANGED Viewed

@@ -5,6 +5,7 @@ import { detectBaseBranch, pruneStaleAgentBranch } from "./git.js";
 import { loadPolicy } from "./policy.js";
 import { flagHitl, handleProcessFailure } from "./hitl.js";
 import { runImplementLoop } from "./implement.js";
+import { formatPriorFeedback } from "./prompts.js";
 import { runReviewPass } from "./review.js";
 import { finalize } from "./finalize.js";
 // Re-exports so existing callers (commands/run.ts) and tests
@@ -58,10 +59,24 @@ export const drainQueue = (deps, opts = {}) => Effect.gen(function* () {
     yield* Effect.logInfo("base branch resolved").pipe(Effect.annotateLogs({ baseBranch: baseBranchResolved }));
     const policy = loadPolicy(deps.cwd, { allowPathsOverride: opts.allowPaths });
     yield* Effect.logInfo(`policy source: ${policy.source}`);
+    // VA-383: resolve the Linear viewer once per drain to seed the
+    // default comment-author allowlist. If the operator has provided
+    // an explicit allowlist, skip the lookup. If the lookup fails,
+    // fall back to an empty allowlist — `formatPriorFeedback` will
+    // emit nothing and the run proceeds with description-only
+    // context (matching VA-383's "never blocks the run" AC).
+    const commentAuthorAllowlist = config
+        .commentAuthorAllowlist?.length
+        ? config.commentAuthorAllowlist
+        : yield* linear.viewer().pipe(Effect.map((v) => [v.name]), Effect.catchAll((err) => Effect.logWarning(`VA-383: viewer() failed; prior-attempt feedback will be empty (${err.message ?? String(err)})`).pipe(Effect.as([]))));
+    yield* Effect.logInfo(`comment author allowlist: ${commentAuthorAllowlist.length === 0
+        ? "(empty — prior-feedback surfacing disabled)"
+        : commentAuthorAllowlist.join(", ")}`);
     const runDeps = {
         ...deps,
         baseBranch: baseBranchResolved,
         policy,
+        commentAuthorAllowlist,
     };
     // VA-344: never re-pick an issue in the same invocation, even if
     // VA-342 reverted it to `Todo`. Without this, a deterministic
@@ -133,8 +148,17 @@ export const drainQueue = (deps, opts = {}) => Effect.gen(function* () {
  * Linear side-effects so the phases stay independently testable.
  */
 const processIssue = (issue, deps) => Effect.gen(function* () {
-    const { config, linear } = deps;
+    const { config, linear, commentAuthorAllowlist } = deps;
     const branch = `agent/${issue.identifier.toLowerCase()}`;
+    // VA-383: fetch prior comments BEFORE posting "Runway picked up
+    // this issue" so we don't see our own bookkeeping. The allowlist
+    // resolved at drain startup filters to reviewer/repo-owner
+    // identities; `formatPriorFeedback` additionally strips known
+    // runway bookkeeping prefixes. A fetch failure logs a warning and
+    // the run proceeds with empty feedback (never blocks).
+    const priorFeedback = yield* (commentAuthorAllowlist.length === 0
+        ? Effect.succeed("")
+        : linear.fetchComments(issue.id).pipe(Effect.map((cs) => formatPriorFeedback(cs, commentAuthorAllowlist)), Effect.catchAll((err) => Effect.logWarning(`${issue.identifier}: fetchComments failed; proceeding with description-only context (${err.message ?? String(err)})`).pipe(Effect.as("")))));
     yield* linear.transition(issue.id, config.inProgressStatus);
     yield* linear.comment(issue.id, `Runway picked up this issue. Branch: \`${branch}\`.`);
     // VA-366: if a prior attempt left an `agent/<id>` branch behind
@@ -143,7 +167,7 @@ const processIssue = (issue, deps) => Effect.gen(function* () {
     // current `baseBranch` HEAD. Preserves the branch when it carries
     // real work. Best-effort — failures here don't abort the issue.
     yield* pruneStaleAgentBranch(deps.cwd, deps.baseBranch, branch).pipe(Effect.catchAll((err) => Effect.logWarning(`${issue.identifier}: pruneStaleAgentBranch failed (continuing): ${err.message}`)));
-    const impl = yield* runImplementLoop(issue, deps, branch);
+    const impl = yield* runImplementLoop(issue, { ...deps, priorFeedback }, branch);
     if (impl.kind === "hitl") {
         yield* flagHitl(issue, deps, impl.reason);
         return { kind: "hitl", detail: impl.reason };

package/dist/prompts.js CHANGED Viewed

@@ -54,6 +54,7 @@ function implementVars(args) {
         ISSUE_TITLE: args.issue.title,
         ISSUE_DESCRIPTION: args.issue.description || "(no description)",
         PREVIOUS_ITERATIONS: args.previousIterations,
+        PRIOR_REVIEW_FEEDBACK: args.priorReviewFeedback,
         POLICY_FORBIDDEN_BULLET: renderForbiddenPathsBullet(args.policy),
     };
 }
@@ -66,6 +67,67 @@ function reviewVars(args) {
         COMMITS: args.commits || "(no commits)",
     };
 }
+/**
+ * VA-383: known orchestrator-emitted comment prefixes that are
+ * bookkeeping noise, not feedback the impl agent should learn from.
+ * Filtering by author alone is insufficient because runway and the
+ * reviewer both post under the API-key owner's identity — so
+ * "Runway picked up this issue. Branch: …" would otherwise leak into
+ * the prior-feedback block on every retry.
+ *
+ * Intentionally does NOT include `Runway flagged for human review:` —
+ * that comment carries the reviewer's rejection reason, which is the
+ * load-bearing signal VA-383 exists to surface.
+ */
+const RUNWAY_BOOKKEEPING_PREFIXES = [
+    "Runway picked up this issue",
+    "Runway opened a PR for review:",
+    "Runway hit a startup failure",
+    "Note: could not apply",
+];
+const isRunwayBookkeeping = (body) => RUNWAY_BOOKKEEPING_PREFIXES.findIndex((prefix) => body.startsWith(prefix)) !== -1;
+/**
+ * VA-383: filter + format the comment set fetched off an issue into
+ * the "Review feedback from prior attempts" block the implement
+ * prompt renders. Pure, side-effect-free, no Linear deps — the
+ * orchestrator owns I/O, this owns the shape.
+ *
+ * Comments survive the filter when **all** of:
+ *   1. author name is in `allowlist` (case-sensitive exact match)
+ *   2. body does NOT begin with a known runway bookkeeping prefix
+ *
+ * Returns "" when nothing survives, so the `{{PRIOR_REVIEW_FEEDBACK}}`
+ * slot expands to empty. Never throws on an invalid `createdAt` Date:
+ * such comments are listed last under an "(unknown time)" heading.
+ */
+export function formatPriorFeedback(comments, allowlist) {
+    const allowed = new Set(allowlist);
+    const surviving = comments.filter((c) => allowed.has(c.author) && !isRunwayBookkeeping(c.body));
+    if (surviving.length === 0)
+        return "";
+    // Re-sort defensively (callers may pass any order). Comments with
+    // an invalid Date (NaN timestamp) are moved to the end, unsorted.
+    const dated = (c) => !Number.isNaN(c.createdAt.getTime());
+    const ordered = surviving.filter(dated).sort((a, b) => a.createdAt.getTime() - b.createdAt.getTime());
+    ordered.push(...surviving.filter((c) => !dated(c)));
+    const entries = ordered.map((c) => {
+        const when = dated(c) ? c.createdAt.toISOString() : "(unknown time)";
+        return `### ${when} — ${c.author}\n\n${c.body.trim()}`;
+    });
+    return [
+        "# Review feedback from prior attempts",
+        "",
+        "Prior runway attempts on this issue were rejected. Each block below is a",
+        "review comment to internalize **before** redoing the work — these are the",
+        "specific blockers that caused the previous attempt to be rejected. The",
+        "issue description above is the spec; the comments here are augmenting",
+        "guidance. If the two contradict, prefer the spec and call out the conflict",
+        "in your final message.",
+        "",
+        entries.join("\n\n"),
+        "",
+    ].join("\n");
+}
 /**
  * VA-352 (absorbed from policy.ts in VA-361): render the bullet
  * sentence the impl prompt shows the agent. Stable formatting so a

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@valescoagency/runway",
-  "version": "0.6.0",
+  "version": "0.7.1",
   "description": "Linear-driven orchestrator + scaffolder for coding agents on Sandcastle. `runway init` scaffolds a target repo (sandcastle + varlock + 1Password); `runway run` drains a Linear queue against it; `runway doctor`, `runway upgrade`, `runway upgrade-repo` round out the lifecycle.",
   "license": "MIT",
   "author": {
@@ -41,26 +41,26 @@
   "dependencies": {
     "@ai-hero/sandcastle": "^0.5.10",
     "@effect/opentelemetry": "^0.63.0",
-    "@linear/sdk": "^41.0.0",
+    "@linear/sdk": "^84.0.0",
     "@opentelemetry/api": "^1.9.1",
     "@opentelemetry/exporter-trace-otlp-http": "^0.217.0",
     "@opentelemetry/resources": "^2.7.1",
     "@opentelemetry/sdk-trace-base": "^2.7.1",
     "@opentelemetry/sdk-trace-node": "^2.7.1",
-    "@opentelemetry/semantic-conventions": "^1.40.0",
+    "@opentelemetry/semantic-conventions": "^1.41.1",
     "effect": "^3.21.2",
-    "execa": "^9.5.2",
+    "execa": "^9.6.1",
     "yaml": "^2.9.0",
-    "zod": "^3.23.8"
+    "zod": "^4.4.3"
   },
   "devDependencies": {
-    "@commitlint/cli": "^21.0.0",
-    "@commitlint/config-conventional": "^21.0.0",
-    "@types/node": "^22.10.0",
+    "@commitlint/cli": "^21.0.1",
+    "@commitlint/config-conventional": "^21.0.1",
+    "@types/node": "^25.7.0",
     "lefthook": "^2.1.6",
-    "tsx": "^4.19.2",
-    "typescript": "^5.7.2",
-    "vitest": "^4.1.5"
+    "tsx": "^4.21.0",
+    "typescript": "^6.0.3",
+    "vitest": "^4.1.6"
   },
   "engines": {
     "node": ">=22"

package/prompts/implement.md CHANGED Viewed

@@ -6,6 +6,8 @@ You are an autonomous coding agent working on a single Linear issue.
 {{ISSUE_DESCRIPTION}}
+{{PRIOR_REVIEW_FEEDBACK}}
 {{PREVIOUS_ITERATIONS}}
 # Repository context