@valescoagency/runway 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +109 -9
- package/dist/commands/doctor.js +282 -6
- package/dist/commands/init.js +32 -6
- package/dist/commands/run.js +37 -5
- package/dist/commands/upgrade-repo.js +42 -14
- package/dist/config.js +18 -1
- package/dist/git.js +41 -0
- package/dist/github.js +2 -2
- package/dist/linear.js +41 -0
- package/dist/orchestrator.js +262 -57
- package/dist/policy.js +76 -0
- package/dist/prompts.js +44 -1
- package/package.json +10 -3
- package/prompts/implement.md +46 -2
- package/templates/.env.schema.target-repo +8 -1
- package/templates/Dockerfile.claude-code.base +24 -0
package/dist/orchestrator.js
CHANGED
|
@@ -3,8 +3,25 @@ import { join } from "node:path";
|
|
|
3
3
|
import { run, claudeCode } from "@ai-hero/sandcastle";
|
|
4
4
|
import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
|
|
5
5
|
import { execa } from "execa";
|
|
6
|
-
import { implementVars, loadImplementPrompt, loadReviewPrompt, renderPrompt, reviewVars, } from "./prompts.js";
|
|
7
|
-
|
|
6
|
+
import { buildIterationSummary, implementVars, loadImplementPrompt, loadReviewPrompt, renderPrompt, reviewVars, tailOfMessage, } from "./prompts.js";
|
|
7
|
+
import { detectBaseBranch } from "./git.js";
|
|
8
|
+
import { loadPolicy } from "./policy.js";
|
|
9
|
+
// VA-353: review verdict marker. Global flag because sandcastle
// appends wrapper output ("Agent stopped", "Capturing session",
// "Reached max iterations (1).", "Run complete: …") AFTER the agent's
// final message — so the marker is rarely the last line. We scan
// every line-start match and keep the LAST one, which is the most
// recent agent verdict. Standalone-line: ^…$ with /m anchors prevent
// mid-prose matches like "the reviewer should output REVIEW: APPROVED
// when…".
//
// Separators are `[^\S\r\n]` (whitespace that is not a line break)
// rather than `\s`: `\s` also matches newlines, which let a marker
// straddle lines and capture the FOLLOWING line (typically wrapper
// output) as the reason. Trailing blanks after a bare marker are
// tolerated so `REVIEW: APPROVED   ` still counts as a verdict.
const REVIEW_VERDICT_RE = /^REVIEW:[^\S\r\n]*(APPROVED|REJECTED)(?:[^\S\r\n]+—[^\S\r\n]*(.*))?[^\S\r\n]*$/gm;
// VA-350: impl-pass termination contract. Last `IMPL:` marker line in
// the agent's output wins (most recent iteration's verdict). DONE →
// proceed to review; BLOCKED → HITL with reason; CONTINUE or missing →
// fall through (back-compat). The trailing reason after `—` is
// captured for BLOCKED. Same single-line whitespace rules as the
// review marker above.
const IMPL_VERDICT_RE = /^IMPL:[^\S\r\n]*(DONE|BLOCKED|CONTINUE)(?:[^\S\r\n]+—[^\S\r\n]*(.*))?[^\S\r\n]*$/gm;
// Substrings handed to sandcastle's `completionSignal` option for the
// implementation pass.
const IMPL_COMPLETION_SIGNALS = ["IMPL: DONE", "IMPL: BLOCKED"];
|
|
8
25
|
/**
|
|
9
26
|
* Confirms the cwd looks like a sandcastle-initialised repo. If not,
|
|
10
27
|
* we error early with a clear message rather than letting Sandcastle
|
|
@@ -23,22 +40,42 @@ export function assertSandcastleInitialised(cwd) {
|
|
|
23
40
|
export async function drainQueue(deps, opts = {}) {
|
|
24
41
|
const { config, linear } = deps;
|
|
25
42
|
const max = opts.max ?? Number.POSITIVE_INFINITY;
|
|
26
|
-
let
|
|
43
|
+
let attempts = 0;
|
|
27
44
|
let opened = 0;
|
|
28
45
|
let hitl = 0;
|
|
29
46
|
let errored = 0;
|
|
30
|
-
|
|
47
|
+
// Resolve the base branch once at startup so every issue in the
|
|
48
|
+
// drain sees the same answer (and so a misconfigured repo fails
|
|
49
|
+
// fast, before we touch any Linear state).
|
|
50
|
+
const baseBranch = config.baseBranch ?? (await detectBaseBranch(deps.cwd));
|
|
51
|
+
console.log(`[runway] base branch resolved to "${baseBranch}"`);
|
|
52
|
+
const policy = loadPolicy(deps.cwd, { allowPathsOverride: opts.allowPaths });
|
|
53
|
+
console.log(`[runway] policy: ${policy.source}`);
|
|
54
|
+
const runDeps = { ...deps, baseBranch, policy };
|
|
55
|
+
// VA-344: never re-pick an issue in the same invocation, even if
|
|
56
|
+
// VA-342 reverted it to `Todo`. Without this, a deterministic
|
|
57
|
+
// startup failure (broken .env.schema, missing image, expired token)
|
|
58
|
+
// would loop on the same issue until --max was exhausted.
|
|
59
|
+
const seen = new Set();
|
|
60
|
+
const outcomes = [];
|
|
61
|
+
while (attempts < max) {
|
|
31
62
|
const queue = await linear.fetchReady();
|
|
32
|
-
|
|
63
|
+
const issue = queue.find((i) => !seen.has(i.id));
|
|
64
|
+
if (!issue)
|
|
33
65
|
break;
|
|
34
|
-
|
|
66
|
+
seen.add(issue.id);
|
|
67
|
+
attempts += 1;
|
|
35
68
|
try {
|
|
36
|
-
const
|
|
37
|
-
|
|
38
|
-
if (verdict === "opened")
|
|
69
|
+
const result = await processIssue(issue, runDeps);
|
|
70
|
+
if (result.kind === "opened")
|
|
39
71
|
opened += 1;
|
|
40
|
-
if (
|
|
72
|
+
if (result.kind === "hitl")
|
|
41
73
|
hitl += 1;
|
|
74
|
+
outcomes.push({
|
|
75
|
+
identifier: issue.identifier,
|
|
76
|
+
kind: result.kind,
|
|
77
|
+
detail: result.detail,
|
|
78
|
+
});
|
|
42
79
|
}
|
|
43
80
|
catch (err) {
|
|
44
81
|
errored += 1;
|
|
@@ -46,56 +83,113 @@ export async function drainQueue(deps, opts = {}) {
|
|
|
46
83
|
// If the agent crashed before producing any commits (missing
|
|
47
84
|
// image, varlock validation, container failed to boot, etc.),
|
|
48
85
|
// it's an infrastructure failure — not a HITL. Revert the issue
|
|
49
|
-
// to
|
|
50
|
-
//
|
|
51
|
-
//
|
|
86
|
+
// to the ready state and skip the HITL label so the next run can
|
|
87
|
+
// pick it up cleanly. `In Progress` is reserved for "agent has
|
|
88
|
+
// committed to the branch".
|
|
52
89
|
const branch = `agent/${issue.identifier.toLowerCase()}`;
|
|
53
|
-
const startedRealWork = await hasCommits(deps.cwd, branch);
|
|
90
|
+
const startedRealWork = await hasCommits(deps.cwd, baseBranch, branch);
|
|
91
|
+
const errDetail = err instanceof Error ? err.message : String(err);
|
|
54
92
|
if (!startedRealWork) {
|
|
55
93
|
await linear
|
|
56
94
|
.transition(issue.id, config.readyStatus)
|
|
57
95
|
.catch(() => undefined);
|
|
58
96
|
await linear
|
|
59
|
-
.comment(issue.id, `Runway hit a startup failure before the agent produced any commits — reverting to \`${config.readyStatus}\` for retry:\n\n\`\`\`\n${
|
|
97
|
+
.comment(issue.id, `Runway hit a startup failure before the agent produced any commits — reverting to \`${config.readyStatus}\` for retry:\n\n\`\`\`\n${errDetail}\n\`\`\``)
|
|
60
98
|
.catch(() => undefined);
|
|
99
|
+
outcomes.push({
|
|
100
|
+
identifier: issue.identifier,
|
|
101
|
+
kind: "reverted",
|
|
102
|
+
detail: errDetail,
|
|
103
|
+
});
|
|
61
104
|
}
|
|
62
105
|
else {
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
106
|
+
// VA-355: comment first with the substantive reason, label
|
|
107
|
+
// second (best-effort). If we labeled first and the label
|
|
108
|
+
// didn't exist (Flightplan workspaces hitting the
|
|
109
|
+
// `needs-human` default — see VA-354), the orchestrator's
|
|
110
|
+
// catch would never get to the reason and the operator would
|
|
111
|
+
// see an infrastructure error in Linear with no clue what
|
|
112
|
+
// the agent actually found.
|
|
113
|
+
await flagHitl(issue, runDeps, `Runway hit an unrecoverable error and flagged for human review: ${errDetail}`);
|
|
114
|
+
outcomes.push({
|
|
115
|
+
identifier: issue.identifier,
|
|
116
|
+
kind: "errored",
|
|
117
|
+
detail: errDetail,
|
|
118
|
+
});
|
|
69
119
|
}
|
|
70
120
|
}
|
|
71
121
|
}
|
|
72
|
-
|
|
122
|
+
printExitSummary(outcomes);
|
|
123
|
+
return { attempts, opened, hitl, errored, outcomes };
|
|
73
124
|
}
|
|
74
125
|
async function processIssue(issue, deps) {
|
|
75
|
-
const { config, linear, github, cwd } = deps;
|
|
126
|
+
const { config, linear, github, cwd, baseBranch } = deps;
|
|
76
127
|
const branch = `agent/${issue.identifier.toLowerCase()}`;
|
|
77
128
|
await linear.transition(issue.id, config.inProgressStatus);
|
|
78
129
|
await linear.comment(issue.id, `Runway picked up this issue. Branch: \`${branch}\`.`);
|
|
79
130
|
// 1. Implementation pass.
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
131
|
+
//
|
|
132
|
+
// VA-349 + VA-350: run iterations one at a time so we can (a) inject
|
|
133
|
+
// a summary of the previous iteration into the next prompt — no more
|
|
134
|
+
// "I'll start by understanding the current state of the repository"
|
|
135
|
+
// 5x per issue — and (b) break early on IMPL: DONE/BLOCKED parsed
|
|
136
|
+
// from our own code rather than relying on sandcastle's substring
|
|
137
|
+
// completionSignal.
|
|
138
|
+
const implementTemplate = await loadImplementPrompt();
|
|
139
|
+
const maxIters = Math.max(1, config.maxIterations);
|
|
140
|
+
let prevSummary = "";
|
|
141
|
+
let implementResult;
|
|
142
|
+
let implVerdict = { kind: "missing" };
|
|
143
|
+
for (let iter = 1; iter <= maxIters; iter += 1) {
|
|
144
|
+
const implementPrompt = renderPrompt(implementTemplate, implementVars(issue, {
|
|
145
|
+
previousIterations: prevSummary,
|
|
146
|
+
policy: deps.policy,
|
|
147
|
+
}));
|
|
148
|
+
implementResult = await run({
|
|
149
|
+
agent: claudeCode("claude-opus-4-6"),
|
|
150
|
+
sandbox: docker({
|
|
151
|
+
env: dockerEnv(config),
|
|
152
|
+
}),
|
|
153
|
+
cwd,
|
|
154
|
+
prompt: implementPrompt,
|
|
155
|
+
branchStrategy: { type: "branch", branch },
|
|
156
|
+
maxIterations: 1,
|
|
157
|
+
completionSignal: [...IMPL_COMPLETION_SIGNALS],
|
|
158
|
+
name: `impl-${issue.identifier}-iter-${iter}`,
|
|
159
|
+
});
|
|
160
|
+
implVerdict = parseImplVerdict(implementResult);
|
|
161
|
+
if (implVerdict.kind === "done" || implVerdict.kind === "blocked")
|
|
162
|
+
break;
|
|
163
|
+
// CONTINUE / missing — build the summary that the NEXT iteration
|
|
164
|
+
// will see at the top of its prompt.
|
|
165
|
+
const commits = await captureCommitLog(cwd, baseBranch, branch).catch(() => "");
|
|
166
|
+
prevSummary = buildIterationSummary({
|
|
167
|
+
iterationsRun: iter,
|
|
168
|
+
commits,
|
|
169
|
+
finalMessageTail: tailOfMessage(implementResult.stdout ?? ""),
|
|
170
|
+
});
|
|
171
|
+
}
|
|
172
|
+
// implementResult is set after the first iteration. The `!` is safe
|
|
173
|
+
// because maxIters >= 1.
|
|
174
|
+
const finalResult = implementResult;
|
|
175
|
+
// VA-350: BLOCKED short-circuits straight to HITL — no reviewer pass
|
|
176
|
+
// for a self-declared blocker.
|
|
177
|
+
if (implVerdict.kind === "blocked") {
|
|
178
|
+
const reason = `Implementation pass blocked: ${implVerdict.reason}`;
|
|
179
|
+
await flagHitl(issue, deps, reason);
|
|
180
|
+
return { kind: "hitl", detail: reason };
|
|
181
|
+
}
|
|
182
|
+
if (implVerdict.kind === "missing") {
|
|
183
|
+
console.warn(`[runway] ${issue.identifier}: impl agent ended without an IMPL: marker after ${maxIters} iteration(s); proceeding to review for back-compat.`);
|
|
184
|
+
}
|
|
185
|
+
if (finalResult.commits.length === 0) {
|
|
186
|
+
const reason = "Agent produced no commits — the issue may need clarification or human input.";
|
|
187
|
+
await flagHitl(issue, deps, reason);
|
|
188
|
+
return { kind: "hitl", detail: reason };
|
|
95
189
|
}
|
|
96
190
|
// 2. Review pass — read-only-ish, just looking at the diff.
|
|
97
|
-
const diff = await captureDiff(cwd, branch);
|
|
98
|
-
const commitLog = await captureCommitLog(cwd, branch);
|
|
191
|
+
const diff = await captureDiff(cwd, baseBranch, branch);
|
|
192
|
+
const commitLog = await captureCommitLog(cwd, baseBranch, branch);
|
|
99
193
|
const reviewPrompt = renderPrompt(await loadReviewPrompt(), reviewVars({ issue, diff, commits: commitLog }));
|
|
100
194
|
const reviewResult = await run({
|
|
101
195
|
agent: claudeCode("claude-opus-4-6"),
|
|
@@ -110,8 +204,9 @@ async function processIssue(issue, deps) {
|
|
|
110
204
|
});
|
|
111
205
|
const verdict = parseReviewVerdict(reviewResult);
|
|
112
206
|
if (verdict.kind === "rejected") {
|
|
113
|
-
|
|
114
|
-
|
|
207
|
+
const reason = `Sub-agent review rejected: ${verdict.reason}`;
|
|
208
|
+
await flagHitl(issue, deps, reason);
|
|
209
|
+
return { kind: "hitl", detail: reason };
|
|
115
210
|
}
|
|
116
211
|
// 3. Push + PR.
|
|
117
212
|
await github.pushBranch(cwd, branch);
|
|
@@ -119,64 +214,165 @@ async function processIssue(issue, deps) {
|
|
|
119
214
|
const prUrl = await github.openPullRequest({
|
|
120
215
|
repoPath: cwd,
|
|
121
216
|
branch,
|
|
217
|
+
base: baseBranch,
|
|
122
218
|
issue,
|
|
123
219
|
body: prBody,
|
|
124
220
|
});
|
|
125
221
|
await linear.transition(issue.id, config.inReviewStatus);
|
|
126
222
|
await linear.comment(issue.id, `Runway opened a PR for review: ${prUrl}`);
|
|
127
|
-
return "opened";
|
|
223
|
+
return { kind: "opened", detail: prUrl };
|
|
128
224
|
}
|
|
225
|
+
/**
|
|
226
|
+
* VA-355: comment is the load-bearing artifact, label is metadata.
|
|
227
|
+
* Post the comment FIRST so the substantive reason lands on the issue
|
|
228
|
+
* even if the label apply later fails (Flightplan workspaces hitting
|
|
229
|
+
* the `needs-human` default, transient Linear errors, etc.). On full
|
|
230
|
+
* failure (comment didn't even post), dump the reason to stderr with
|
|
231
|
+
* a clear banner so the operator sees it terminal-side.
|
|
232
|
+
*/
|
|
129
233
|
async function flagHitl(issue, deps, reason) {
    const { config, linear } = deps;
    const logPrefix = `[runway] ${issue.identifier}:`;
    const commentBody = `Runway flagged for human review: ${reason}`;

    // Step 1: the comment carries the substantive reason, so it goes first.
    let commentPosted;
    try {
        await linear.comment(issue.id, commentBody);
        commentPosted = true;
    }
    catch (commentErr) {
        commentPosted = false;
        console.error(`${logPrefix} failed to post HITL comment:`, errMsg(commentErr));
    }

    // Step 2: the label is best-effort metadata; a failure here never throws.
    try {
        await linear.applyLabel(issue.id, config.hitlLabel);
    }
    catch (labelErr) {
        const why = errMsg(labelErr);
        console.error(`${logPrefix} failed to apply HITL label "${config.hitlLabel}":`, why);
        if (commentPosted) {
            // The reason is already on the issue; this follow-up only asks
            // the operator to add the label by hand. Its own failure is ignored.
            await linear
                .comment(issue.id, `Note: could not apply \`${config.hitlLabel}\` label — please apply it manually. (${why})`)
                .catch(() => undefined);
        }
    }

    if (commentPosted)
        return;

    // Step 3 (last resort): nothing reached Linear, so print the reason to
    // stderr between banner lines where the operator will see it.
    const banner = [
        "",
        `===== REJECTION REASON FOLLOWS (${issue.identifier}) =====`,
        reason,
        "===== END REJECTION REASON =====",
        "",
        "",
    ];
    process.stderr.write(banner.join("\n"));
}
|
|
271
|
+
/**
 * Reduce a thrown value to a single log-friendly line.
 *
 * For `Error` instances this is the first non-blank line of `message`
 * (a message that starts with a newline would otherwise collapse to an
 * empty string); if every line is blank the raw message is returned.
 * Any other thrown value is coerced with `String()`.
 *
 * @param {unknown} err - The caught value.
 * @returns {string} A one-line description of the failure.
 */
function errMsg(err) {
    if (!(err instanceof Error))
        return String(err);
    const firstLine = err.message
        .split("\n")
        .find((line) => line.trim() !== "");
    return firstLine ?? err.message;
}
|
|
276
|
+
/**
|
|
277
|
+
* VA-355: render a per-issue verdict trail at the end of the drain so
|
|
278
|
+
* the operator can scan results without opening Linear. Skipped when
|
|
279
|
+
* no issues were attempted.
|
|
280
|
+
*/
|
|
281
|
+
/**
 * VA-355: print a per-issue verdict trail at the end of the drain so
 * the operator can scan results without opening Linear. Prints nothing
 * when no issues were attempted.
 *
 * Each outcome is rendered on exactly one line: only the first
 * non-blank line of `detail` is shown (error details can be multi-line
 * messages), and a missing `detail` is omitted rather than printed as
 * the literal text "undefined".
 *
 * @param {Array<{identifier: string, kind: string, detail?: string}>} outcomes
 * @returns {void}
 */
function printExitSummary(outcomes) {
    if (outcomes.length === 0)
        return;
    // Any kind not listed here is reported as an infrastructure error.
    const tags = new Map([
        ["opened", "APPROVED → PR opened"],
        ["hitl", "HITL"],
        ["reverted", "REVERTED → Todo"],
    ]);
    console.log("\n[runway] per-issue outcomes:");
    for (const o of outcomes) {
        const parts = [o.identifier, tags.get(o.kind) ?? "INFRA_ERROR"];
        const detailLine = o.detail == null
            ? ""
            : String(o.detail).split("\n").find((line) => line.trim() !== "") ?? "";
        if (detailLine !== "")
            parts.push(detailLine);
        console.log(` ${parts.join(" ")}`);
    }
}
|
|
134
296
|
/**
|
|
135
|
-
* Whether the agent branch has any commits beyond `
|
|
297
|
+
* Whether the agent branch has any commits beyond `base`. Used by the
|
|
136
298
|
* drain loop to distinguish "agent crashed mid-run, after producing
|
|
137
299
|
* real work" (→ HITL) from "agent crashed during startup, no work
|
|
138
300
|
* done" (→ revert to Todo). If the branch doesn't exist or git fails,
|
|
139
301
|
* treat as "no commits" so we revert rather than strand the issue.
|
|
140
302
|
*/
|
|
141
|
-
async function hasCommits(repoPath, branch) {
|
|
303
|
+
async function hasCommits(repoPath, base, branch) {
    // `reject: false` makes execa resolve even on a non-zero git exit; an
    // empty or non-numeric stdout then parses to NaN, and `NaN > 0` is false.
    // The try/catch covers the remaining failure modes.
    try {
        const revList = await execa(
            "git",
            ["rev-list", "--count", `${base}..${branch}`],
            { cwd: repoPath, reject: false },
        );
        const aheadBy = Number.parseInt(revList.stdout.trim(), 10);
        return aheadBy > 0;
    }
    catch {
        return false;
    }
}
|
|
150
|
-
async function captureDiff(repoPath, branch) {
|
|
151
|
-
const { stdout } = await execa("git", ["diff",
|
|
312
|
+
/**
 * Return the output of `git diff <base>...<branch>` run in `repoPath`,
 * capped at 60,000 characters so the review prompt stays under the
 * model's context budget.
 */
async function captureDiff(repoPath, base, branch) {
    const result = await execa("git", ["diff", `${base}...${branch}`], {
        cwd: repoPath,
    });
    const diffText = result.stdout;
    if (diffText.length <= 60_000)
        return diffText;
    return `${diffText.slice(0, 60_000)}\n…(truncated)`;
}
|
|
157
|
-
async function captureCommitLog(repoPath, branch) {
|
|
158
|
-
const { stdout } = await execa("git", ["log", "--oneline",
|
|
319
|
+
/**
 * Return the stdout of `git log --oneline <base>..<branch>` run in
 * `repoPath`. A failing git invocation rejects (execa default).
 */
async function captureCommitLog(repoPath, base, branch) {
    const gitArgs = ["log", "--oneline", `${base}..${branch}`];
    const result = await execa("git", gitArgs, { cwd: repoPath });
    return result.stdout;
}
|
|
323
|
+
/**
|
|
324
|
+
* Pulls the last `IMPL:` marker line out of the agent's output. The
|
|
325
|
+
* orchestrator uses this to distinguish a clean completion (DONE) from
|
|
326
|
+
* a self-declared block (BLOCKED — reason) from a multi-iteration
|
|
327
|
+
* in-progress signal (CONTINUE). A missing marker is treated as
|
|
328
|
+
* CONTINUE-with-warning for back-compat.
|
|
329
|
+
*/
|
|
330
|
+
export function parseImplVerdict(result) {
    // Walk every marker and remember only the most recent one: a later
    // `IMPL:` line overrides any emitted earlier in the same output.
    let latest;
    for (const hit of stringifyResult(result).matchAll(IMPL_VERDICT_RE)) {
        latest = hit;
    }
    if (latest === undefined)
        return { kind: "missing" };
    const [, keyword, trailing] = latest;
    switch (keyword) {
        case "DONE":
            return { kind: "done" };
        case "CONTINUE":
            return { kind: "continue" };
        default:
            // The pattern only admits DONE | BLOCKED | CONTINUE, so this is BLOCKED.
            return {
                kind: "blocked",
                reason: trailing?.trim() || "no reason given",
            };
    }
}
|
|
161
347
|
/**
|
|
162
348
|
* Sandcastle's `RunResult` shape varies by version; defensively dig out
|
|
163
349
|
* the last assistant message text. We only need to match the
|
|
164
350
|
* `REVIEW: APPROVED` / `REVIEW: REJECTED — …` line at the tail.
|
|
165
351
|
*/
|
|
166
|
-
|
|
352
|
+
/**
|
|
353
|
+
* VA-353: parse the reviewer's final `REVIEW: APPROVED` /
|
|
354
|
+
* `REVIEW: REJECTED — <reason>` marker. Scans the agent's combined
|
|
355
|
+
* stdout for *all* matches and returns the LAST one, since sandcastle
|
|
356
|
+
* appends its own wrapper output ("Agent stopped", "Capturing
|
|
357
|
+
* session", "Reached max iterations (N).", "Run complete: …") after
|
|
358
|
+
* the agent's final message. A missing marker is itself a rejection —
|
|
359
|
+
* a reviewer pass that didn't terminate cleanly is not trustworthy.
|
|
360
|
+
*/
|
|
361
|
+
export function parseReviewVerdict(result) {
    const verdicts = Array.from(stringifyResult(result).matchAll(REVIEW_VERDICT_RE));
    // Only the final marker counts; anything before it is superseded.
    const finalVerdict = verdicts.at(-1);
    if (finalVerdict === undefined) {
        // No marker at all is treated as a rejection.
        return {
            kind: "rejected",
            reason: "review output did not contain a REVIEW: verdict line",
        };
    }
    const [, outcome, explanation] = finalVerdict;
    return outcome === "APPROVED"
        ? { kind: "approved", reason: "" }
        : { kind: "rejected", reason: explanation?.trim() || "no reason given" };
}
|
|
182
378
|
function stringifyResult(result) {
|
|
@@ -184,6 +380,15 @@ function stringifyResult(result) {
|
|
|
184
380
|
return result;
|
|
185
381
|
if (result && typeof result === "object") {
|
|
186
382
|
const r = result;
|
|
383
|
+
// VA-353: sandcastle's RunResult carries the combined agent output
|
|
384
|
+
// on `stdout`. Prefer it — falling through to JSON.stringify (the
|
|
385
|
+
// old behavior) replaces real newlines with `\n` escapes and
|
|
386
|
+
// breaks `^…$/m` line anchoring, which is the exact reason the
|
|
387
|
+
// reviewer's verdict was being silently dropped for issues like
|
|
388
|
+
// VA-312 tonight. The iterations/output fallbacks remain for
|
|
389
|
+
// back-compat with older shapes and inline test fixtures.
|
|
390
|
+
if (typeof r.stdout === "string" && r.stdout.length > 0)
|
|
391
|
+
return r.stdout;
|
|
187
392
|
if (r.iterations?.length) {
|
|
188
393
|
return r.iterations
|
|
189
394
|
.map((i) => i.output ?? i.text ?? "")
|
package/dist/policy.js
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { parse as parseYaml } from "yaml";
|
|
4
|
+
import { z } from "zod";
|
|
5
|
+
/**
|
|
6
|
+
* VA-352: per-repo + per-run write-path policy for the impl agent.
|
|
7
|
+
*
|
|
8
|
+
* Defaults are conservative — secrets and sandbox-internals are always
|
|
9
|
+
* denied. Repos that need agents to touch CI workflows (the common
|
|
10
|
+
* case) opt in by creating `.runway/policy.yml` with `allowedPaths`,
|
|
11
|
+
* or by passing `--allow-paths=` for a single invocation.
|
|
12
|
+
*
|
|
13
|
+
* The policy is reflected back to the agent in the rendered prompt
|
|
14
|
+
* (`prompts/implement.md`'s "Working style" denylist sentence) so the
|
|
15
|
+
* sentence the agent sees matches what runway will enforce at review
|
|
16
|
+
* time. Enforcement itself (refusing to push a PR that touches a
|
|
17
|
+
* denied path) lives in the reviewer pass — out of scope for this
|
|
18
|
+
* change; the goal here is that the agent gets a correct denylist
|
|
19
|
+
* and surfaces `IMPL: BLOCKED` when an AC requires a denied path.
|
|
20
|
+
*/
|
|
21
|
+
export const DEFAULT_FORBIDDEN_PATHS = [
|
|
22
|
+
".github/workflows/**",
|
|
23
|
+
".env*",
|
|
24
|
+
"*.pem",
|
|
25
|
+
"*.key",
|
|
26
|
+
"pnpm-lock.yaml",
|
|
27
|
+
".sandcastle/**",
|
|
28
|
+
];
|
|
29
|
+
const PolicyFileSchema = z.object({
|
|
30
|
+
allowedPaths: z.array(z.string()).optional(),
|
|
31
|
+
forbiddenPaths: z.array(z.string()).optional(),
|
|
32
|
+
});
|
|
33
|
+
const POLICY_RELATIVE_PATH = join(".runway", "policy.yml");
|
|
34
|
+
/**
|
|
35
|
+
* Resolve the effective policy for `cwd`. Reads `.runway/policy.yml`
|
|
36
|
+
* when present, layers it on top of the conservative defaults, then
|
|
37
|
+
* applies any `--allow-paths` CLI override.
|
|
38
|
+
*/
|
|
39
|
+
/**
 * Resolve the effective write-path policy for `cwd`.
 *
 * Starts from `DEFAULT_FORBIDDEN_PATHS`. When `.runway/policy.yml`
 * exists, its `forbiddenPaths` (if given) REPLACES the defaults and its
 * `allowedPaths` entries are then removed from the denylist. Finally
 * any `opts.allowPathsOverride` entries are removed as well.
 *
 * Allow entries are matched by exact string equality against denylist
 * patterns — no glob expansion. An allow entry that matches nothing is
 * reported with a warning instead of being dropped silently, so the
 * operator can see that the override had no effect.
 *
 * @param {string} cwd - Repo root to look for `.runway/policy.yml` in.
 * @param {{allowPathsOverride?: string[]}} [opts] - Per-run overrides.
 * @returns {{forbiddenPaths: string[], source: string}} The denylist and
 *   a " + "-joined description of where it came from.
 * @throws If the policy file cannot be read or parsed, or fails schema
 *   validation (errors from `readFileSync`, the YAML parser and the zod
 *   schema propagate unchanged).
 */
export function loadPolicy(cwd, opts = {}) {
    const sources = [];
    let forbidden = new Set(DEFAULT_FORBIDDEN_PATHS);
    // Remove each allow entry from the denylist, warning on entries that
    // were not present (exact-match only, so a near-miss does nothing).
    const applyAllowList = (entries, origin) => {
        for (const entry of entries) {
            if (!forbidden.delete(entry)) {
                console.warn(`[runway] policy: ${origin} entry "${entry}" is not in the forbidden list (entries must match a pattern exactly); ignoring.`);
            }
        }
    };
    const policyPath = join(cwd, POLICY_RELATIVE_PATH);
    if (existsSync(policyPath)) {
        sources.push(POLICY_RELATIVE_PATH);
        const raw = readFileSync(policyPath, "utf8");
        // An empty YAML document parses to null; treat it as `{}`.
        const parsed = PolicyFileSchema.parse(parseYaml(raw) ?? {});
        if (parsed.forbiddenPaths) {
            forbidden = new Set(parsed.forbiddenPaths);
        }
        applyAllowList(parsed.allowedPaths ?? [], `${POLICY_RELATIVE_PATH} allowedPaths`);
    }
    else {
        sources.push("defaults");
    }
    if (opts.allowPathsOverride?.length) {
        applyAllowList(opts.allowPathsOverride, "--allow-paths");
        sources.push("--allow-paths");
    }
    return {
        forbiddenPaths: [...forbidden],
        source: sources.join(" + "),
    };
}
|
|
66
|
+
/**
|
|
67
|
+
* Render the bullet sentence the impl prompt shows the agent. Stable
|
|
68
|
+
* formatting so a missing path is visible in a diff.
|
|
69
|
+
*/
|
|
70
|
+
export function renderForbiddenPathsBullet(policy) {
    const { forbiddenPaths } = policy;
    if (forbiddenPaths.length > 0) {
        // Each path is wrapped in backticks so it renders as inline code.
        const pathList = forbiddenPaths.map((path) => `\`${path}\``).join(", ");
        return `- Never modify ${pathList}. If the issue's acceptance criteria require modifying one of these paths, **stop and emit \`IMPL: BLOCKED — issue requires modifying <path>, which working-style policy forbids\`** — do not silently skip the work.`;
    }
    return "- (No write-path restrictions for this repo. Use judgment.)";
}
|
package/dist/prompts.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { readFile } from "node:fs/promises";
|
|
2
2
|
import { fileURLToPath } from "node:url";
|
|
3
3
|
import { dirname, join } from "node:path";
|
|
4
|
+
import { renderForbiddenPathsBullet } from "./policy.js";
|
|
4
5
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
5
6
|
// Prompts ship with the runway package, NOT in the target repo's
|
|
6
7
|
// .sandcastle/. Runway substitutes {{KEY}} placeholders before passing
|
|
@@ -22,13 +23,55 @@ export async function loadReviewPrompt() {
|
|
|
22
23
|
/**
 * Substitute `{{KEY}}` placeholders in `template` with values from `vars`.
 *
 * Only OWN properties of `vars` are consulted, so a placeholder such as
 * `{{toString}}` or `{{constructor}}` is never filled from
 * `Object.prototype`. A placeholder whose key is absent (or whose value
 * is null/undefined) is left in the output verbatim. An empty-string
 * value is a valid substitution.
 *
 * @param {string} template - Prompt text containing `{{KEY}}` markers.
 * @param {Record<string, unknown>} vars - Replacement values by key.
 * @returns {string} The rendered prompt.
 */
export function renderPrompt(template, vars) {
    return template.replace(/\{\{(\w+)\}\}/g, (placeholder, key) => {
        const value = Object.hasOwn(vars, key) ? vars[key] : undefined;
        return value ?? placeholder;
    });
}
|
|
25
|
-
export function implementVars(issue) {
|
|
26
|
+
export function implementVars(issue, opts = {}) {
    const { previousIterations, policy } = opts;
    // A falsy description (missing or empty string) gets a visible stand-in.
    const description = issue.description ? issue.description : "(no description)";
    // VA-352: the denylist sentence is rendered from the active policy when
    // one is supplied; without a policy the placeholder renders empty.
    let forbiddenBullet = "";
    if (policy) {
        forbiddenBullet = renderForbiddenPathsBullet(policy);
    }
    return {
        ISSUE_IDENTIFIER: issue.identifier,
        ISSUE_TITLE: issue.title,
        ISSUE_DESCRIPTION: description,
        // VA-349: stays empty unless the caller supplies a summary.
        PREVIOUS_ITERATIONS: previousIterations ?? "",
        POLICY_FORBIDDEN_BULLET: forbiddenBullet,
    };
}
|
|
41
|
+
/**
|
|
42
|
+
* VA-349: build the "## Previous iterations" block that gets prepended
|
|
43
|
+
* to iteration N+1's prompt. Carries the agent's commit log and the
|
|
44
|
+
* tail of its final message so the next iteration doesn't re-explore
|
|
45
|
+
* the repo from scratch.
|
|
46
|
+
*/
|
|
47
|
+
/**
 * VA-349: build the "## Previous iterations" markdown block shown to
 * the next implementation iteration.
 *
 * The commit log and the message tail are each wrapped in a fenced
 * code block. The fence is normally three backticks, but grows to one
 * more than the longest backtick run in the wrapped text, so a code
 * fence inside the agent's own output cannot close the block early.
 * Missing or blank inputs render as "(no commits yet)" /
 * "(no output captured)".
 *
 * @param {{iterationsRun: number, commits?: string, finalMessageTail?: string}} args
 * @returns {string} Newline-joined markdown ending in a trailing newline.
 */
export function buildIterationSummary(args) {
    const { iterationsRun, commits, finalMessageTail } = args;
    // Wrap `text` in a fence that is longer than any backtick run it contains.
    const fenced = (text) => {
        let longestRun = 0;
        for (const run of text.matchAll(/`+/g)) {
            longestRun = Math.max(longestRun, run[0].length);
        }
        const fence = "`".repeat(Math.max(3, longestRun + 1));
        return [fence, text, fence];
    };
    const commitText = (commits ?? "").trim() || "(no commits yet)";
    const tailText = (finalMessageTail ?? "").trim() || "(no output captured)";
    return [
        "## Previous iterations",
        "",
        `You have already completed ${iterationsRun} iteration(s) on this issue.`,
        "Do **not** re-explore the repository — pick up where the last iteration left off.",
        "",
        "### Commits so far on this branch",
        "",
        ...fenced(commitText),
        "",
        "### Tail of the last iteration's final message",
        "",
        ...fenced(tailText),
        "",
    ].join("\n");
}
|
|
69
|
+
/** Keep the tail of an iteration's stdout small enough to fit alongside the prompt. */
|
|
70
|
+
/**
 * Keep only the last `maxChars` characters of an iteration's stdout so
 * it fits alongside the prompt.
 *
 * Output that already fits is returned unchanged; otherwise the kept
 * tail is prefixed with a one-line truncation notice. The tail is cut
 * with an explicit start index rather than `slice(-maxChars)`, because
 * `slice(-0)` is `slice(0)` and would return the whole string when
 * `maxChars` is 0. Negative or NaN limits are treated as 0.
 *
 * @param {string} stdout - Combined agent output.
 * @param {number} [maxChars=2000] - Maximum characters of tail to keep.
 * @returns {string} `stdout`, or the notice followed by its tail.
 */
export function tailOfMessage(stdout, maxChars = 2000) {
    const limit = maxChars > 0 ? Math.trunc(maxChars) : 0;
    if (stdout.length <= limit)
        return stdout;
    return `…(earlier output truncated)\n${stdout.slice(stdout.length - limit)}`;
}
|
|
32
75
|
export function reviewVars(args) {
|
|
33
76
|
return {
|
|
34
77
|
ISSUE_IDENTIFIER: args.issue.identifier,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@valescoagency/runway",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "Linear-driven orchestrator + scaffolder for coding agents on Sandcastle. `runway init` scaffolds a target repo (sandcastle + varlock + 1Password); `runway run` drains a Linear queue against it; `runway doctor`, `runway upgrade`, `runway upgrade-repo` round out the lifecycle.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": {
|
|
@@ -42,12 +42,17 @@
|
|
|
42
42
|
"@ai-hero/sandcastle": "^0.5.10",
|
|
43
43
|
"@linear/sdk": "^41.0.0",
|
|
44
44
|
"execa": "^9.5.2",
|
|
45
|
+
"yaml": "^2.9.0",
|
|
45
46
|
"zod": "^3.23.8"
|
|
46
47
|
},
|
|
47
48
|
"devDependencies": {
|
|
49
|
+
"@commitlint/cli": "^21.0.0",
|
|
50
|
+
"@commitlint/config-conventional": "^21.0.0",
|
|
48
51
|
"@types/node": "^22.10.0",
|
|
52
|
+
"lefthook": "^2.1.6",
|
|
49
53
|
"tsx": "^4.19.2",
|
|
50
|
-
"typescript": "^5.7.2"
|
|
54
|
+
"typescript": "^5.7.2",
|
|
55
|
+
"vitest": "^4.1.5"
|
|
51
56
|
},
|
|
52
57
|
"engines": {
|
|
53
58
|
"node": ">=22"
|
|
@@ -56,9 +61,11 @@
|
|
|
56
61
|
"access": "public"
|
|
57
62
|
},
|
|
58
63
|
"scripts": {
|
|
59
|
-
"build": "tsc && chmod +x dist/cli.js",
|
|
64
|
+
"build": "tsc -p tsconfig.build.json && chmod +x dist/cli.js",
|
|
60
65
|
"typecheck": "tsc --noEmit",
|
|
61
66
|
"dev": "tsx src/cli.ts",
|
|
67
|
+
"test": "vitest run",
|
|
68
|
+
"test:watch": "vitest",
|
|
62
69
|
"lint": "echo 'lint not configured yet'"
|
|
63
70
|
}
|
|
64
71
|
}
|