@valescoagency/runway 0.9.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli.js +1 -0
- package/dist/commands/run.js +64 -2
- package/dist/config.js +8 -0
- package/dist/dashboard/otlp.js +16 -2
- package/dist/dashboard/projector.js +12 -0
- package/dist/dashboard/server.js +60 -4
- package/dist/dashboard/storage.js +233 -17
- package/dist/dashboard/views.js +18 -1
- package/dist/finalize.js +34 -2
- package/dist/git.js +192 -22
- package/dist/implement.js +6 -0
- package/dist/linear.js +75 -16
- package/dist/orchestrator.js +99 -18
- package/dist/prompts.js +40 -0
- package/dist/review.js +32 -18
- package/package.json +1 -1
- package/prompts/implement.md +11 -0
- package/prompts/review.md +48 -6
package/README.md
CHANGED
|
@@ -392,7 +392,7 @@ These are tractable, just not v1.
|
|
|
392
392
|
|
|
393
393
|
## Status
|
|
394
394
|
|
|
395
|
-
0.
|
|
395
|
+
0.10.1 — production-shaped and dogfooded against live Linear queues.
|
|
396
396
|
The end-to-end pipeline (init → run → review → PR) is stable; surface
|
|
397
397
|
may still shift as the orchestrator's policy and iteration mechanics
|
|
398
398
|
mature. See [CHANGELOG.md](./CHANGELOG.md) for per-release detail.
|
package/dist/cli.js
CHANGED
package/dist/commands/run.js
CHANGED
|
@@ -2,6 +2,7 @@ import { Effect, Layer, Logger, RateLimiter } from "effect";
|
|
|
2
2
|
import { ConfigLive, ConfigTag } from "../config.js";
|
|
3
3
|
import { createLinearGateway } from "../linear.js";
|
|
4
4
|
import { createGithubGateway } from "../github.js";
|
|
5
|
+
import { remoteRefExists } from "../git.js";
|
|
5
6
|
import { assertSandcastleInitialised, drainQueue, } from "../orchestrator.js";
|
|
6
7
|
import { TelemetryLive } from "../telemetry.js";
|
|
7
8
|
export function parseRunArgs(argv) {
|
|
@@ -68,6 +69,25 @@ export function parseRunArgs(argv) {
|
|
|
68
69
|
}
|
|
69
70
|
opts.implTurns = n;
|
|
70
71
|
}
|
|
72
|
+
else if (a === "--review-retries") {
|
|
73
|
+
const v = argv[i + 1];
|
|
74
|
+
if (!v)
|
|
75
|
+
throw new Error("--review-retries requires a number");
|
|
76
|
+
const n = Number.parseInt(v, 10);
|
|
77
|
+
if (!Number.isFinite(n) || n < 0) {
|
|
78
|
+
throw new Error(`--review-retries must be a non-negative integer, got "${v}"`);
|
|
79
|
+
}
|
|
80
|
+
opts.reviewRetries = n;
|
|
81
|
+
i += 1;
|
|
82
|
+
}
|
|
83
|
+
else if (a?.startsWith("--review-retries=")) {
|
|
84
|
+
const v = a.slice("--review-retries=".length);
|
|
85
|
+
const n = Number.parseInt(v, 10);
|
|
86
|
+
if (!Number.isFinite(n) || n < 0) {
|
|
87
|
+
throw new Error(`--review-retries must be a non-negative integer, got "${v}"`);
|
|
88
|
+
}
|
|
89
|
+
opts.reviewRetries = n;
|
|
90
|
+
}
|
|
71
91
|
else if (a === "--help" || a === "-h") {
|
|
72
92
|
printRunUsage();
|
|
73
93
|
process.exit(0);
|
|
@@ -102,6 +122,14 @@ OPTIONS
|
|
|
102
122
|
(how many turns the Claude agent gets per attempt
|
|
103
123
|
before it has to signal IMPL: DONE / BLOCKED).
|
|
104
124
|
Overrides RUNWAY_IMPL_TURNS. Default: 3.
|
|
125
|
+
--review-retries N
|
|
126
|
+
In-run review-rejection retry budget. When the
|
|
127
|
+
reviewer emits REVIEW: REJECTED-RETRY — <reason>
|
|
128
|
+
(mechanically fixable), runway re-runs the impl
|
|
129
|
+
agent with the reason in {{IN_RUN_REVIEWER_FEEDBACK}}
|
|
130
|
+
and re-runs review. N caps the extra impl+review
|
|
131
|
+
pairs per drain pickup. 0 disables retries entirely.
|
|
132
|
+
Overrides RUNWAY_REVIEW_RETRIES. Default: 1.
|
|
105
133
|
--help, -h Show this help.
|
|
106
134
|
|
|
107
135
|
ENVIRONMENT
|
|
@@ -113,7 +141,14 @@ ENVIRONMENT
|
|
|
113
141
|
and targets with PRs). Detected from
|
|
114
142
|
origin/HEAD when unset.
|
|
115
143
|
RUNWAY_READY_STATUS default "Todo"
|
|
116
|
-
RUNWAY_IN_PROGRESS_STATUS default "In Progress"
|
|
144
|
+
RUNWAY_IN_PROGRESS_STATUS default "In Progress" — also the
|
|
145
|
+
auxiliary drain bucket (VA-421): runway
|
|
146
|
+
accepts issues in this status when no
|
|
147
|
+
agent/<id> branch exists on origin, so
|
|
148
|
+
Linear's GitHub auto-transitions (e.g.
|
|
149
|
+
an unrelated PR mentioning the issue in
|
|
150
|
+
its body) can't silently drop the issue
|
|
151
|
+
from the queue.
|
|
117
152
|
RUNWAY_IN_REVIEW_STATUS default "In Review"
|
|
118
153
|
RUNWAY_HITL_LABEL default "ready-for-human"
|
|
119
154
|
RUNWAY_MAX_ITERATIONS default 5 — outer impl re-prompt loop
|
|
@@ -122,6 +157,11 @@ ENVIRONMENT
|
|
|
122
157
|
RUNWAY_IMPL_TURNS default 3 — sandcastle inner turn
|
|
123
158
|
budget per impl phase. Overridden by
|
|
124
159
|
--impl-turns.
|
|
160
|
+
RUNWAY_REVIEW_RETRIES default 1 — review-rejection retry
|
|
161
|
+
loop. On REVIEW: REJECTED-RETRY, runway
|
|
162
|
+
re-runs impl with the rejection in the
|
|
163
|
+
prompt, then re-runs review. 0 disables
|
|
164
|
+
entirely. Overridden by --review-retries.
|
|
125
165
|
`);
|
|
126
166
|
}
|
|
127
167
|
export async function runCommand(argv) {
|
|
@@ -152,6 +192,9 @@ export async function runCommand(argv) {
|
|
|
152
192
|
...baseConfig,
|
|
153
193
|
...(opts.project ? { linearProject: opts.project } : {}),
|
|
154
194
|
...(opts.implTurns !== undefined ? { implTurns: opts.implTurns } : {}),
|
|
195
|
+
...(opts.reviewRetries !== undefined
|
|
196
|
+
? { reviewRetries: opts.reviewRetries }
|
|
197
|
+
: {}),
|
|
155
198
|
};
|
|
156
199
|
const scope = config.linearProject
|
|
157
200
|
? `team ${config.linearTeam} / project ${config.linearProject}`
|
|
@@ -161,10 +204,29 @@ export async function runCommand(argv) {
|
|
|
161
204
|
limit: 30,
|
|
162
205
|
interval: "1 minute",
|
|
163
206
|
});
|
|
164
|
-
|
|
207
|
+
// VA-421: inject a git-side predicate so `fetchReady` can accept
|
|
208
|
+
// In-Progress issues whose `agent/<id>` branch hasn't yet been
|
|
209
|
+
// pushed to origin. Closes the Linear-auto-transition loophole
|
|
210
|
+
// where an unrelated PR-body mention silently drops an issue from
|
|
211
|
+
// the drain queue.
|
|
212
|
+
const linear = createLinearGateway(config, linearLimiter, {
|
|
213
|
+
remoteAgentBranchExists: (branch) => Effect.runPromise(remoteRefExists(cwd, branch)),
|
|
214
|
+
});
|
|
165
215
|
const github = createGithubGateway();
|
|
166
216
|
return yield* drainQueue({ config, linear, github, cwd }, { max: opts.max, allowPaths: opts.allowPaths });
|
|
167
217
|
}).pipe(Effect.scoped, Effect.provide(MainLayer));
|
|
168
218
|
const result = await Effect.runPromise(program);
|
|
169
219
|
console.log(`[runway] done — attempts=${result.attempts} opened=${result.opened} hitl=${result.hitl} errored=${result.errored}`);
|
|
220
|
+
// Single-line, parser-friendly completion marker. Background
|
|
221
|
+
// watchers (Claude Code's `run_in_background` bash task, CI,
|
|
222
|
+
// scripts) can grep for `[runway:exit]` instead of guessing
|
|
223
|
+
// whether the drain is still in flight.
|
|
224
|
+
console.log(`[runway:exit] status=success attempts=${result.attempts} opened=${result.opened} hitl=${result.hitl} errored=${result.errored}`);
|
|
225
|
+
// Hard exit so any lingering handle (OTel BatchSpanProcessor's
|
|
226
|
+
// interval when OTEL_EXPORTER_OTLP_ENDPOINT is set, a Docker
|
|
227
|
+
// stream Sandcastle left open, etc.) can't keep the process — and
|
|
228
|
+
// the background task that launched it — alive after the drain is
|
|
229
|
+
// logically done. By this point `Effect.scoped` has already torn
|
|
230
|
+
// down its finalizers.
|
|
231
|
+
process.exit(0);
|
|
170
232
|
}
|
package/dist/config.js
CHANGED
|
@@ -24,6 +24,13 @@ const configEffect = EConfig.all({
|
|
|
24
24
|
message: "RUNWAY_IMPL_TURNS must be a positive integer",
|
|
25
25
|
validation: (n) => n > 0,
|
|
26
26
|
})),
|
|
27
|
+
// VA-418: zero is a valid value here (operator kill-switch) so the
|
|
28
|
+
// validation accepts >= 0, unlike implTurns/maxIterations which
|
|
29
|
+
// both require >= 1.
|
|
30
|
+
reviewRetries: EConfig.integer("RUNWAY_REVIEW_RETRIES").pipe(EConfig.withDefault(1), EConfig.validate({
|
|
31
|
+
message: "RUNWAY_REVIEW_RETRIES must be a non-negative integer",
|
|
32
|
+
validation: (n) => n >= 0,
|
|
33
|
+
})),
|
|
27
34
|
commentAuthorAllowlist: EConfig.option(EConfig.string("RUNWAY_COMMENT_AUTHOR_ALLOWLIST")),
|
|
28
35
|
}).pipe(Effect.map((raw) => ({
|
|
29
36
|
linearApiKey: raw.linearApiKey,
|
|
@@ -37,6 +44,7 @@ const configEffect = EConfig.all({
|
|
|
37
44
|
hitlLabel: raw.hitlLabel,
|
|
38
45
|
maxIterations: raw.maxIterations,
|
|
39
46
|
implTurns: raw.implTurns,
|
|
47
|
+
reviewRetries: raw.reviewRetries,
|
|
40
48
|
commentAuthorAllowlist: Option.getOrUndefined(raw.commentAuthorAllowlist)
|
|
41
49
|
?.split(",")
|
|
42
50
|
.map((s) => s.trim())
|
package/dist/dashboard/otlp.js
CHANGED
|
@@ -11,12 +11,18 @@
|
|
|
11
11
|
/**
|
|
12
12
|
* Coerce an OTLP attribute value to a plain JS scalar. We collapse
|
|
13
13
|
* the typed wire variants (`stringValue` / `intValue` / `boolValue` /
|
|
14
|
-
* `doubleValue`) into one return path so callers
|
|
15
|
-
* pattern-match without knowing the OTLP shape.
|
|
14
|
+
* `doubleValue` / `arrayValue`) into one return path so callers
|
|
15
|
+
* downstream can pattern-match without knowing the OTLP shape.
|
|
16
16
|
*
|
|
17
17
|
* `intValue` round-trips as a string to preserve int64 precision.
|
|
18
18
|
* Callers that want a `number` (e.g. for counters under 2^53) should
|
|
19
19
|
* `Number(...)` it themselves.
|
|
20
|
+
*
|
|
21
|
+
* VA-387: `arrayValue` collapses to a `readonly string[]` so the
|
|
22
|
+
* dashboard's label-style attributes (`runway.issue.labels`) survive
|
|
23
|
+
* the wire trip with their structure intact. Non-string array
|
|
24
|
+
* elements drop silently — projector callers only ever ask for
|
|
25
|
+
* string arrays today.
|
|
20
26
|
*/
|
|
21
27
|
export function attrValue(attr) {
|
|
22
28
|
if (!attr)
|
|
@@ -35,6 +41,14 @@ export function attrValue(attr) {
|
|
|
35
41
|
? v.intValue
|
|
36
42
|
: v.intValue;
|
|
37
43
|
}
|
|
44
|
+
if (v.arrayValue !== undefined) {
|
|
45
|
+
const items = [];
|
|
46
|
+
for (const inner of v.arrayValue.values) {
|
|
47
|
+
if (inner.stringValue !== undefined)
|
|
48
|
+
items.push(inner.stringValue);
|
|
49
|
+
}
|
|
50
|
+
return items;
|
|
51
|
+
}
|
|
38
52
|
return undefined;
|
|
39
53
|
}
|
|
40
54
|
/**
|
|
@@ -78,9 +78,13 @@ function projectIssueProcess(span) {
|
|
|
78
78
|
parentSpanId: span.parentSpanId ?? null,
|
|
79
79
|
issueIdentifier: identifier,
|
|
80
80
|
issueId: strAttr(m["runway.issue.id"]) ?? null,
|
|
81
|
+
issueTitle: strAttr(m["runway.issue.title"]) ?? null,
|
|
82
|
+
issueLabels: strArrayAttr(m["runway.issue.labels"]),
|
|
81
83
|
branch: strAttr(m["runway.branch"]) ?? null,
|
|
82
84
|
outcomeKind: strAttr(m["runway.outcome.kind"]) ?? null,
|
|
83
85
|
outcomeDetail: strAttr(m["runway.outcome.detail"]) ?? null,
|
|
86
|
+
prUrl: strAttr(m["runway.pr.url"]) ?? null,
|
|
87
|
+
hitlReason: strAttr(m["runway.hitl.reason"]) ?? null,
|
|
84
88
|
startTimeUnixNano: span.startTimeUnixNano,
|
|
85
89
|
endTimeUnixNano: span.endTimeUnixNano,
|
|
86
90
|
statusCode: span.status?.code ?? null,
|
|
@@ -125,3 +129,11 @@ function numAttr(v) {
|
|
|
125
129
|
}
|
|
126
130
|
return null;
|
|
127
131
|
}
|
|
132
|
+
/**
 * VA-387: decode an already-coerced OTLP arrayValue attribute into a
 * string array.
 *
 * Anything that is not an array (older spans, or spans from a runway
 * that never set the attribute) becomes an empty list so callers don't
 * have to null-guard. Non-string elements are dropped and a fresh array
 * is returned, so the result is always a plain list of strings that the
 * caller owns, whatever shape the attribute map carried.
 *
 * @param {unknown} v - Coerced attribute value looked up from the span's attribute map.
 * @returns {string[]} The string elements of `v`, or `[]` when `v` is not an array.
 */
function strArrayAttr(v) {
    if (!Array.isArray(v))
        return [];
    return v.filter((item) => typeof item === "string");
}
|
package/dist/dashboard/server.js
CHANGED
|
@@ -6,6 +6,10 @@ import { renderDetailView, renderListView } from "./views.js";
|
|
|
6
6
|
// Anything else stays in raw_spans for debugging but isn't rendered.
|
|
7
7
|
const DETAIL_PHASE_NAMES = ["review", "pushBranch", "openPullRequest"];
|
|
8
8
|
const ISSUE_DETAIL_RE = /^\/issue\/([^/?#]+)\/([^/?#]+)\/?$/;
|
|
9
|
+
// VA-387: canonical detail route. `:id` is the issue process span_id;
|
|
10
|
+
// the lookup falls back to the (trace_id, span_id) pair only for
|
|
11
|
+
// older `/issue/...` links that still work for back-compat.
|
|
12
|
+
const ISSUE_PROCESS_DETAIL_RE = /^\/issue-processes\/([^/?#]+)\/?$/;
|
|
9
13
|
const MAX_BODY_BYTES = 10 * 1024 * 1024; // 10 MiB — generous; a runway drain is ~kilobytes per emit.
|
|
10
14
|
/**
|
|
11
15
|
* Construct a Node HTTP server wired to the given storage. The server
|
|
@@ -58,7 +62,14 @@ async function handle(req, res, storage) {
|
|
|
58
62
|
return;
|
|
59
63
|
}
|
|
60
64
|
if (method === "GET") {
|
|
61
|
-
const
|
|
65
|
+
const pathOnly = url.split("?")[0] ?? "";
|
|
66
|
+
const issueProcessMatch = ISSUE_PROCESS_DETAIL_RE.exec(pathOnly);
|
|
67
|
+
if (issueProcessMatch) {
|
|
68
|
+
const spanId = decodeURIComponent(issueProcessMatch[1] ?? "");
|
|
69
|
+
handleIssueProcessDetailView(res, storage, spanId);
|
|
70
|
+
return;
|
|
71
|
+
}
|
|
72
|
+
const detailMatch = ISSUE_DETAIL_RE.exec(pathOnly);
|
|
62
73
|
if (detailMatch) {
|
|
63
74
|
const traceId = decodeURIComponent(detailMatch[1] ?? "");
|
|
64
75
|
const spanId = decodeURIComponent(detailMatch[2] ?? "");
|
|
@@ -66,6 +77,10 @@ async function handle(req, res, storage) {
|
|
|
66
77
|
return;
|
|
67
78
|
}
|
|
68
79
|
}
|
|
80
|
+
if (method === "GET" && (url === "/api/aggregates" || url.startsWith("/api/aggregates?"))) {
|
|
81
|
+
handleAggregates(res, storage);
|
|
82
|
+
return;
|
|
83
|
+
}
|
|
69
84
|
if (method === "GET" && url === "/healthz") {
|
|
70
85
|
res.writeHead(200, { "content-type": "text/plain" });
|
|
71
86
|
res.end("ok");
|
|
@@ -136,8 +151,24 @@ function handleDetailView(res, storage, traceId, spanId) {
|
|
|
136
151
|
writeError(res, 404, "not_found", `no issue process for trace=${traceId} span=${spanId}`);
|
|
137
152
|
return;
|
|
138
153
|
}
|
|
139
|
-
|
|
140
|
-
|
|
154
|
+
renderDetailFor(res, storage, ip);
|
|
155
|
+
}
|
|
156
|
+
/**
 * VA-387: serve the issue-process detail page for a route keyed on the
 * issue process span_id alone.
 *
 * Looks the row up via `storage.getIssueProcessBySpanId`; a hit is
 * handed to `renderDetailFor`, a miss is answered with a 404
 * `not_found` error that names the span id.
 *
 * @param res - HTTP response to write to.
 * @param storage - Dashboard storage exposing `getIssueProcessBySpanId`.
 * @param {string} spanId - Span id taken from the request path.
 */
function handleIssueProcessDetailView(res, storage, spanId) {
    const issueProcess = storage.getIssueProcessBySpanId(spanId);
    if (issueProcess) {
        renderDetailFor(res, storage, issueProcess);
    }
    else {
        writeError(res, 404, "not_found", `no issue process for span=${spanId}`);
    }
}
|
|
169
|
+
function renderDetailFor(res, storage, ip) {
|
|
170
|
+
const iterations = storage.listAgentIterations(ip.traceId, ip.spanId);
|
|
171
|
+
const phaseSpans = storage.listPhaseSpans(ip.traceId, ip.spanId, [
|
|
141
172
|
...DETAIL_PHASE_NAMES,
|
|
142
173
|
]);
|
|
143
174
|
const html = renderDetailView({
|
|
@@ -148,6 +179,18 @@ function handleDetailView(res, storage, traceId, spanId) {
|
|
|
148
179
|
res.writeHead(200, { "content-type": "text/html; charset=utf-8" });
|
|
149
180
|
res.end(html);
|
|
150
181
|
}
|
|
182
|
+
/**
 * VA-399: respond with a JSON snapshot of the evaluator-facing
 * aggregates read-model.
 *
 * The rows come straight from `storage.listAggregates()` and are
 * wrapped in an envelope tagged `evaluator_aggregates_v1`. The rows are
 * fetched before any header is written, so a storage failure never
 * leaves a half-started 200 response behind.
 *
 * @param res - HTTP response to write to.
 * @param storage - Dashboard storage exposing `listAggregates`.
 */
function handleAggregates(res, storage) {
    const payload = {
        view: "evaluator_aggregates_v1",
        rows: storage.listAggregates(),
    };
    const headers = { "content-type": "application/json" };
    res.writeHead(200, headers);
    res.end(JSON.stringify(payload));
}
|
|
151
194
|
async function readBody(req) {
|
|
152
195
|
const chunks = [];
|
|
153
196
|
let total = 0;
|
|
@@ -184,7 +227,14 @@ export async function main() {
|
|
|
184
227
|
const sqlitePath = process.env.SQLITE_PATH ?? "/data/runway.sqlite";
|
|
185
228
|
const otlpPort = parsePort("OTLP_PORT", "4318");
|
|
186
229
|
const dashboardPort = parsePort("DASHBOARD_PORT", "3001");
|
|
187
|
-
|
|
230
|
+
// VA-399: rolling-window size for the evaluator aggregates view.
|
|
231
|
+
// Defaults to 30 drains; operators bump it for longer-baseline IRA
|
|
232
|
+
// comparisons. Missing/invalid → fall through to the storage layer's
|
|
233
|
+
// default rather than crashing the dashboard at boot.
|
|
234
|
+
const aggregateWindow = parsePositiveInt(process.env.DASHBOARD_AGGREGATE_WINDOW);
|
|
235
|
+
const storage = createStorage(sqlitePath, {
|
|
236
|
+
aggregateWindowDrains: aggregateWindow,
|
|
237
|
+
});
|
|
188
238
|
const otlp = await startServer({ storage, port: otlpPort });
|
|
189
239
|
const dashboard = dashboardPort === otlpPort
|
|
190
240
|
? otlp
|
|
@@ -209,6 +259,12 @@ function parsePort(envName, fallback) {
|
|
|
209
259
|
}
|
|
210
260
|
return n;
|
|
211
261
|
}
|
|
262
|
+
/**
 * Parse an optional environment-style string as a strictly positive
 * integer.
 *
 * Returns `undefined` when `raw` is missing/empty or is not a plain
 * base-10 positive integer, so the caller can fall through to its own
 * default instead of crashing. The whole trimmed string must be an
 * integer literal: `Number.parseInt` on its own accepts a numeric
 * prefix ("30abc" → 30, "1.5" → 1) and would silently turn a typo into
 * a setting.
 *
 * @param {string | undefined} raw - Raw value, e.g. read from `process.env`.
 * @returns {number | undefined} The parsed integer, or `undefined` when absent or invalid.
 */
function parsePositiveInt(raw) {
    if (!raw)
        return undefined;
    const text = String(raw).trim();
    // An optional leading "+" stays accepted, as it was before the strict check.
    if (!/^\+?\d+$/.test(text))
        return undefined;
    const n = Number.parseInt(text, 10);
    // Reject values too large to be represented exactly as an integer.
    return Number.isSafeInteger(n) && n > 0 ? n : undefined;
}
|
|
212
268
|
// Run as a script when executed directly (e.g. inside the Docker
|
|
213
269
|
// container's CMD). Skipped when imported by tests.
|
|
214
270
|
const isMain = (() => {
|