@valescoagency/runway 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -392,7 +392,7 @@ These are tractable, just not v1.
392
392
 
393
393
  ## Status
394
394
 
395
- 0.9.0 — production-shaped and dogfooded against live Linear queues.
395
+ 0.10.0 — production-shaped and dogfooded against live Linear queues.
396
396
  The end-to-end pipeline (init → run → review → PR) is stable; surface
397
397
  may still shift as the orchestrator's policy and iteration mechanics
398
398
  mature. See [CHANGELOG.md](./CHANGELOG.md) for per-release detail.
package/dist/cli.js CHANGED
@@ -84,5 +84,6 @@ async function main() {
84
84
  }
85
85
  main().catch((err) => {
86
86
  console.error("[runway] fatal:", err instanceof Error ? err.message : err);
87
+ console.error("[runway:exit] status=failure");
87
88
  process.exit(1);
88
89
  });
@@ -68,6 +68,25 @@ export function parseRunArgs(argv) {
68
68
  }
69
69
  opts.implTurns = n;
70
70
  }
71
+ else if (a === "--review-retries") {
72
+ const v = argv[i + 1];
73
+ if (!v)
74
+ throw new Error("--review-retries requires a number");
75
+ const n = Number.parseInt(v, 10);
76
+ if (!Number.isFinite(n) || n < 0) {
77
+ throw new Error(`--review-retries must be a non-negative integer, got "${v}"`);
78
+ }
79
+ opts.reviewRetries = n;
80
+ i += 1;
81
+ }
82
+ else if (a?.startsWith("--review-retries=")) {
83
+ const v = a.slice("--review-retries=".length);
84
+ const n = Number.parseInt(v, 10);
85
+ if (!Number.isFinite(n) || n < 0) {
86
+ throw new Error(`--review-retries must be a non-negative integer, got "${v}"`);
87
+ }
88
+ opts.reviewRetries = n;
89
+ }
71
90
  else if (a === "--help" || a === "-h") {
72
91
  printRunUsage();
73
92
  process.exit(0);
@@ -102,6 +121,14 @@ OPTIONS
102
121
  (how many turns the Claude agent gets per attempt
103
122
  before it has to signal IMPL: DONE / BLOCKED).
104
123
  Overrides RUNWAY_IMPL_TURNS. Default: 3.
124
+ --review-retries N
125
+ In-run review-rejection retry budget. When the
126
+ reviewer emits REVIEW: REJECTED-RETRY — <reason>
127
+ (mechanically fixable), runway re-runs the impl
128
+ agent with the reason in {{IN_RUN_REVIEWER_FEEDBACK}}
129
+ and re-runs review. N caps the extra impl+review
130
+ pairs per drain pickup. 0 disables retries entirely.
131
+ Overrides RUNWAY_REVIEW_RETRIES. Default: 1.
105
132
  --help, -h Show this help.
106
133
 
107
134
  ENVIRONMENT
@@ -122,6 +149,11 @@ ENVIRONMENT
122
149
  RUNWAY_IMPL_TURNS default 3 — sandcastle inner turn
123
150
  budget per impl phase. Overridden by
124
151
  --impl-turns.
152
+ RUNWAY_REVIEW_RETRIES default 1 — review-rejection retry
153
+ loop. On REVIEW: REJECTED-RETRY, runway
154
+ re-runs impl with the rejection in the
155
+ prompt, then re-runs review. 0 disables
156
+ entirely. Overridden by --review-retries.
125
157
  `);
126
158
  }
127
159
  export async function runCommand(argv) {
@@ -152,6 +184,9 @@ export async function runCommand(argv) {
152
184
  ...baseConfig,
153
185
  ...(opts.project ? { linearProject: opts.project } : {}),
154
186
  ...(opts.implTurns !== undefined ? { implTurns: opts.implTurns } : {}),
187
+ ...(opts.reviewRetries !== undefined
188
+ ? { reviewRetries: opts.reviewRetries }
189
+ : {}),
155
190
  };
156
191
  const scope = config.linearProject
157
192
  ? `team ${config.linearTeam} / project ${config.linearProject}`
@@ -167,4 +202,16 @@ export async function runCommand(argv) {
167
202
  }).pipe(Effect.scoped, Effect.provide(MainLayer));
168
203
  const result = await Effect.runPromise(program);
169
204
  console.log(`[runway] done — attempts=${result.attempts} opened=${result.opened} hitl=${result.hitl} errored=${result.errored}`);
205
+ // Single-line, parser-friendly completion marker. Background
206
+ // watchers (Claude Code's `run_in_background` bash task, CI,
207
+ // scripts) can grep for `[runway:exit]` instead of guessing
208
+ // whether the drain is still in flight.
209
+ console.log(`[runway:exit] status=success attempts=${result.attempts} opened=${result.opened} hitl=${result.hitl} errored=${result.errored}`);
210
+ // Hard exit so any lingering handle (OTel BatchSpanProcessor's
211
+ // interval when OTEL_EXPORTER_OTLP_ENDPOINT is set, a Docker
212
+ // stream Sandcastle left open, etc.) can't keep the process — and
213
+ // the background task that launched it — alive after the drain is
214
+ // logically done. By this point `Effect.scoped` has already torn
215
+ // down its finalizers.
216
+ process.exit(0);
170
217
  }
package/dist/config.js CHANGED
@@ -24,6 +24,13 @@ const configEffect = EConfig.all({
24
24
  message: "RUNWAY_IMPL_TURNS must be a positive integer",
25
25
  validation: (n) => n > 0,
26
26
  })),
27
+ // VA-418: zero is a valid value here (operator kill-switch) so the
28
+ // validation accepts >= 0, unlike implTurns/maxIterations which
29
+ // both require >= 1.
30
+ reviewRetries: EConfig.integer("RUNWAY_REVIEW_RETRIES").pipe(EConfig.withDefault(1), EConfig.validate({
31
+ message: "RUNWAY_REVIEW_RETRIES must be a non-negative integer",
32
+ validation: (n) => n >= 0,
33
+ })),
27
34
  commentAuthorAllowlist: EConfig.option(EConfig.string("RUNWAY_COMMENT_AUTHOR_ALLOWLIST")),
28
35
  }).pipe(Effect.map((raw) => ({
29
36
  linearApiKey: raw.linearApiKey,
@@ -37,6 +44,7 @@ const configEffect = EConfig.all({
37
44
  hitlLabel: raw.hitlLabel,
38
45
  maxIterations: raw.maxIterations,
39
46
  implTurns: raw.implTurns,
47
+ reviewRetries: raw.reviewRetries,
40
48
  commentAuthorAllowlist: Option.getOrUndefined(raw.commentAuthorAllowlist)
41
49
  ?.split(",")
42
50
  .map((s) => s.trim())
@@ -11,12 +11,18 @@
11
11
  /**
12
12
  * Coerce an OTLP attribute value to a plain JS scalar. We collapse
13
13
  * the typed wire variants (`stringValue` / `intValue` / `boolValue` /
14
- * `doubleValue`) into one return path so callers downstream can
15
- * pattern-match without knowing the OTLP shape.
14
+ * `doubleValue` / `arrayValue`) into one return path so callers
15
+ * downstream can pattern-match without knowing the OTLP shape.
16
16
  *
17
17
  * `intValue` round-trips as a string to preserve int64 precision.
18
18
  * Callers that want a `number` (e.g. for counters under 2^53) should
19
19
  * `Number(...)` it themselves.
20
+ *
21
+ * VA-387: `arrayValue` collapses to a `readonly string[]` so the
22
+ * dashboard's label-style attributes (`runway.issue.labels`) survive
23
+ * the wire trip with their structure intact. Non-string array
24
+ * elements drop silently — projector callers only ever ask for
25
+ * string arrays today.
20
26
  */
21
27
  export function attrValue(attr) {
22
28
  if (!attr)
@@ -35,6 +41,14 @@ export function attrValue(attr) {
35
41
  ? v.intValue
36
42
  : v.intValue;
37
43
  }
44
+ if (v.arrayValue !== undefined) {
45
+ const items = [];
46
+ for (const inner of v.arrayValue.values) {
47
+ if (inner.stringValue !== undefined)
48
+ items.push(inner.stringValue);
49
+ }
50
+ return items;
51
+ }
38
52
  return undefined;
39
53
  }
40
54
  /**
@@ -78,9 +78,13 @@ function projectIssueProcess(span) {
78
78
  parentSpanId: span.parentSpanId ?? null,
79
79
  issueIdentifier: identifier,
80
80
  issueId: strAttr(m["runway.issue.id"]) ?? null,
81
+ issueTitle: strAttr(m["runway.issue.title"]) ?? null,
82
+ issueLabels: strArrayAttr(m["runway.issue.labels"]),
81
83
  branch: strAttr(m["runway.branch"]) ?? null,
82
84
  outcomeKind: strAttr(m["runway.outcome.kind"]) ?? null,
83
85
  outcomeDetail: strAttr(m["runway.outcome.detail"]) ?? null,
86
+ prUrl: strAttr(m["runway.pr.url"]) ?? null,
87
+ hitlReason: strAttr(m["runway.hitl.reason"]) ?? null,
84
88
  startTimeUnixNano: span.startTimeUnixNano,
85
89
  endTimeUnixNano: span.endTimeUnixNano,
86
90
  statusCode: span.status?.code ?? null,
@@ -125,3 +129,11 @@ function numAttr(v) {
125
129
  }
126
130
  return null;
127
131
  }
/**
 * VA-387: normalise a decoded attribute value into an array.
 *
 * Anything that is not already an array (for example a missing attribute
 * on an older span) collapses to a fresh empty list, so callers never
 * have to null-guard. Element types are not re-checked here.
 *
 * @param {unknown} v - Decoded attribute value.
 * @returns {unknown[]} `v` itself when it is an array, otherwise `[]`.
 */
function strArrayAttr(v) {
    if (Array.isArray(v)) {
        return v;
    }
    return [];
}
@@ -6,6 +6,10 @@ import { renderDetailView, renderListView } from "./views.js";
6
6
  // Anything else stays in raw_spans for debugging but isn't rendered.
7
7
  const DETAIL_PHASE_NAMES = ["review", "pushBranch", "openPullRequest"];
8
8
  const ISSUE_DETAIL_RE = /^\/issue\/([^/?#]+)\/([^/?#]+)\/?$/;
9
+ // VA-387: canonical detail route. `:id` is the issue process span_id;
10
+ // the lookup falls back to the (trace_id, span_id) pair only for
11
+ // older `/issue/...` links that still work for back-compat.
12
+ const ISSUE_PROCESS_DETAIL_RE = /^\/issue-processes\/([^/?#]+)\/?$/;
9
13
  const MAX_BODY_BYTES = 10 * 1024 * 1024; // 10 MiB — generous; a runway drain is ~kilobytes per emit.
10
14
  /**
11
15
  * Construct a Node HTTP server wired to the given storage. The server
@@ -58,7 +62,14 @@ async function handle(req, res, storage) {
58
62
  return;
59
63
  }
60
64
  if (method === "GET") {
61
- const detailMatch = ISSUE_DETAIL_RE.exec(url.split("?")[0] ?? "");
65
+ const pathOnly = url.split("?")[0] ?? "";
66
+ const issueProcessMatch = ISSUE_PROCESS_DETAIL_RE.exec(pathOnly);
67
+ if (issueProcessMatch) {
68
+ const spanId = decodeURIComponent(issueProcessMatch[1] ?? "");
69
+ handleIssueProcessDetailView(res, storage, spanId);
70
+ return;
71
+ }
72
+ const detailMatch = ISSUE_DETAIL_RE.exec(pathOnly);
62
73
  if (detailMatch) {
63
74
  const traceId = decodeURIComponent(detailMatch[1] ?? "");
64
75
  const spanId = decodeURIComponent(detailMatch[2] ?? "");
@@ -66,6 +77,10 @@ async function handle(req, res, storage) {
66
77
  return;
67
78
  }
68
79
  }
80
+ if (method === "GET" && (url === "/api/aggregates" || url.startsWith("/api/aggregates?"))) {
81
+ handleAggregates(res, storage);
82
+ return;
83
+ }
69
84
  if (method === "GET" && url === "/healthz") {
70
85
  res.writeHead(200, { "content-type": "text/plain" });
71
86
  res.end("ok");
@@ -136,8 +151,24 @@ function handleDetailView(res, storage, traceId, spanId) {
136
151
  writeError(res, 404, "not_found", `no issue process for trace=${traceId} span=${spanId}`);
137
152
  return;
138
153
  }
139
- const iterations = storage.listAgentIterations(traceId, spanId);
140
- const phaseSpans = storage.listPhaseSpans(traceId, spanId, [
154
+ renderDetailFor(res, storage, ip);
155
+ }
156
+ /**
157
+ * VA-387: detail-route handler keyed on the issue process span_id
158
+ * alone. Reuses the same view model as the older two-segment route
159
+ * once the row is resolved.
160
+ */
161
+ function handleIssueProcessDetailView(res, storage, spanId) {
162
+ const ip = storage.getIssueProcessBySpanId(spanId);
163
+ if (!ip) {
164
+ writeError(res, 404, "not_found", `no issue process for span=${spanId}`);
165
+ return;
166
+ }
167
+ renderDetailFor(res, storage, ip);
168
+ }
169
+ function renderDetailFor(res, storage, ip) {
170
+ const iterations = storage.listAgentIterations(ip.traceId, ip.spanId);
171
+ const phaseSpans = storage.listPhaseSpans(ip.traceId, ip.spanId, [
141
172
  ...DETAIL_PHASE_NAMES,
142
173
  ]);
143
174
  const html = renderDetailView({
@@ -148,6 +179,18 @@ function handleDetailView(res, storage, traceId, spanId) {
148
179
  res.writeHead(200, { "content-type": "text/html; charset=utf-8" });
149
180
  res.end(html);
150
181
  }
/**
 * VA-399: serve a JSON snapshot of the evaluator-facing aggregates
 * read-model.
 *
 * The payload is `{ view: "evaluator_aggregates_v1", rows }`, where
 * `rows` is whatever `storage.listAggregates()` returns. See
 * `read-model.md` for the field contract and versioning policy.
 *
 * @param res - HTTP response to write to.
 * @param storage - Storage handle exposing `listAggregates`.
 */
function handleAggregates(res, storage) {
    const payload = {
        view: "evaluator_aggregates_v1",
        rows: storage.listAggregates(),
    };
    const headers = { "content-type": "application/json" };
    res.writeHead(200, headers);
    res.end(JSON.stringify(payload));
}
151
194
  async function readBody(req) {
152
195
  const chunks = [];
153
196
  let total = 0;
@@ -184,7 +227,14 @@ export async function main() {
184
227
  const sqlitePath = process.env.SQLITE_PATH ?? "/data/runway.sqlite";
185
228
  const otlpPort = parsePort("OTLP_PORT", "4318");
186
229
  const dashboardPort = parsePort("DASHBOARD_PORT", "3001");
187
- const storage = createStorage(sqlitePath);
230
+ // VA-399: rolling-window size for the evaluator aggregates view.
231
+ // Defaults to 30 drains; operators bump it for longer-baseline IRA
232
+ // comparisons. Missing/invalid → fall through to the storage layer's
233
+ // default rather than crashing the dashboard at boot.
234
+ const aggregateWindow = parsePositiveInt(process.env.DASHBOARD_AGGREGATE_WINDOW);
235
+ const storage = createStorage(sqlitePath, {
236
+ aggregateWindowDrains: aggregateWindow,
237
+ });
188
238
  const otlp = await startServer({ storage, port: otlpPort });
189
239
  const dashboard = dashboardPort === otlpPort
190
240
  ? otlp
@@ -209,6 +259,12 @@ function parsePort(envName, fallback) {
209
259
  }
210
260
  return n;
211
261
  }
/**
 * Parse an optional string (typically an environment variable) as a
 * strictly positive integer.
 *
 * Only a plain run of decimal digits, optionally prefixed with `+` and
 * surrounded by whitespace, is accepted. `Number.parseInt` on its own
 * silently ignores trailing garbage ("30abc" -> 30, "1.5" -> 1), which
 * would let a mistyped value through instead of falling back to the
 * caller's default.
 *
 * @param {string | undefined} raw - Raw value; may be missing or empty.
 * @returns {number | undefined} The parsed integer, or `undefined` when
 *   the value is missing, malformed, zero, or beyond the safe-integer range.
 */
function parsePositiveInt(raw) {
    if (!raw)
        return undefined;
    const text = String(raw).trim();
    // Reject anything that is not purely digits before parsing.
    if (!/^\+?\d+$/.test(text))
        return undefined;
    const n = Number.parseInt(text, 10);
    // Safe-integer check keeps absurdly long digit runs from producing a
    // value that later stringifies in exponent notation.
    return Number.isSafeInteger(n) && n > 0 ? n : undefined;
}
212
268
  // Run as a script when executed directly (e.g. inside the Docker
213
269
  // container's CMD). Skipped when imported by tests.
214
270
  const isMain = (() => {
@@ -21,9 +21,13 @@ const SCHEMA = `
21
21
  parent_span_id TEXT,
22
22
  issue_identifier TEXT NOT NULL,
23
23
  issue_id TEXT,
24
+ issue_title TEXT,
25
+ issue_labels TEXT,
24
26
  branch TEXT,
25
27
  outcome_kind TEXT,
26
28
  outcome_detail TEXT,
29
+ pr_url TEXT,
30
+ hitl_reason TEXT,
27
31
  start_time_unix_nano TEXT NOT NULL,
28
32
  end_time_unix_nano TEXT NOT NULL,
29
33
  status_code INTEGER,
@@ -38,6 +42,9 @@ const SCHEMA = `
38
42
  CREATE INDEX IF NOT EXISTS idx_issue_processes_trace_id
39
43
  ON issue_processes(trace_id);
40
44
 
45
+ CREATE INDEX IF NOT EXISTS idx_issue_processes_span_id
46
+ ON issue_processes(span_id);
47
+
41
48
  CREATE TABLE IF NOT EXISTS raw_spans (
42
49
  trace_id TEXT NOT NULL,
43
50
  span_id TEXT NOT NULL,
@@ -63,6 +70,128 @@ const SCHEMA = `
63
70
  CREATE INDEX IF NOT EXISTS idx_agent_iterations_issue_process
64
71
  ON agent_iterations(trace_id, issue_process_id, iteration_index);
65
72
  `;
/** Number of most-recent drains aggregated when no usable window is given. */
const DEFAULT_AGGREGATE_WINDOW = 30;
/**
 * VA-399: SQL VIEW that computes the evaluator-facing aggregates over
 * the last N drains. N is interpolated at view-creation time because
 * SQLite views can't take parameters — when the dashboard process
 * starts with a different `DASHBOARD_AGGREGATE_WINDOW`, the view is
 * dropped and recreated with the new LIMIT.
 *
 * Median uses the "average of the two middle values when N is even,
 * the middle value when N is odd" convention; p95 uses the
 * nearest-rank method (smallest observed value whose rank meets-or-
 * exceeds 95%). See `read-model.md` for the field-by-field contract.
 *
 * `reviewer_rejection_rate` keys on the detail prefix emitted by
 * `src/review.ts` ("Sub-agent review rejected: ..."). It's a subset
 * of `hitl_escape_rate` — a review rejection routes to HITL, so both
 * rates count the same row.
 *
 * @param {number} windowDrains - Requested window size. Fractions are
 *   floored and values below 1 are raised to 1. Values that do not coerce
 *   to a number fall back to `DEFAULT_AGGREGATE_WINDOW`; values beyond
 *   the safe-integer range are clamped to `Number.MAX_SAFE_INTEGER`.
 * @returns {string} A two-statement script: `DROP VIEW IF EXISTS`
 *   followed by `CREATE VIEW evaluator_aggregates_v1`.
 */
function aggregatesViewDdl(windowDrains) {
    // windowDrains is the only spot we interpolate rather than
    // parameter-bind (CREATE VIEW can't take params). Coerce to a
    // positive integer so a hostile env var can't smuggle SQL through.
    // NaN must be handled explicitly: Math.max(1, NaN) is NaN, which would
    // otherwise land in the DDL as `LIMIT NaN`. The upper clamp keeps the
    // interpolated text a plain digit run (no `1e+25` / `Infinity`).
    const floored = Math.floor(Number(windowDrains));
    const n = Number.isNaN(floored)
        ? DEFAULT_AGGREGATE_WINDOW
        : Math.min(Math.max(1, floored), Number.MAX_SAFE_INTEGER);
    return `
    DROP VIEW IF EXISTS evaluator_aggregates_v1;
    CREATE VIEW evaluator_aggregates_v1 AS
    WITH recent_drains AS (
      SELECT trace_id
      FROM drains
      ORDER BY CAST(start_time_unix_nano AS INTEGER) DESC
      LIMIT ${n}
    ),
    process_rows AS (
      SELECT
        ip.trace_id,
        ip.span_id,
        ip.outcome_kind,
        COALESCE(ip.outcome_detail, '') AS outcome_detail,
        CASE
          WHEN instr(ip.issue_identifier, '-') > 0
            THEN substr(ip.issue_identifier, 1, instr(ip.issue_identifier, '-') - 1)
          ELSE ip.issue_identifier
        END AS category,
        (CAST(ip.end_time_unix_nano AS INTEGER) - CAST(ip.start_time_unix_nano AS INTEGER)) / 1000000 AS wall_time_ms,
        (
          SELECT COUNT(*) FROM agent_iterations a
          WHERE a.trace_id = ip.trace_id AND a.issue_process_id = ip.span_id
        ) AS iteration_count
      FROM issue_processes ip
      WHERE ip.trace_id IN (SELECT trace_id FROM recent_drains)
    ),
    wt_ranked AS (
      SELECT
        category,
        wall_time_ms,
        ROW_NUMBER() OVER (PARTITION BY category ORDER BY wall_time_ms) AS rn,
        COUNT(*) OVER (PARTITION BY category) AS cnt
      FROM process_rows
    ),
    it_ranked AS (
      SELECT
        category,
        iteration_count,
        ROW_NUMBER() OVER (PARTITION BY category ORDER BY iteration_count) AS rn,
        COUNT(*) OVER (PARTITION BY category) AS cnt
      FROM process_rows
    ),
    wt_median AS (
      SELECT category, AVG(wall_time_ms * 1.0) AS value
      FROM wt_ranked
      WHERE rn IN ((cnt + 1) / 2, (cnt / 2) + 1)
      GROUP BY category
    ),
    wt_p95 AS (
      SELECT category, MIN(wall_time_ms) AS value
      FROM wt_ranked
      WHERE rn >= (cnt * 95 + 99) / 100
      GROUP BY category
    ),
    it_median AS (
      SELECT category, AVG(iteration_count * 1.0) AS value
      FROM it_ranked
      WHERE rn IN ((cnt + 1) / 2, (cnt / 2) + 1)
      GROUP BY category
    ),
    it_p95 AS (
      SELECT category, MIN(iteration_count) AS value
      FROM it_ranked
      WHERE rn >= (cnt * 95 + 99) / 100
      GROUP BY category
    ),
    rates AS (
      SELECT
        category,
        COUNT(*) AS sample_size,
        AVG(CASE WHEN outcome_kind = 'hitl' AND outcome_detail LIKE 'Sub-agent review rejected%'
                 THEN 1.0 ELSE 0.0 END) AS reviewer_rejection_rate,
        AVG(CASE WHEN outcome_kind = 'reverted' THEN 1.0 ELSE 0.0 END) AS revert_rate,
        AVG(CASE WHEN outcome_kind = 'hitl' THEN 1.0 ELSE 0.0 END) AS hitl_escape_rate,
        AVG(CASE WHEN outcome_kind = 'errored' THEN 1.0 ELSE 0.0 END) AS infra_error_rate
      FROM process_rows
      GROUP BY category
    )
    SELECT
      r.category AS category,
      r.sample_size AS sample_size,
      itm.value AS median_iteration_count,
      itp.value AS p95_iteration_count,
      wtm.value AS median_wall_time_ms,
      wtp.value AS p95_wall_time_ms,
      r.reviewer_rejection_rate AS reviewer_rejection_rate,
      r.revert_rate AS revert_rate,
      r.hitl_escape_rate AS hitl_escape_rate,
      r.infra_error_rate AS infra_error_rate
    FROM rates r
    LEFT JOIN wt_median wtm ON wtm.category = r.category
    LEFT JOIN wt_p95 wtp ON wtp.category = r.category
    LEFT JOIN it_median itm ON itm.category = r.category
    LEFT JOIN it_p95 itp ON itp.category = r.category
    ORDER BY r.category;
  `;
}
66
195
  /**
67
196
  * Open (or create) a SQLite database at `path` and return a typed
68
197
  * `Storage` handle. Pass `:memory:` for tests — the in-memory db
@@ -72,9 +201,35 @@ const SCHEMA = `
72
201
  * OTel SDK retrying a flush) don't blow up the receiver — last writer
73
202
  * wins on (trace_id, span_id).
74
203
  */
75
- export function createStorage(path) {
204
+ export function createStorage(path, opts = {}) {
76
205
  const db = new DatabaseSync(path);
77
206
  db.exec(SCHEMA);
207
+ // VA-387: idempotent column adds for DBs created against an older
208
+ // schema. `CREATE TABLE IF NOT EXISTS` won't migrate an existing
209
+ // table; SQLite has no `ADD COLUMN IF NOT EXISTS`, so we swallow
210
+ // the duplicate-column error individually. Runs BEFORE VA-399's
211
+ // view install — `evaluator_aggregates_v1` reads from
212
+ // `issue_processes`, so the columns it may query must exist first.
213
+ for (const sql of [
214
+ `ALTER TABLE issue_processes ADD COLUMN issue_title TEXT`,
215
+ `ALTER TABLE issue_processes ADD COLUMN issue_labels TEXT`,
216
+ `ALTER TABLE issue_processes ADD COLUMN pr_url TEXT`,
217
+ `ALTER TABLE issue_processes ADD COLUMN hitl_reason TEXT`,
218
+ ]) {
219
+ try {
220
+ db.exec(sql);
221
+ }
222
+ catch {
223
+ // Column already present — fresh CREATE TABLE path, or a prior
224
+ // dashboard boot ran the same migration.
225
+ }
226
+ }
227
+ // VA-399: install the evaluator-facing read-model view after the
228
+ // base tables exist (and after VA-387's column migrations above),
229
+ // but before any prepared statement is created — a
230
+ // `SELECT FROM evaluator_aggregates_v1` would otherwise race the
231
+ // DDL on first use.
232
+ db.exec(aggregatesViewDdl(opts.aggregateWindowDrains ?? DEFAULT_AGGREGATE_WINDOW));
78
233
  const insertDrain = db.prepare(`
79
234
  INSERT INTO drains (
80
235
  trace_id, span_id, start_time_unix_nano, end_time_unix_nano,
@@ -93,16 +248,21 @@ export function createStorage(path) {
93
248
  const insertIssueProcess = db.prepare(`
94
249
  INSERT INTO issue_processes (
95
250
  trace_id, span_id, parent_span_id, issue_identifier, issue_id,
96
- branch, outcome_kind, outcome_detail,
251
+ issue_title, issue_labels, branch, outcome_kind, outcome_detail,
252
+ pr_url, hitl_reason,
97
253
  start_time_unix_nano, end_time_unix_nano, status_code, status_message
98
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
254
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
99
255
  ON CONFLICT (trace_id, span_id) DO UPDATE SET
100
256
  parent_span_id = excluded.parent_span_id,
101
257
  issue_identifier = excluded.issue_identifier,
102
258
  issue_id = excluded.issue_id,
259
+ issue_title = excluded.issue_title,
260
+ issue_labels = excluded.issue_labels,
103
261
  branch = excluded.branch,
104
262
  outcome_kind = excluded.outcome_kind,
105
263
  outcome_detail = excluded.outcome_detail,
264
+ pr_url = excluded.pr_url,
265
+ hitl_reason = excluded.hitl_reason,
106
266
  start_time_unix_nano = excluded.start_time_unix_nano,
107
267
  end_time_unix_nano = excluded.end_time_unix_nano,
108
268
  status_code = excluded.status_code,
@@ -130,35 +290,40 @@ export function createStorage(path) {
130
290
  `);
131
291
  // Two list variants instead of one with conditional SQL — keeps
132
292
  // each prepared statement static.
133
- const listAll = db.prepare(`
134
- SELECT
293
+ const ISSUE_PROCESS_COLUMNS = `
135
294
  trace_id, span_id, parent_span_id, issue_identifier, issue_id,
136
- branch, outcome_kind, outcome_detail,
295
+ issue_title, issue_labels, branch, outcome_kind, outcome_detail,
296
+ pr_url, hitl_reason,
137
297
  start_time_unix_nano, end_time_unix_nano, status_code, status_message,
138
298
  inserted_at
299
+ `;
300
+ const listAll = db.prepare(`
301
+ SELECT ${ISSUE_PROCESS_COLUMNS}
139
302
  FROM issue_processes
140
303
  ORDER BY inserted_at DESC, span_id DESC
141
304
  LIMIT ?
142
305
  `);
143
306
  const listByTrace = db.prepare(`
144
- SELECT
145
- trace_id, span_id, parent_span_id, issue_identifier, issue_id,
146
- branch, outcome_kind, outcome_detail,
147
- start_time_unix_nano, end_time_unix_nano, status_code, status_message,
148
- inserted_at
307
+ SELECT ${ISSUE_PROCESS_COLUMNS}
149
308
  FROM issue_processes
150
309
  WHERE trace_id = ?
151
310
  ORDER BY inserted_at DESC, span_id DESC
152
311
  LIMIT ?
153
312
  `);
154
313
  const getProcessStmt = db.prepare(`
155
- SELECT
156
- trace_id, span_id, parent_span_id, issue_identifier, issue_id,
157
- branch, outcome_kind, outcome_detail,
158
- start_time_unix_nano, end_time_unix_nano, status_code, status_message,
159
- inserted_at
314
+ SELECT ${ISSUE_PROCESS_COLUMNS}
160
315
  FROM issue_processes
161
316
  WHERE trace_id = ? AND span_id = ?
317
+ `);
318
+ // VA-387: span_id is unique in practice (random 64-bit ids); the
319
+ // detail route at `/issue-processes/:id` keys on span_id alone so
320
+ // operators don't have to type the trace_id in URLs.
321
+ const getProcessBySpanStmt = db.prepare(`
322
+ SELECT ${ISSUE_PROCESS_COLUMNS}
323
+ FROM issue_processes
324
+ WHERE span_id = ?
325
+ ORDER BY inserted_at DESC
326
+ LIMIT 1
162
327
  `);
163
328
  const listIterations = db.prepare(`
164
329
  SELECT
@@ -169,11 +334,16 @@ export function createStorage(path) {
169
334
  WHERE trace_id = ? AND issue_process_id = ?
170
335
  ORDER BY iteration_index ASC
171
336
  `);
337
+ const selectAggregates = db.prepare(`SELECT * FROM evaluator_aggregates_v1`);
172
338
  const saveDrain = (d) => {
173
339
  insertDrain.run(d.traceId, d.spanId, d.startTimeUnixNano, d.endTimeUnixNano, asInt(d.attempts), asInt(d.opened), asInt(d.hitl), asInt(d.errored), asInt(d.statusCode), d.statusMessage);
174
340
  };
175
341
  const saveIssueProcess = (p) => {
176
- insertIssueProcess.run(p.traceId, p.spanId, p.parentSpanId, p.issueIdentifier, p.issueId, p.branch, p.outcomeKind, p.outcomeDetail, p.startTimeUnixNano, p.endTimeUnixNano, asInt(p.statusCode), p.statusMessage);
342
+ insertIssueProcess.run(p.traceId, p.spanId, p.parentSpanId, p.issueIdentifier, p.issueId, p.issueTitle,
343
+ // VA-387: labels round-trip as a JSON array string. Keeping them
344
+ // in one column avoids a label-many-to-many table for a feature
345
+ // that's read-only on the dashboard side.
346
+ p.issueLabels.length === 0 ? null : JSON.stringify(p.issueLabels), p.branch, p.outcomeKind, p.outcomeDetail, p.prUrl, p.hitlReason, p.startTimeUnixNano, p.endTimeUnixNano, asInt(p.statusCode), p.statusMessage);
177
347
  };
178
348
  const saveAgentIteration = (a) => {
179
349
  insertAgentIteration.run(a.traceId, a.spanId, a.issueProcessSpanId, asInt(a.iterationIndex), a.startTimeUnixNano, a.endTimeUnixNano, a.sandcastleRunId, a.exitStatus);
@@ -192,6 +362,10 @@ export function createStorage(path) {
192
362
  const row = getProcessStmt.get(traceId, spanId);
193
363
  return row ? rowToIssueProcess(row) : undefined;
194
364
  };
365
+ const getIssueProcessBySpanId = (spanId) => {
366
+ const row = getProcessBySpanStmt.get(spanId);
367
+ return row ? rowToIssueProcess(row) : undefined;
368
+ };
195
369
  const listAgentIterations = (traceId, issueProcessSpanId) => {
196
370
  const rows = listIterations.all(traceId, issueProcessSpanId);
197
371
  return rows.map(rowToAgentIteration);
@@ -227,6 +401,7 @@ export function createStorage(path) {
227
401
  .all(traceId, issueProcessSpanId, ...names);
228
402
  return rows.map(rowToPhaseSpan);
229
403
  };
404
+ const listAggregates = () => selectAggregates.all().map(rowToAggregate);
230
405
  const close = () => {
231
406
  db.close();
232
407
  };
@@ -237,8 +412,10 @@ export function createStorage(path) {
237
412
  saveRawSpan,
238
413
  listIssueProcesses,
239
414
  getIssueProcess,
415
+ getIssueProcessBySpanId,
240
416
  listAgentIterations,
241
417
  listPhaseSpans,
418
+ listAggregates,
242
419
  close,
243
420
  };
244
421
  }
@@ -256,9 +433,13 @@ function rowToIssueProcess(row) {
256
433
  parentSpanId: nullableStr(r.parent_span_id),
257
434
  issueIdentifier: String(r.issue_identifier ?? ""),
258
435
  issueId: nullableStr(r.issue_id),
436
+ issueTitle: nullableStr(r.issue_title),
437
+ issueLabels: parseLabels(r.issue_labels),
259
438
  branch: nullableStr(r.branch),
260
439
  outcomeKind: nullableStr(r.outcome_kind),
261
440
  outcomeDetail: nullableStr(r.outcome_detail),
441
+ prUrl: nullableStr(r.pr_url),
442
+ hitlReason: nullableStr(r.hitl_reason),
262
443
  startTimeUnixNano: String(r.start_time_unix_nano ?? ""),
263
444
  endTimeUnixNano: String(r.end_time_unix_nano ?? ""),
264
445
  statusCode: nullableNum(r.status_code),
@@ -266,6 +447,26 @@ function rowToIssueProcess(row) {
266
447
  insertedAt: String(r.inserted_at ?? ""),
267
448
  };
268
449
  }
/**
 * VA-387: turn the JSON-encoded `issue_labels` column value back into
 * an array of strings.
 *
 * Every unusable input — NULL, a non-string value, malformed JSON, or
 * JSON that is not an array — yields an empty list. Non-string array
 * elements are dropped.
 *
 * @param {unknown} v - Raw column value.
 * @returns {string[]} The decoded labels, possibly empty.
 */
function parseLabels(v) {
    // `typeof null` / `typeof undefined` are never "string", so this one
    // guard also covers the missing-value cases.
    if (typeof v !== "string") {
        return [];
    }
    let decoded;
    try {
        decoded = JSON.parse(v);
    }
    catch {
        return [];
    }
    return Array.isArray(decoded)
        ? decoded.filter((item) => typeof item === "string")
        : [];
}
269
470
  function rowToAgentIteration(row) {
270
471
  const r = row;
271
472
  return {
@@ -302,3 +503,18 @@ function nullableNum(v) {
302
503
  const n = Number(v);
303
504
  return Number.isFinite(n) ? n : null;
304
505
  }
/**
 * Map one `evaluator_aggregates_v1` result row (snake_case columns) to
 * its camelCase object form.
 *
 * A missing `category` becomes `""`; missing sample size and rate
 * columns become `0`; the median / p95 columns go through `nullableNum`.
 *
 * @param row - Raw row as returned by the prepared statement.
 * @returns The camelCase aggregate record.
 */
function rowToAggregate(row) {
    const r = row;
    // Shared coercion for the columns that default to zero when absent.
    const numberOrZero = (value) => Number(value ?? 0);
    const category = String(r.category ?? "");
    const sampleSize = numberOrZero(r.sample_size);
    const medianIterationCount = nullableNum(r.median_iteration_count);
    const p95IterationCount = nullableNum(r.p95_iteration_count);
    const medianWallTimeMs = nullableNum(r.median_wall_time_ms);
    const p95WallTimeMs = nullableNum(r.p95_wall_time_ms);
    const reviewerRejectionRate = numberOrZero(r.reviewer_rejection_rate);
    const revertRate = numberOrZero(r.revert_rate);
    const hitlEscapeRate = numberOrZero(r.hitl_escape_rate);
    const infraErrorRate = numberOrZero(r.infra_error_rate);
    return {
        category,
        sampleSize,
        medianIterationCount,
        p95IterationCount,
        medianWallTimeMs,
        p95WallTimeMs,
        reviewerRejectionRate,
        revertRate,
        hitlEscapeRate,
        infraErrorRate,
    };
}