npm - @valescoagency/runway - Versions diffs - 0.9.0 → 0.10.0 - Mend

@valescoagency/runway 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/README.md +1 -1
package/dist/cli.js +1 -0
package/dist/commands/run.js +47 -0
package/dist/config.js +8 -0
package/dist/dashboard/otlp.js +16 -2
package/dist/dashboard/projector.js +12 -0
package/dist/dashboard/server.js +60 -4
package/dist/dashboard/storage.js +233 -17
package/dist/dashboard/views.js +18 -1
package/dist/finalize.js +34 -2
package/dist/git.js +170 -22
package/dist/implement.js +6 -0
package/dist/linear.js +35 -9
package/dist/orchestrator.js +99 -18
package/dist/prompts.js +40 -0
package/dist/review.js +32 -18
package/package.json +1 -1
package/prompts/implement.md +11 -0
package/prompts/review.md +48 -6

package/README.md CHANGED Viewed

@@ -392,7 +392,7 @@ These are tractable, just not v1.
 ## Status
-0.9.0 — production-shaped and dogfooded against live Linear queues.
+0.10.0 — production-shaped and dogfooded against live Linear queues.
 The end-to-end pipeline (init → run → review → PR) is stable; surface
 may still shift as the orchestrator's policy and iteration mechanics
 mature. See [CHANGELOG.md](./CHANGELOG.md) for per-release detail.

package/dist/cli.js CHANGED Viewed

@@ -84,5 +84,6 @@ async function main() {
 }
 main().catch((err) => {
     console.error("[runway] fatal:", err instanceof Error ? err.message : err);
+    console.error("[runway:exit] status=failure");
     process.exit(1);
 });

package/dist/commands/run.js CHANGED Viewed

@@ -68,6 +68,25 @@ export function parseRunArgs(argv) {
             }
             opts.implTurns = n;
         }
+        else if (a === "--review-retries") {
+            const v = argv[i + 1];
+            if (!v)
+                throw new Error("--review-retries requires a number");
+            const n = Number.parseInt(v, 10);
+            if (!Number.isFinite(n) || n < 0) {
+                throw new Error(`--review-retries must be a non-negative integer, got "${v}"`);
+            }
+            opts.reviewRetries = n;
+            i += 1;
+        }
+        else if (a?.startsWith("--review-retries=")) {
+            const v = a.slice("--review-retries=".length);
+            const n = Number.parseInt(v, 10);
+            if (!Number.isFinite(n) || n < 0) {
+                throw new Error(`--review-retries must be a non-negative integer, got "${v}"`);
+            }
+            opts.reviewRetries = n;
+        }
         else if (a === "--help" || a === "-h") {
             printRunUsage();
             process.exit(0);
@@ -102,6 +121,14 @@ OPTIONS
                   (how many turns the Claude agent gets per attempt
                   before it has to signal IMPL: DONE / BLOCKED).
                   Overrides RUNWAY_IMPL_TURNS. Default: 3.
+  --review-retries N
+                  In-run review-rejection retry budget. When the
+                  reviewer emits REVIEW: REJECTED-RETRY — <reason>
+                  (mechanically fixable), runway re-runs the impl
+                  agent with the reason in {{IN_RUN_REVIEWER_FEEDBACK}}
+                  and re-runs review. N caps the extra impl+review
+                  pairs per drain pickup. 0 disables retries entirely.
+                  Overrides RUNWAY_REVIEW_RETRIES. Default: 1.
   --help, -h      Show this help.
 ENVIRONMENT
@@ -122,6 +149,11 @@ ENVIRONMENT
   RUNWAY_IMPL_TURNS           default 3  — sandcastle inner turn
                               budget per impl phase. Overridden by
                               --impl-turns.
+  RUNWAY_REVIEW_RETRIES       default 1  — review-rejection retry
+                              loop. On REVIEW: REJECTED-RETRY, runway
+                              re-runs impl with the rejection in the
+                              prompt, then re-runs review. 0 disables
+                              entirely. Overridden by --review-retries.
 `);
 }
 export async function runCommand(argv) {
@@ -152,6 +184,9 @@ export async function runCommand(argv) {
             ...baseConfig,
             ...(opts.project ? { linearProject: opts.project } : {}),
             ...(opts.implTurns !== undefined ? { implTurns: opts.implTurns } : {}),
+            ...(opts.reviewRetries !== undefined
+                ? { reviewRetries: opts.reviewRetries }
+                : {}),
         };
         const scope = config.linearProject
             ? `team ${config.linearTeam} / project ${config.linearProject}`
@@ -167,4 +202,16 @@ export async function runCommand(argv) {
     }).pipe(Effect.scoped, Effect.provide(MainLayer));
     const result = await Effect.runPromise(program);
     console.log(`[runway] done — attempts=${result.attempts} opened=${result.opened} hitl=${result.hitl} errored=${result.errored}`);
+    // Single-line, parser-friendly completion marker. Background
+    // watchers (Claude Code's `run_in_background` bash task, CI,
+    // scripts) can grep for `[runway:exit]` instead of guessing
+    // whether the drain is still in flight.
+    console.log(`[runway:exit] status=success attempts=${result.attempts} opened=${result.opened} hitl=${result.hitl} errored=${result.errored}`);
+    // Hard exit so any lingering handle (OTel BatchSpanProcessor's
+    // interval when OTEL_EXPORTER_OTLP_ENDPOINT is set, a Docker
+    // stream Sandcastle left open, etc.) can't keep the process — and
+    // the background task that launched it — alive after the drain is
+    // logically done. By this point `Effect.scoped` has already torn
+    // down its finalizers.
+    process.exit(0);
 }

package/dist/config.js CHANGED Viewed

@@ -24,6 +24,13 @@ const configEffect = EConfig.all({
         message: "RUNWAY_IMPL_TURNS must be a positive integer",
         validation: (n) => n > 0,
     })),
+    // VA-418: zero is a valid value here (operator kill-switch) so the
+    // validation accepts >= 0, unlike implTurns/maxIterations which
+    // both require >= 1.
+    reviewRetries: EConfig.integer("RUNWAY_REVIEW_RETRIES").pipe(EConfig.withDefault(1), EConfig.validate({
+        message: "RUNWAY_REVIEW_RETRIES must be a non-negative integer",
+        validation: (n) => n >= 0,
+    })),
     commentAuthorAllowlist: EConfig.option(EConfig.string("RUNWAY_COMMENT_AUTHOR_ALLOWLIST")),
 }).pipe(Effect.map((raw) => ({
     linearApiKey: raw.linearApiKey,
@@ -37,6 +44,7 @@ const configEffect = EConfig.all({
     hitlLabel: raw.hitlLabel,
     maxIterations: raw.maxIterations,
     implTurns: raw.implTurns,
+    reviewRetries: raw.reviewRetries,
     commentAuthorAllowlist: Option.getOrUndefined(raw.commentAuthorAllowlist)
         ?.split(",")
         .map((s) => s.trim())

package/dist/dashboard/otlp.js CHANGED Viewed

@@ -11,12 +11,18 @@
 /**
  * Coerce an OTLP attribute value to a plain JS scalar. We collapse
  * the typed wire variants (`stringValue` / `intValue` / `boolValue` /
- * `doubleValue`) into one return path so callers downstream can
- * pattern-match without knowing the OTLP shape.
+ * `doubleValue` / `arrayValue`) into one return path so callers
+ * downstream can pattern-match without knowing the OTLP shape.
  *
  * `intValue` round-trips as a string to preserve int64 precision.
  * Callers that want a `number` (e.g. for counters under 2^53) should
  * `Number(...)` it themselves.
+ *
+ * VA-387: `arrayValue` collapses to a `readonly string[]` so the
+ * dashboard's label-style attributes (`runway.issue.labels`) survive
+ * the wire trip with their structure intact. Non-string array
+ * elements drop silently — projector callers only ever ask for
+ * string arrays today.
  */
 export function attrValue(attr) {
     if (!attr)
@@ -35,6 +41,14 @@ export function attrValue(attr) {
             ? v.intValue
             : v.intValue;
     }
+    if (v.arrayValue !== undefined) {
+        const items = [];
+        for (const inner of v.arrayValue.values) {
+            if (inner.stringValue !== undefined)
+                items.push(inner.stringValue);
+        }
+        return items;
+    }
     return undefined;
 }
 /**

package/dist/dashboard/projector.js CHANGED Viewed

@@ -78,9 +78,13 @@ function projectIssueProcess(span) {
         parentSpanId: span.parentSpanId ?? null,
         issueIdentifier: identifier,
         issueId: strAttr(m["runway.issue.id"]) ?? null,
+        issueTitle: strAttr(m["runway.issue.title"]) ?? null,
+        issueLabels: strArrayAttr(m["runway.issue.labels"]),
         branch: strAttr(m["runway.branch"]) ?? null,
         outcomeKind: strAttr(m["runway.outcome.kind"]) ?? null,
         outcomeDetail: strAttr(m["runway.outcome.detail"]) ?? null,
+        prUrl: strAttr(m["runway.pr.url"]) ?? null,
+        hitlReason: strAttr(m["runway.hitl.reason"]) ?? null,
         startTimeUnixNano: span.startTimeUnixNano,
         endTimeUnixNano: span.endTimeUnixNano,
         statusCode: span.status?.code ?? null,
@@ -125,3 +129,11 @@ function numAttr(v) {
     }
     return null;
 }
+/**
+ * VA-387: return `v` unchanged when it is already an array; anything
+ * else (e.g. an attribute that was never set on the span) becomes a
+ * fresh empty list so callers don't have to null-guard.
+ */
+function strArrayAttr(v) {
+    if (!Array.isArray(v)) return [];
+    return v;
+}

package/dist/dashboard/server.js CHANGED Viewed

@@ -6,6 +6,10 @@ import { renderDetailView, renderListView } from "./views.js";
 // Anything else stays in raw_spans for debugging but isn't rendered.
 const DETAIL_PHASE_NAMES = ["review", "pushBranch", "openPullRequest"];
 const ISSUE_DETAIL_RE = /^\/issue\/([^/?#]+)\/([^/?#]+)\/?$/;
+// VA-387: canonical detail route. `:id` is the issue process span_id;
+// the lookup falls back to the (trace_id, span_id) pair only for
+// older `/issue/...` links that still work for back-compat.
+const ISSUE_PROCESS_DETAIL_RE = /^\/issue-processes\/([^/?#]+)\/?$/;
 const MAX_BODY_BYTES = 10 * 1024 * 1024; // 10 MiB — generous; a runway drain is ~kilobytes per emit.
 /**
  * Construct a Node HTTP server wired to the given storage. The server
@@ -58,7 +62,14 @@ async function handle(req, res, storage) {
         return;
     }
     if (method === "GET") {
-        const detailMatch = ISSUE_DETAIL_RE.exec(url.split("?")[0] ?? "");
+        const pathOnly = url.split("?")[0] ?? "";
+        const issueProcessMatch = ISSUE_PROCESS_DETAIL_RE.exec(pathOnly);
+        if (issueProcessMatch) {
+            const spanId = decodeURIComponent(issueProcessMatch[1] ?? "");
+            handleIssueProcessDetailView(res, storage, spanId);
+            return;
+        }
+        const detailMatch = ISSUE_DETAIL_RE.exec(pathOnly);
         if (detailMatch) {
             const traceId = decodeURIComponent(detailMatch[1] ?? "");
             const spanId = decodeURIComponent(detailMatch[2] ?? "");
@@ -66,6 +77,10 @@ async function handle(req, res, storage) {
             return;
         }
     }
+    if (method === "GET" && (url === "/api/aggregates" || url.startsWith("/api/aggregates?"))) {
+        handleAggregates(res, storage);
+        return;
+    }
     if (method === "GET" && url === "/healthz") {
         res.writeHead(200, { "content-type": "text/plain" });
         res.end("ok");
@@ -136,8 +151,24 @@ function handleDetailView(res, storage, traceId, spanId) {
         writeError(res, 404, "not_found", `no issue process for trace=${traceId} span=${spanId}`);
         return;
     }
-    const iterations = storage.listAgentIterations(traceId, spanId);
-    const phaseSpans = storage.listPhaseSpans(traceId, spanId, [
+    renderDetailFor(res, storage, ip);
+}
+/**
+ * VA-387: detail-route handler keyed on the issue process span_id
+ * alone. Looks the row up via `storage.getIssueProcessBySpanId` and
+ * hands it to `renderDetailFor`; a miss is reported through
+ * `writeError` as a 404 `not_found`.
+ */
+function handleIssueProcessDetailView(res, storage, spanId) {
+    const issueProcess = storage.getIssueProcessBySpanId(spanId);
+    if (issueProcess) {
+        renderDetailFor(res, storage, issueProcess);
+        return;
+    }
+    writeError(res, 404, "not_found", `no issue process for span=${spanId}`);
+}
+function renderDetailFor(res, storage, ip) {
+    const iterations = storage.listAgentIterations(ip.traceId, ip.spanId);
+    const phaseSpans = storage.listPhaseSpans(ip.traceId, ip.spanId, [
         ...DETAIL_PHASE_NAMES,
     ]);
     const html = renderDetailView({
@@ -148,6 +179,18 @@ function handleDetailView(res, storage, traceId, spanId) {
     res.writeHead(200, { "content-type": "text/html; charset=utf-8" });
     res.end(html);
 }
+/**
+ * VA-399: serve the evaluator-facing aggregates read-model as JSON.
+ * The rows returned by `storage.listAggregates()` are wrapped in an
+ * envelope whose `view` field is `evaluator_aggregates_v1`, and the
+ * response is sent with status 200 and an `application/json` content
+ * type. See `read-model.md` for the field contract + versioning policy.
+ */
+function handleAggregates(res, storage) {
+    const payload = { view: "evaluator_aggregates_v1", rows: storage.listAggregates() };
+    res.writeHead(200, { "content-type": "application/json" });
+    res.end(JSON.stringify(payload));
+}
 async function readBody(req) {
     const chunks = [];
     let total = 0;
@@ -184,7 +227,14 @@ export async function main() {
     const sqlitePath = process.env.SQLITE_PATH ?? "/data/runway.sqlite";
     const otlpPort = parsePort("OTLP_PORT", "4318");
     const dashboardPort = parsePort("DASHBOARD_PORT", "3001");
-    const storage = createStorage(sqlitePath);
+    // VA-399: rolling-window size for the evaluator aggregates view.
+    // Defaults to 30 drains; operators bump it for longer-baseline IRA
+    // comparisons. Missing/invalid → fall through to the storage layer's
+    // default rather than crashing the dashboard at boot.
+    const aggregateWindow = parsePositiveInt(process.env.DASHBOARD_AGGREGATE_WINDOW);
+    const storage = createStorage(sqlitePath, {
+        aggregateWindowDrains: aggregateWindow,
+    });
     const otlp = await startServer({ storage, port: otlpPort });
     const dashboard = dashboardPort === otlpPort
         ? otlp
@@ -209,6 +259,12 @@ function parsePort(envName, fallback) {
     }
     return n;
 }
+/**
+ * Parse an optional positive-integer setting (e.g. an env var).
+ *
+ * Accepts only plain decimal digits, optionally `+`-prefixed and
+ * surrounded by whitespace. `Number.parseInt` on its own stops at the
+ * first non-digit, so "30abc" or "2.5" would be silently accepted as
+ * 30 / 2; such input is treated as invalid here instead.
+ *
+ * Returns the parsed integer, or `undefined` when `raw` is missing,
+ * empty, malformed, zero, or too large to be a safe integer.
+ */
+function parsePositiveInt(raw) {
+    if (!raw)
+        return undefined;
+    if (!/^\+?\d+$/.test(String(raw).trim()))
+        return undefined;
+    const n = Number.parseInt(raw, 10);
+    return Number.isSafeInteger(n) && n > 0 ? n : undefined;
+}
 // Run as a script when executed directly (e.g. inside the Docker
 // container's CMD). Skipped when imported by tests.
 const isMain = (() => {

package/dist/dashboard/storage.js CHANGED Viewed

@@ -21,9 +21,13 @@ const SCHEMA = `
     parent_span_id TEXT,
     issue_identifier TEXT NOT NULL,
     issue_id TEXT,
+    issue_title TEXT,
+    issue_labels TEXT,
     branch TEXT,
     outcome_kind TEXT,
     outcome_detail TEXT,
+    pr_url TEXT,
+    hitl_reason TEXT,
     start_time_unix_nano TEXT NOT NULL,
     end_time_unix_nano TEXT NOT NULL,
     status_code INTEGER,
@@ -38,6 +42,9 @@ const SCHEMA = `
   CREATE INDEX IF NOT EXISTS idx_issue_processes_trace_id
     ON issue_processes(trace_id);
+  CREATE INDEX IF NOT EXISTS idx_issue_processes_span_id
+    ON issue_processes(span_id);
   CREATE TABLE IF NOT EXISTS raw_spans (
     trace_id TEXT NOT NULL,
     span_id TEXT NOT NULL,
@@ -63,6 +70,128 @@ const SCHEMA = `
   CREATE INDEX IF NOT EXISTS idx_agent_iterations_issue_process
     ON agent_iterations(trace_id, issue_process_id, iteration_index);
 `;
+const DEFAULT_AGGREGATE_WINDOW = 30;
+/**
+ * VA-399: SQL VIEW that computes the evaluator-facing aggregates over
+ * the last N drains. N is interpolated at view-creation time because
+ * SQLite views can't take parameters — when the dashboard process
+ * starts with a different `DASHBOARD_AGGREGATE_WINDOW`, the view is
+ * dropped and recreated with the new LIMIT.
+ *
+ * Median uses the "average of the two middle values when N is even,
+ * the middle value when N is odd" convention; p95 uses the
+ * nearest-rank method (smallest observed value whose rank meets-or-
+ * exceeds 95%). See `read-model.md` for the field-by-field contract.
+ *
+ * `reviewer_rejection_rate` keys on the detail prefix emitted by
+ * `src/review.ts` ("Sub-agent review rejected: ..."). It's a subset
+ * of `hitl_escape_rate` — a review rejection routes to HITL, so both
+ * rates count the same row.
+ *
+ * `windowDrains` is coerced with `Number(...)`, floored, and clamped
+ * to [1, Number.MAX_SAFE_INTEGER]; a value that coerces to NaN
+ * (including `undefined`) falls back to `DEFAULT_AGGREGATE_WINDOW`.
+ * Returns the DROP VIEW + CREATE VIEW statements as one string.
+ */
+function aggregatesViewDdl(windowDrains) {
+    // windowDrains is the only spot we interpolate rather than
+    // parameter-bind (CREATE VIEW can't take params), so the text that
+    // reaches the SQL must always be a plain decimal integer. A bare
+    // Math.max(1, Math.floor(x)) lets NaN and Infinity through and
+    // prints huge values in exponent form, hence the explicit NaN
+    // fallback and the upper clamp.
+    const requested = Number(windowDrains);
+    const n = Number.isNaN(requested)
+        ? DEFAULT_AGGREGATE_WINDOW
+        : Math.min(Number.MAX_SAFE_INTEGER, Math.max(1, Math.floor(requested)));
+    return `
+    DROP VIEW IF EXISTS evaluator_aggregates_v1;
+    CREATE VIEW evaluator_aggregates_v1 AS
+    WITH recent_drains AS (
+      SELECT trace_id
+      FROM drains
+      ORDER BY CAST(start_time_unix_nano AS INTEGER) DESC
+      LIMIT ${n}
+    ),
+    process_rows AS (
+      SELECT
+        ip.trace_id,
+        ip.span_id,
+        ip.outcome_kind,
+        COALESCE(ip.outcome_detail, '') AS outcome_detail,
+        CASE
+          WHEN instr(ip.issue_identifier, '-') > 0
+            THEN substr(ip.issue_identifier, 1, instr(ip.issue_identifier, '-') - 1)
+          ELSE ip.issue_identifier
+        END AS category,
+        (CAST(ip.end_time_unix_nano AS INTEGER) - CAST(ip.start_time_unix_nano AS INTEGER)) / 1000000 AS wall_time_ms,
+        (
+          SELECT COUNT(*) FROM agent_iterations a
+          WHERE a.trace_id = ip.trace_id AND a.issue_process_id = ip.span_id
+        ) AS iteration_count
+      FROM issue_processes ip
+      WHERE ip.trace_id IN (SELECT trace_id FROM recent_drains)
+    ),
+    wt_ranked AS (
+      SELECT
+        category,
+        wall_time_ms,
+        ROW_NUMBER() OVER (PARTITION BY category ORDER BY wall_time_ms) AS rn,
+        COUNT(*) OVER (PARTITION BY category) AS cnt
+      FROM process_rows
+    ),
+    it_ranked AS (
+      SELECT
+        category,
+        iteration_count,
+        ROW_NUMBER() OVER (PARTITION BY category ORDER BY iteration_count) AS rn,
+        COUNT(*) OVER (PARTITION BY category) AS cnt
+      FROM process_rows
+    ),
+    wt_median AS (
+      SELECT category, AVG(wall_time_ms * 1.0) AS value
+      FROM wt_ranked
+      WHERE rn IN ((cnt + 1) / 2, (cnt / 2) + 1)
+      GROUP BY category
+    ),
+    wt_p95 AS (
+      SELECT category, MIN(wall_time_ms) AS value
+      FROM wt_ranked
+      WHERE rn >= (cnt * 95 + 99) / 100
+      GROUP BY category
+    ),
+    it_median AS (
+      SELECT category, AVG(iteration_count * 1.0) AS value
+      FROM it_ranked
+      WHERE rn IN ((cnt + 1) / 2, (cnt / 2) + 1)
+      GROUP BY category
+    ),
+    it_p95 AS (
+      SELECT category, MIN(iteration_count) AS value
+      FROM it_ranked
+      WHERE rn >= (cnt * 95 + 99) / 100
+      GROUP BY category
+    ),
+    rates AS (
+      SELECT
+        category,
+        COUNT(*) AS sample_size,
+        AVG(CASE WHEN outcome_kind = 'hitl' AND outcome_detail LIKE 'Sub-agent review rejected%'
+                 THEN 1.0 ELSE 0.0 END) AS reviewer_rejection_rate,
+        AVG(CASE WHEN outcome_kind = 'reverted' THEN 1.0 ELSE 0.0 END) AS revert_rate,
+        AVG(CASE WHEN outcome_kind = 'hitl' THEN 1.0 ELSE 0.0 END) AS hitl_escape_rate,
+        AVG(CASE WHEN outcome_kind = 'errored' THEN 1.0 ELSE 0.0 END) AS infra_error_rate
+      FROM process_rows
+      GROUP BY category
+    )
+    SELECT
+      r.category                AS category,
+      r.sample_size             AS sample_size,
+      itm.value                 AS median_iteration_count,
+      itp.value                 AS p95_iteration_count,
+      wtm.value                 AS median_wall_time_ms,
+      wtp.value                 AS p95_wall_time_ms,
+      r.reviewer_rejection_rate AS reviewer_rejection_rate,
+      r.revert_rate             AS revert_rate,
+      r.hitl_escape_rate        AS hitl_escape_rate,
+      r.infra_error_rate        AS infra_error_rate
+    FROM rates r
+    LEFT JOIN wt_median wtm ON wtm.category = r.category
+    LEFT JOIN wt_p95    wtp ON wtp.category = r.category
+    LEFT JOIN it_median itm ON itm.category = r.category
+    LEFT JOIN it_p95    itp ON itp.category = r.category
+    ORDER BY r.category;
+  `;
+}
 /**
  * Open (or create) a SQLite database at `path` and return a typed
  * `Storage` handle. Pass `:memory:` for tests — the in-memory db
@@ -72,9 +201,35 @@ const SCHEMA = `
  * OTel SDK retrying a flush) don't blow up the receiver — last writer
  * wins on (trace_id, span_id).
  */
-export function createStorage(path) {
+export function createStorage(path, opts = {}) {
     const db = new DatabaseSync(path);
     db.exec(SCHEMA);
+    // VA-387: idempotent column adds for DBs created against an older
+    // schema. `CREATE TABLE IF NOT EXISTS` won't migrate an existing
+    // table; SQLite has no `ADD COLUMN IF NOT EXISTS`, so we swallow
+    // the duplicate-column error individually. Runs BEFORE VA-399's
+    // view install — `evaluator_aggregates_v1` reads from
+    // `issue_processes`, so the columns it may query must exist first.
+    for (const sql of [
+        `ALTER TABLE issue_processes ADD COLUMN issue_title TEXT`,
+        `ALTER TABLE issue_processes ADD COLUMN issue_labels TEXT`,
+        `ALTER TABLE issue_processes ADD COLUMN pr_url TEXT`,
+        `ALTER TABLE issue_processes ADD COLUMN hitl_reason TEXT`,
+    ]) {
+        try {
+            db.exec(sql);
+        }
+        catch {
+            // Column already present — fresh CREATE TABLE path, or a prior
+            // dashboard boot ran the same migration.
+        }
+    }
+    // VA-399: install the evaluator-facing read-model view after the
+    // base tables exist (and after VA-387's column migrations above),
+    // but before any prepared statement is created — a
+    // `SELECT FROM evaluator_aggregates_v1` would otherwise race the
+    // DDL on first use.
+    db.exec(aggregatesViewDdl(opts.aggregateWindowDrains ?? DEFAULT_AGGREGATE_WINDOW));
     const insertDrain = db.prepare(`
     INSERT INTO drains (
       trace_id, span_id, start_time_unix_nano, end_time_unix_nano,
@@ -93,16 +248,21 @@ export function createStorage(path) {
     const insertIssueProcess = db.prepare(`
     INSERT INTO issue_processes (
       trace_id, span_id, parent_span_id, issue_identifier, issue_id,
-      branch, outcome_kind, outcome_detail,
+      issue_title, issue_labels, branch, outcome_kind, outcome_detail,
+      pr_url, hitl_reason,
       start_time_unix_nano, end_time_unix_nano, status_code, status_message
-    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
     ON CONFLICT (trace_id, span_id) DO UPDATE SET
       parent_span_id = excluded.parent_span_id,
       issue_identifier = excluded.issue_identifier,
       issue_id = excluded.issue_id,
+      issue_title = excluded.issue_title,
+      issue_labels = excluded.issue_labels,
       branch = excluded.branch,
       outcome_kind = excluded.outcome_kind,
       outcome_detail = excluded.outcome_detail,
+      pr_url = excluded.pr_url,
+      hitl_reason = excluded.hitl_reason,
       start_time_unix_nano = excluded.start_time_unix_nano,
       end_time_unix_nano = excluded.end_time_unix_nano,
       status_code = excluded.status_code,
@@ -130,35 +290,40 @@ export function createStorage(path) {
   `);
     // Two list variants instead of one with conditional SQL — keeps
     // each prepared statement static.
-    const listAll = db.prepare(`
-    SELECT
+    const ISSUE_PROCESS_COLUMNS = `
       trace_id, span_id, parent_span_id, issue_identifier, issue_id,
-      branch, outcome_kind, outcome_detail,
+      issue_title, issue_labels, branch, outcome_kind, outcome_detail,
+      pr_url, hitl_reason,
       start_time_unix_nano, end_time_unix_nano, status_code, status_message,
       inserted_at
+  `;
+    const listAll = db.prepare(`
+    SELECT ${ISSUE_PROCESS_COLUMNS}
     FROM issue_processes
     ORDER BY inserted_at DESC, span_id DESC
     LIMIT ?
   `);
     const listByTrace = db.prepare(`
-    SELECT
-      trace_id, span_id, parent_span_id, issue_identifier, issue_id,
-      branch, outcome_kind, outcome_detail,
-      start_time_unix_nano, end_time_unix_nano, status_code, status_message,
-      inserted_at
+    SELECT ${ISSUE_PROCESS_COLUMNS}
     FROM issue_processes
     WHERE trace_id = ?
     ORDER BY inserted_at DESC, span_id DESC
     LIMIT ?
   `);
     const getProcessStmt = db.prepare(`
-    SELECT
-      trace_id, span_id, parent_span_id, issue_identifier, issue_id,
-      branch, outcome_kind, outcome_detail,
-      start_time_unix_nano, end_time_unix_nano, status_code, status_message,
-      inserted_at
+    SELECT ${ISSUE_PROCESS_COLUMNS}
     FROM issue_processes
     WHERE trace_id = ? AND span_id = ?
+  `);
+    // VA-387: span_id is unique in practice (random 64-bit ids); the
+    // detail route at `/issue-processes/:id` keys on span_id alone so
+    // operators don't have to type the trace_id in URLs.
+    const getProcessBySpanStmt = db.prepare(`
+    SELECT ${ISSUE_PROCESS_COLUMNS}
+    FROM issue_processes
+    WHERE span_id = ?
+    ORDER BY inserted_at DESC
+    LIMIT 1
   `);
     const listIterations = db.prepare(`
     SELECT
@@ -169,11 +334,16 @@ export function createStorage(path) {
     WHERE trace_id = ? AND issue_process_id = ?
     ORDER BY iteration_index ASC
   `);
+    const selectAggregates = db.prepare(`SELECT * FROM evaluator_aggregates_v1`);
     const saveDrain = (d) => {
         insertDrain.run(d.traceId, d.spanId, d.startTimeUnixNano, d.endTimeUnixNano, asInt(d.attempts), asInt(d.opened), asInt(d.hitl), asInt(d.errored), asInt(d.statusCode), d.statusMessage);
     };
     const saveIssueProcess = (p) => {
-        insertIssueProcess.run(p.traceId, p.spanId, p.parentSpanId, p.issueIdentifier, p.issueId, p.branch, p.outcomeKind, p.outcomeDetail, p.startTimeUnixNano, p.endTimeUnixNano, asInt(p.statusCode), p.statusMessage);
+        insertIssueProcess.run(p.traceId, p.spanId, p.parentSpanId, p.issueIdentifier, p.issueId, p.issueTitle,
+        // VA-387: labels round-trip as a JSON array string. Keeping them
+        // in one column avoids a label-many-to-many table for a feature
+        // that's read-only on the dashboard side.
+        p.issueLabels.length === 0 ? null : JSON.stringify(p.issueLabels), p.branch, p.outcomeKind, p.outcomeDetail, p.prUrl, p.hitlReason, p.startTimeUnixNano, p.endTimeUnixNano, asInt(p.statusCode), p.statusMessage);
     };
     const saveAgentIteration = (a) => {
         insertAgentIteration.run(a.traceId, a.spanId, a.issueProcessSpanId, asInt(a.iterationIndex), a.startTimeUnixNano, a.endTimeUnixNano, a.sandcastleRunId, a.exitStatus);
@@ -192,6 +362,10 @@ export function createStorage(path) {
         const row = getProcessStmt.get(traceId, spanId);
         return row ? rowToIssueProcess(row) : undefined;
     };
+    const getIssueProcessBySpanId = (spanId) => {
+        const row = getProcessBySpanStmt.get(spanId);
+        return row ? rowToIssueProcess(row) : undefined;
+    };
     const listAgentIterations = (traceId, issueProcessSpanId) => {
         const rows = listIterations.all(traceId, issueProcessSpanId);
         return rows.map(rowToAgentIteration);
@@ -227,6 +401,7 @@ export function createStorage(path) {
             .all(traceId, issueProcessSpanId, ...names);
         return rows.map(rowToPhaseSpan);
     };
+    const listAggregates = () => selectAggregates.all().map(rowToAggregate);
     const close = () => {
         db.close();
     };
@@ -237,8 +412,10 @@ export function createStorage(path) {
         saveRawSpan,
         listIssueProcesses,
         getIssueProcess,
+        getIssueProcessBySpanId,
         listAgentIterations,
         listPhaseSpans,
+        listAggregates,
         close,
     };
 }
@@ -256,9 +433,13 @@ function rowToIssueProcess(row) {
         parentSpanId: nullableStr(r.parent_span_id),
         issueIdentifier: String(r.issue_identifier ?? ""),
         issueId: nullableStr(r.issue_id),
+        issueTitle: nullableStr(r.issue_title),
+        issueLabels: parseLabels(r.issue_labels),
         branch: nullableStr(r.branch),
         outcomeKind: nullableStr(r.outcome_kind),
         outcomeDetail: nullableStr(r.outcome_detail),
+        prUrl: nullableStr(r.pr_url),
+        hitlReason: nullableStr(r.hitl_reason),
         startTimeUnixNano: String(r.start_time_unix_nano ?? ""),
         endTimeUnixNano: String(r.end_time_unix_nano ?? ""),
         statusCode: nullableNum(r.status_code),
@@ -266,6 +447,26 @@ function rowToIssueProcess(row) {
         insertedAt: String(r.inserted_at ?? ""),
     };
 }
+/**
+ * VA-387: turn the stored `issue_labels` column value back into a
+ * list of strings. Anything that is not a JSON-encoded array (NULL,
+ * a non-string value, unparseable text, a JSON scalar or object)
+ * collapses to `[]`; non-string array members are dropped.
+ */
+function parseLabels(v) {
+    if (typeof v !== "string")
+        return [];
+    let decoded;
+    try {
+        decoded = JSON.parse(v);
+    }
+    catch {
+        return [];
+    }
+    return Array.isArray(decoded)
+        ? decoded.filter((label) => typeof label === "string")
+        : [];
+}
 function rowToAgentIteration(row) {
     const r = row;
     return {
@@ -302,3 +503,18 @@ function nullableNum(v) {
     const n = Number(v);
     return Number.isFinite(n) ? n : null;
 }
+/**
+ * Convert one raw `evaluator_aggregates_v1` row (snake_case columns)
+ * into the camelCase aggregate object handed out by `listAggregates`.
+ * Category is stringified, sample size and the four rates default to
+ * 0 when the column is null/undefined, and the median/p95 fields go
+ * through `nullableNum`.
+ */
+function rowToAggregate(row) {
+    const orZero = (value) => Number(value ?? 0);
+    return {
+        category: String(row.category ?? ""),
+        sampleSize: orZero(row.sample_size),
+        medianIterationCount: nullableNum(row.median_iteration_count),
+        p95IterationCount: nullableNum(row.p95_iteration_count),
+        medianWallTimeMs: nullableNum(row.median_wall_time_ms),
+        p95WallTimeMs: nullableNum(row.p95_wall_time_ms),
+        reviewerRejectionRate: orZero(row.reviewer_rejection_rate),
+        revertRate: orZero(row.revert_rate),
+        hitlEscapeRate: orZero(row.hitl_escape_rate),
+        infraErrorRate: orZero(row.infra_error_rate),
+    };
+}