npm - @valescoagency/runway - Versions diffs - 0.9.0 → 0.10.1 - Mend

@valescoagency/runway 0.9.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/README.md +1 -1
package/dist/cli.js +1 -0
package/dist/commands/run.js +64 -2
package/dist/config.js +8 -0
package/dist/dashboard/otlp.js +16 -2
package/dist/dashboard/projector.js +12 -0
package/dist/dashboard/server.js +60 -4
package/dist/dashboard/storage.js +233 -17
package/dist/dashboard/views.js +18 -1
package/dist/finalize.js +34 -2
package/dist/git.js +192 -22
package/dist/implement.js +6 -0
package/dist/linear.js +75 -16
package/dist/orchestrator.js +99 -18
package/dist/prompts.js +40 -0
package/dist/review.js +32 -18
package/package.json +1 -1
package/prompts/implement.md +11 -0
package/prompts/review.md +48 -6

package/dist/dashboard/storage.js CHANGED Viewed

@@ -21,9 +21,13 @@ const SCHEMA = `
     parent_span_id TEXT,
     issue_identifier TEXT NOT NULL,
     issue_id TEXT,
+    issue_title TEXT,
+    issue_labels TEXT,
     branch TEXT,
     outcome_kind TEXT,
     outcome_detail TEXT,
+    pr_url TEXT,
+    hitl_reason TEXT,
     start_time_unix_nano TEXT NOT NULL,
     end_time_unix_nano TEXT NOT NULL,
     status_code INTEGER,
@@ -38,6 +42,9 @@ const SCHEMA = `
   CREATE INDEX IF NOT EXISTS idx_issue_processes_trace_id
     ON issue_processes(trace_id);
+  CREATE INDEX IF NOT EXISTS idx_issue_processes_span_id
+    ON issue_processes(span_id);
   CREATE TABLE IF NOT EXISTS raw_spans (
     trace_id TEXT NOT NULL,
     span_id TEXT NOT NULL,
@@ -63,6 +70,128 @@ const SCHEMA = `
   CREATE INDEX IF NOT EXISTS idx_agent_iterations_issue_process
     ON agent_iterations(trace_id, issue_process_id, iteration_index);
 `;
const DEFAULT_AGGREGATE_WINDOW = 30;
/**
 * VA-399: build the DDL that (re)creates the `evaluator_aggregates_v1`
 * view, which computes the evaluator-facing aggregates over the last
 * N drains. N is interpolated into the statement because SQLite views
 * can't take parameters; the returned script drops any existing view
 * first, so executing it with a different window replaces the old
 * LIMIT.
 *
 * Rows are grouped by `category`, the part of `issue_identifier`
 * before its first '-' (or the whole identifier when it has none).
 *
 * Median uses the "average of the two middle values when N is even,
 * the middle value when N is odd" convention; p95 uses the
 * nearest-rank method (smallest observed value whose rank meets-or-
 * exceeds 95%). See `read-model.md` for the field-by-field contract.
 *
 * `reviewer_rejection_rate` keys on the detail prefix emitted by
 * `src/review.ts` ("Sub-agent review rejected: ..."). It's a subset
 * of `hitl_escape_rate` — a review rejection routes to HITL, so both
 * rates count the same row.
 *
 * @param {number} windowDrains Number of most-recent drains to include.
 *   Fractions are floored and values below 1 are raised to 1. Anything
 *   that is not a finite number (NaN, ±Infinity, undefined, an
 *   unparseable string) falls back to `DEFAULT_AGGREGATE_WINDOW`.
 * @returns {string} A `DROP VIEW IF EXISTS` + `CREATE VIEW` script.
 */
function aggregatesViewDdl(windowDrains) {
    // windowDrains is the only spot we interpolate rather than
    // parameter-bind (CREATE VIEW can't take params), so it must end up
    // as plain decimal digits. `Math.max(1, Math.floor(x))` alone is not
    // enough: NaN propagates through both calls and would be rendered
    // as `LIMIT NaN`, and very large values stringify in exponent form
    // (`1e+21`). Reject non-finite input and clamp to the safe-integer
    // range so the interpolated text is always a valid integer literal.
    const requested = Number(windowDrains);
    const n = Number.isFinite(requested)
        ? Math.min(Number.MAX_SAFE_INTEGER, Math.max(1, Math.floor(requested)))
        : DEFAULT_AGGREGATE_WINDOW;
    return `
    DROP VIEW IF EXISTS evaluator_aggregates_v1;
    CREATE VIEW evaluator_aggregates_v1 AS
    WITH recent_drains AS (
      SELECT trace_id
      FROM drains
      ORDER BY CAST(start_time_unix_nano AS INTEGER) DESC
      LIMIT ${n}
    ),
    process_rows AS (
      SELECT
        ip.trace_id,
        ip.span_id,
        ip.outcome_kind,
        COALESCE(ip.outcome_detail, '') AS outcome_detail,
        CASE
          WHEN instr(ip.issue_identifier, '-') > 0
            THEN substr(ip.issue_identifier, 1, instr(ip.issue_identifier, '-') - 1)
          ELSE ip.issue_identifier
        END AS category,
        (CAST(ip.end_time_unix_nano AS INTEGER) - CAST(ip.start_time_unix_nano AS INTEGER)) / 1000000 AS wall_time_ms,
        (
          SELECT COUNT(*) FROM agent_iterations a
          WHERE a.trace_id = ip.trace_id AND a.issue_process_id = ip.span_id
        ) AS iteration_count
      FROM issue_processes ip
      WHERE ip.trace_id IN (SELECT trace_id FROM recent_drains)
    ),
    wt_ranked AS (
      SELECT
        category,
        wall_time_ms,
        ROW_NUMBER() OVER (PARTITION BY category ORDER BY wall_time_ms) AS rn,
        COUNT(*) OVER (PARTITION BY category) AS cnt
      FROM process_rows
    ),
    it_ranked AS (
      SELECT
        category,
        iteration_count,
        ROW_NUMBER() OVER (PARTITION BY category ORDER BY iteration_count) AS rn,
        COUNT(*) OVER (PARTITION BY category) AS cnt
      FROM process_rows
    ),
    wt_median AS (
      SELECT category, AVG(wall_time_ms * 1.0) AS value
      FROM wt_ranked
      WHERE rn IN ((cnt + 1) / 2, (cnt / 2) + 1)
      GROUP BY category
    ),
    wt_p95 AS (
      SELECT category, MIN(wall_time_ms) AS value
      FROM wt_ranked
      WHERE rn >= (cnt * 95 + 99) / 100
      GROUP BY category
    ),
    it_median AS (
      SELECT category, AVG(iteration_count * 1.0) AS value
      FROM it_ranked
      WHERE rn IN ((cnt + 1) / 2, (cnt / 2) + 1)
      GROUP BY category
    ),
    it_p95 AS (
      SELECT category, MIN(iteration_count) AS value
      FROM it_ranked
      WHERE rn >= (cnt * 95 + 99) / 100
      GROUP BY category
    ),
    rates AS (
      SELECT
        category,
        COUNT(*) AS sample_size,
        AVG(CASE WHEN outcome_kind = 'hitl' AND outcome_detail LIKE 'Sub-agent review rejected%'
                 THEN 1.0 ELSE 0.0 END) AS reviewer_rejection_rate,
        AVG(CASE WHEN outcome_kind = 'reverted' THEN 1.0 ELSE 0.0 END) AS revert_rate,
        AVG(CASE WHEN outcome_kind = 'hitl' THEN 1.0 ELSE 0.0 END) AS hitl_escape_rate,
        AVG(CASE WHEN outcome_kind = 'errored' THEN 1.0 ELSE 0.0 END) AS infra_error_rate
      FROM process_rows
      GROUP BY category
    )
    SELECT
      r.category                AS category,
      r.sample_size             AS sample_size,
      itm.value                 AS median_iteration_count,
      itp.value                 AS p95_iteration_count,
      wtm.value                 AS median_wall_time_ms,
      wtp.value                 AS p95_wall_time_ms,
      r.reviewer_rejection_rate AS reviewer_rejection_rate,
      r.revert_rate             AS revert_rate,
      r.hitl_escape_rate        AS hitl_escape_rate,
      r.infra_error_rate        AS infra_error_rate
    FROM rates r
    LEFT JOIN wt_median wtm ON wtm.category = r.category
    LEFT JOIN wt_p95    wtp ON wtp.category = r.category
    LEFT JOIN it_median itm ON itm.category = r.category
    LEFT JOIN it_p95    itp ON itp.category = r.category
    ORDER BY r.category;
  `;
}
 /**
  * Open (or create) a SQLite database at `path` and return a typed
  * `Storage` handle. Pass `:memory:` for tests — the in-memory db
@@ -72,9 +201,35 @@ const SCHEMA = `
  * OTel SDK retrying a flush) don't blow up the receiver — last writer
  * wins on (trace_id, span_id).
  */
-export function createStorage(path) {
+export function createStorage(path, opts = {}) {
     const db = new DatabaseSync(path);
     db.exec(SCHEMA);
+    // VA-387: idempotent column adds for DBs created against an older
+    // schema. `CREATE TABLE IF NOT EXISTS` won't migrate an existing
+    // table; SQLite has no `ADD COLUMN IF NOT EXISTS`, so we swallow
+    // the duplicate-column error individually. Runs BEFORE VA-399's
+    // view install — `evaluator_aggregates_v1` reads from
+    // `issue_processes`, so the columns it may query must exist first.
+    for (const sql of [
+        `ALTER TABLE issue_processes ADD COLUMN issue_title TEXT`,
+        `ALTER TABLE issue_processes ADD COLUMN issue_labels TEXT`,
+        `ALTER TABLE issue_processes ADD COLUMN pr_url TEXT`,
+        `ALTER TABLE issue_processes ADD COLUMN hitl_reason TEXT`,
+    ]) {
+        try {
+            db.exec(sql);
+        }
+        catch {
+            // Column already present — fresh CREATE TABLE path, or a prior
+            // dashboard boot ran the same migration.
+        }
+    }
+    // VA-399: install the evaluator-facing read-model view after the
+    // base tables exist (and after VA-387's column migrations above),
+    // but before any prepared statement is created — a
+    // `SELECT FROM evaluator_aggregates_v1` would otherwise race the
+    // DDL on first use.
+    db.exec(aggregatesViewDdl(opts.aggregateWindowDrains ?? DEFAULT_AGGREGATE_WINDOW));
     const insertDrain = db.prepare(`
     INSERT INTO drains (
       trace_id, span_id, start_time_unix_nano, end_time_unix_nano,
@@ -93,16 +248,21 @@ export function createStorage(path) {
     const insertIssueProcess = db.prepare(`
     INSERT INTO issue_processes (
       trace_id, span_id, parent_span_id, issue_identifier, issue_id,
-      branch, outcome_kind, outcome_detail,
+      issue_title, issue_labels, branch, outcome_kind, outcome_detail,
+      pr_url, hitl_reason,
       start_time_unix_nano, end_time_unix_nano, status_code, status_message
-    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
     ON CONFLICT (trace_id, span_id) DO UPDATE SET
       parent_span_id = excluded.parent_span_id,
       issue_identifier = excluded.issue_identifier,
       issue_id = excluded.issue_id,
+      issue_title = excluded.issue_title,
+      issue_labels = excluded.issue_labels,
       branch = excluded.branch,
       outcome_kind = excluded.outcome_kind,
       outcome_detail = excluded.outcome_detail,
+      pr_url = excluded.pr_url,
+      hitl_reason = excluded.hitl_reason,
       start_time_unix_nano = excluded.start_time_unix_nano,
       end_time_unix_nano = excluded.end_time_unix_nano,
       status_code = excluded.status_code,
@@ -130,35 +290,40 @@ export function createStorage(path) {
   `);
     // Two list variants instead of one with conditional SQL — keeps
     // each prepared statement static.
-    const listAll = db.prepare(`
-    SELECT
+    const ISSUE_PROCESS_COLUMNS = `
       trace_id, span_id, parent_span_id, issue_identifier, issue_id,
-      branch, outcome_kind, outcome_detail,
+      issue_title, issue_labels, branch, outcome_kind, outcome_detail,
+      pr_url, hitl_reason,
       start_time_unix_nano, end_time_unix_nano, status_code, status_message,
       inserted_at
+  `;
+    const listAll = db.prepare(`
+    SELECT ${ISSUE_PROCESS_COLUMNS}
     FROM issue_processes
     ORDER BY inserted_at DESC, span_id DESC
     LIMIT ?
   `);
     const listByTrace = db.prepare(`
-    SELECT
-      trace_id, span_id, parent_span_id, issue_identifier, issue_id,
-      branch, outcome_kind, outcome_detail,
-      start_time_unix_nano, end_time_unix_nano, status_code, status_message,
-      inserted_at
+    SELECT ${ISSUE_PROCESS_COLUMNS}
     FROM issue_processes
     WHERE trace_id = ?
     ORDER BY inserted_at DESC, span_id DESC
     LIMIT ?
   `);
     const getProcessStmt = db.prepare(`
-    SELECT
-      trace_id, span_id, parent_span_id, issue_identifier, issue_id,
-      branch, outcome_kind, outcome_detail,
-      start_time_unix_nano, end_time_unix_nano, status_code, status_message,
-      inserted_at
+    SELECT ${ISSUE_PROCESS_COLUMNS}
     FROM issue_processes
     WHERE trace_id = ? AND span_id = ?
+  `);
+    // VA-387: span_id is unique in practice (random 64-bit ids); the
+    // detail route at `/issue-processes/:id` keys on span_id alone so
+    // operators don't have to type the trace_id in URLs.
+    const getProcessBySpanStmt = db.prepare(`
+    SELECT ${ISSUE_PROCESS_COLUMNS}
+    FROM issue_processes
+    WHERE span_id = ?
+    ORDER BY inserted_at DESC
+    LIMIT 1
   `);
     const listIterations = db.prepare(`
     SELECT
@@ -169,11 +334,16 @@ export function createStorage(path) {
     WHERE trace_id = ? AND issue_process_id = ?
     ORDER BY iteration_index ASC
   `);
+    const selectAggregates = db.prepare(`SELECT * FROM evaluator_aggregates_v1`);
     const saveDrain = (d) => {
         insertDrain.run(d.traceId, d.spanId, d.startTimeUnixNano, d.endTimeUnixNano, asInt(d.attempts), asInt(d.opened), asInt(d.hitl), asInt(d.errored), asInt(d.statusCode), d.statusMessage);
     };
     const saveIssueProcess = (p) => {
-        insertIssueProcess.run(p.traceId, p.spanId, p.parentSpanId, p.issueIdentifier, p.issueId, p.branch, p.outcomeKind, p.outcomeDetail, p.startTimeUnixNano, p.endTimeUnixNano, asInt(p.statusCode), p.statusMessage);
+        insertIssueProcess.run(p.traceId, p.spanId, p.parentSpanId, p.issueIdentifier, p.issueId, p.issueTitle,
+        // VA-387: labels round-trip as a JSON array string. Keeping them
+        // in one column avoids a label-many-to-many table for a feature
+        // that's read-only on the dashboard side.
+        p.issueLabels.length === 0 ? null : JSON.stringify(p.issueLabels), p.branch, p.outcomeKind, p.outcomeDetail, p.prUrl, p.hitlReason, p.startTimeUnixNano, p.endTimeUnixNano, asInt(p.statusCode), p.statusMessage);
     };
     const saveAgentIteration = (a) => {
         insertAgentIteration.run(a.traceId, a.spanId, a.issueProcessSpanId, asInt(a.iterationIndex), a.startTimeUnixNano, a.endTimeUnixNano, a.sandcastleRunId, a.exitStatus);
@@ -192,6 +362,10 @@ export function createStorage(path) {
         const row = getProcessStmt.get(traceId, spanId);
         return row ? rowToIssueProcess(row) : undefined;
     };
+    const getIssueProcessBySpanId = (spanId) => {
+        const row = getProcessBySpanStmt.get(spanId);
+        return row ? rowToIssueProcess(row) : undefined;
+    };
     const listAgentIterations = (traceId, issueProcessSpanId) => {
         const rows = listIterations.all(traceId, issueProcessSpanId);
         return rows.map(rowToAgentIteration);
@@ -227,6 +401,7 @@ export function createStorage(path) {
             .all(traceId, issueProcessSpanId, ...names);
         return rows.map(rowToPhaseSpan);
     };
+    const listAggregates = () => selectAggregates.all().map(rowToAggregate);
     const close = () => {
         db.close();
     };
@@ -237,8 +412,10 @@ export function createStorage(path) {
         saveRawSpan,
         listIssueProcesses,
         getIssueProcess,
+        getIssueProcessBySpanId,
         listAgentIterations,
         listPhaseSpans,
+        listAggregates,
         close,
     };
 }
@@ -256,9 +433,13 @@ function rowToIssueProcess(row) {
         parentSpanId: nullableStr(r.parent_span_id),
         issueIdentifier: String(r.issue_identifier ?? ""),
         issueId: nullableStr(r.issue_id),
+        issueTitle: nullableStr(r.issue_title),
+        issueLabels: parseLabels(r.issue_labels),
         branch: nullableStr(r.branch),
         outcomeKind: nullableStr(r.outcome_kind),
         outcomeDetail: nullableStr(r.outcome_detail),
+        prUrl: nullableStr(r.pr_url),
+        hitlReason: nullableStr(r.hitl_reason),
         startTimeUnixNano: String(r.start_time_unix_nano ?? ""),
         endTimeUnixNano: String(r.end_time_unix_nano ?? ""),
         statusCode: nullableNum(r.status_code),
@@ -266,6 +447,26 @@ function rowToIssueProcess(row) {
         insertedAt: String(r.inserted_at ?? ""),
     };
 }
/**
 * VA-387: turn the stored `issue_labels` value back into an array of
 * strings. The column holds a JSON-encoded array; anything else —
 * NULL, a non-string value, malformed JSON, or JSON that is not an
 * array — yields an empty list. Non-string entries inside a decoded
 * array are dropped.
 */
function parseLabels(v) {
    // null and undefined are covered here too: neither is a string.
    if (typeof v !== "string") {
        return [];
    }
    let decoded;
    try {
        decoded = JSON.parse(v);
    }
    catch {
        return [];
    }
    return Array.isArray(decoded)
        ? decoded.filter((label) => typeof label === "string")
        : [];
}
 function rowToAgentIteration(row) {
     const r = row;
     return {
@@ -302,3 +503,18 @@ function nullableNum(v) {
     const n = Number(v);
     return Number.isFinite(n) ? n : null;
 }
/**
 * Map one `evaluator_aggregates_v1` result row (snake_case columns)
 * onto the camelCase aggregate object returned by `listAggregates`.
 * Counts and rates default to 0 when the column is null/undefined;
 * the median/p95 fields go through `nullableNum`.
 */
function rowToAggregate(row) {
    // Null/undefined collapses to 0 before numeric coercion.
    const numberOrZero = (value) => Number(value ?? 0);
    const {
        category,
        sample_size: sampleSize,
        median_iteration_count: medianIterationCount,
        p95_iteration_count: p95IterationCount,
        median_wall_time_ms: medianWallTimeMs,
        p95_wall_time_ms: p95WallTimeMs,
        reviewer_rejection_rate: reviewerRejectionRate,
        revert_rate: revertRate,
        hitl_escape_rate: hitlEscapeRate,
        infra_error_rate: infraErrorRate,
    } = row;
    return {
        category: String(category ?? ""),
        sampleSize: numberOrZero(sampleSize),
        medianIterationCount: nullableNum(medianIterationCount),
        p95IterationCount: nullableNum(p95IterationCount),
        medianWallTimeMs: nullableNum(medianWallTimeMs),
        p95WallTimeMs: nullableNum(p95WallTimeMs),
        reviewerRejectionRate: numberOrZero(reviewerRejectionRate),
        revertRate: numberOrZero(revertRate),
        hitlEscapeRate: numberOrZero(hitlEscapeRate),
        infraErrorRate: numberOrZero(infraErrorRate),
    };
}

package/dist/dashboard/views.js CHANGED Viewed

@@ -60,7 +60,8 @@ export function renderListView(rows) {
 function renderRow(r) {
     const kind = r.outcomeKind ?? "pending";
     const outcomeCls = `outcome outcome-${escapeHtml(kind)}`;
-    const href = `/issue/${encodeURIComponent(r.traceId)}/${encodeURIComponent(r.spanId)}`;
+    // VA-387: canonical detail link uses the span_id alone.
+    const href = `/issue-processes/${encodeURIComponent(r.spanId)}`;
     return `<tr>
     <td class="id"><a href="${escapeHtml(href)}">${escapeHtml(r.issueIdentifier)}</a></td>
     <td class="${outcomeCls}">${escapeHtml(kind)}</td>
@@ -83,6 +84,18 @@ export function renderDetailView(vm) {
     const ip = vm.issueProcess;
     const kind = ip.outcomeKind ?? "pending";
     const outcomeCls = `outcome outcome-${escapeHtml(kind)}`;
+    const titleLine = ip.issueTitle
+        ? `<div class="title">${escapeHtml(ip.issueTitle)}</div>`
+        : "";
+    const prLine = ip.prUrl
+        ? `<div><span class="label">PR:</span><a href="${escapeHtml(ip.prUrl)}" rel="noopener noreferrer" target="_blank">${escapeHtml(ip.prUrl)}</a></div>`
+        : "";
+    // HITL reason is only rendered when present (HITL or errored runs).
+    // For opened/reverted outcomes we omit the row entirely rather than
+    // showing an empty label.
+    const hitlLine = ip.hitlReason
+        ? `<div><span class="label">HITL reason:</span><span class="detail">${escapeHtml(ip.hitlReason)}</span></div>`
+        : "";
     return `<!doctype html>
 <html lang="en">
 <head>
@@ -90,6 +103,7 @@ export function renderDetailView(vm) {
   <title>${escapeHtml(ip.issueIdentifier)} · runway dashboard</title>
   <style>${SHARED_STYLE}
     .breadcrumb { color: #9ca3af; margin-bottom: 16px; font-size: 12px; }
+    .title { font-size: 14px; color: #d4d4d8; margin: -8px 0 12px; }
     .meta { margin: 4px 0 16px; }
     .meta .label { color: #9ca3af; margin-right: 4px; }
     .timeline { position: relative; height: 28px; background: #18181b;
@@ -113,8 +127,11 @@ export function renderDetailView(vm) {
 <body>
   <div class="breadcrumb"><a href="/">← all issue processes</a></div>
   <h1>${escapeHtml(ip.issueIdentifier)} · <span class="${outcomeCls}">${escapeHtml(kind)}</span></h1>
+  ${titleLine}
   <div class="meta">
     <div><span class="label">branch:</span><code>${escapeHtml(ip.branch ?? "—")}</code></div>
+    ${prLine}
+    ${hitlLine}
     <div><span class="label">detail:</span><span class="detail">${escapeHtml(ip.outcomeDetail ?? "")}</span></div>
     <div><span class="label">seen at:</span>${escapeHtml(ip.insertedAt)}</div>
   </div>

package/dist/finalize.js CHANGED Viewed

@@ -1,10 +1,23 @@
 import { Effect } from "effect";
+import { rebaseOntoBase } from "./git.js";
 /**
- * Push the agent branch, open the PR, transition the Linear issue to
- * the in-review status, and link the PR back on the issue.
+ * VA-419: rebase the agent branch onto the latest `origin/<baseBranch>`,
+ * then push and open the PR. If the rebase hits a conflict, restore
+ * pre-rebase state and surface a `rebase-conflict` outcome — the
+ * orchestrator routes it to HITL so the operator can reconcile
+ * manually (or let VA-417 reset the branch on the next drain).
  */
 export const finalize = (issue, deps, branch) => Effect.gen(function* () {
     const { config, cwd, baseBranch, linear, github } = deps;
+    const rebase = yield* rebaseOntoBase(cwd, baseBranch, branch).pipe(Effect.withSpan("rebaseOntoBase"));
+    if (rebase.kind === "conflict") {
+        return {
+            kind: "rebase-conflict",
+            baseBranch,
+            conflictedFiles: rebase.conflictedFiles,
+            reviewVerdict: "REVIEW: APPROVED",
+        };
+    }
     yield* github.pushBranch(cwd, branch).pipe(Effect.withSpan("pushBranch"));
     const prBody = buildPrBody(issue);
     const prUrl = yield* github
@@ -20,6 +33,25 @@ export const finalize = (issue, deps, branch) => Effect.gen(function* () {
     yield* linear.comment(issue.id, `Runway opened a PR for review: ${prUrl}`);
     return { kind: "opened", detail: prUrl };
 });
/**
 * VA-419: build the multi-line HITL message for a rebase-conflict
 * outcome. The text names the base branch, lists each conflicted file
 * on its own bullet line (or a placeholder when the list is empty),
 * and quotes the review verdict that preceded the rebase.
 *
 * @param args Object with `baseBranch`, `conflictedFiles` (array) and
 *   `reviewVerdict`.
 * @returns {string} The message, lines joined with "\n".
 */
export function formatRebaseConflictReason(args) {
    const { baseBranch, conflictedFiles, reviewVerdict } = args;
    let fileLines = "  (no conflicted files reported by git — inspect manually)";
    if (conflictedFiles.length) {
        const bullets = [];
        conflictedFiles.forEach((f) => {
            bullets.push(`  - ${f}`);
        });
        fileLines = bullets.join("\n");
    }
    const lines = [];
    lines.push(`Upstream base \`${baseBranch}\` advanced during this drain; the rebase`);
    lines.push("onto the latest base produced conflicts in:");
    lines.push(fileLines);
    lines.push(`Review was APPROVED before the rebase (\`${reviewVerdict}\`); the agent's diff`);
    lines.push("is good but needs operator reconciliation against the new base. Re-run runway");
    lines.push("after rebasing manually, or let VA-417 handle it on the next drain.");
    return lines.join("\n");
}
 // VA-412: `Closes` (not `Refs`) is the Linear GitHub-integration magic
 // word that auto-transitions the issue to Done on PR merge. `Refs`
 // only attaches the PR to the issue and leaves it stuck In Progress.