@valescoagency/runway 0.13.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -541,7 +541,7 @@ These are tractable, just not v1.
541
541
 
542
542
  ## Status
543
543
 
544
- 0.13.0 — production-shaped and dogfooded against live Linear queues.
544
+ 0.14.1 — production-shaped and dogfooded against live Linear queues.
545
545
  The end-to-end pipeline (init → run → review → PR) is stable; surface
546
546
  may still shift as the orchestrator's policy and iteration mechanics
547
547
  mature. See [CHANGELOG.md](./CHANGELOG.md) for per-release detail.
@@ -52,6 +52,10 @@ function projectDrain(span) {
52
52
  spanId: span.spanId,
53
53
  startTimeUnixNano: span.startTimeUnixNano,
54
54
  endTimeUnixNano: span.endTimeUnixNano,
55
+ // VA-455: an ended drain's "last activity" is its end time —
56
+ // the storage layer keeps last_seen monotonic, so setting it
57
+ // here ensures the value never regresses below the real end.
58
+ lastSeenUnixNano: span.endTimeUnixNano,
55
59
  attempts: numAttr(m["runway.drain.attempts"]),
56
60
  opened: numAttr(m["runway.drain.opened"]),
57
61
  hitl: numAttr(m["runway.drain.hitl"]),
@@ -137,6 +141,53 @@ function numAttr(v) {
137
141
  function strArrayAttr(v) {
138
142
  return Array.isArray(v) ? v : [];
139
143
  }
144
+ /**
145
+ * VA-455: the canonical body string runway emits at the top of the
146
+ * `drainQueue` span. The projector matches on this exact value to
147
+ * decide which log records become active-drain markers — keep them
148
+ * in lock-step with `orchestrator.ts`.
149
+ */
150
+ export const DRAIN_STARTED_LOG = "drain.started";
151
/**
 * VA-455: scan an OTLP logs payload for `drain.started` markers.
 * Each match becomes an `ActiveDrainMarker` carrying the drain's
 * trace_id, the drainQueue span_id, and the log timestamp. The
 * storage layer's `markDrainActive` upserts a `drains` row on each
 * marker so the dashboard's active-drain card lights up shortly
 * after `runway run` starts, instead of waiting for the first
 * `processIssue` span to end.
 *
 * Records missing trace_id, span_id, or a usable timestamp are
 * dropped — we won't fabricate any of those, and a drain.started
 * marker without them has nothing useful to bind.
 *
 * The payload comes straight from a parsed HTTP body, so every level
 * is treated defensively: a `null` payload, non-array containers, and
 * non-string ids yield no markers instead of throwing mid-request.
 *
 * @param {object | null | undefined} payload parsed OTLP/JSON logs
 *   export request (`{ resourceLogs: [{ scopeLogs: [{ logRecords }] }] }`).
 * @returns {Array<{ traceId: string, spanId: string,
 *   startTimeUnixNano: string | number, lastSeenUnixNano: string | number }>}
 *   one marker per matching record, in payload order.
 */
export function extractActiveDrainMarkers(payload) {
    // Anything that is not an array is treated as "no entries".
    const asList = (value) => (Array.isArray(value) ? value : []);
    // Ids must be non-blank strings; anything else collapses to "".
    const cleanId = (value) => (typeof value === "string" ? value.trim() : "");
    // OTLP uses 0 for "unknown or missing timestamp". In OTLP/JSON that
    // arrives as the string "0", which is truthy and non-nullish — so it
    // has to be rejected explicitly for the observed-time fallback to
    // kick in. A usable value is returned untouched.
    const knownTimestamp = (value) => {
        if (value == null) {
            return undefined;
        }
        const text = String(value).trim();
        return text === "" || /^0+$/.test(text) ? undefined : value;
    };
    const markers = [];
    for (const resource of asList(payload?.resourceLogs)) {
        for (const scope of asList(resource?.scopeLogs)) {
            for (const record of asList(scope?.logRecords)) {
                if (record?.body?.stringValue !== DRAIN_STARTED_LOG) {
                    continue;
                }
                const traceId = cleanId(record.traceId);
                const spanId = cleanId(record.spanId);
                const ts = knownTimestamp(record.timeUnixNano)
                    ?? knownTimestamp(record.observedTimeUnixNano);
                if (!traceId || !spanId || ts === undefined) {
                    continue;
                }
                // A freshly announced drain was "last seen" at the moment
                // it started.
                markers.push({
                    traceId,
                    spanId,
                    startTimeUnixNano: ts,
                    lastSeenUnixNano: ts,
                });
            }
        }
    }
    return markers;
}
140
191
  /**
141
192
  * VA-388: project an OTLP logs payload into `LogRecordRow`s. Records
142
193
  * without a trace_id are dropped — every Effect log emitted under
@@ -1,7 +1,7 @@
1
1
  import { createServer } from "node:http";
2
2
  import { createLinearAdapter, startLinearSync, } from "./linear-sync.js";
3
3
  import { createEventBus } from "./events.js";
4
- import { projectLogs, projectPayload } from "./projector.js";
4
+ import { extractActiveDrainMarkers, projectLogs, projectPayload, } from "./projector.js";
5
5
  import { createStorage, } from "./storage.js";
6
6
  import { META_REVIEW_PAGE_SIZE, renderDetailView, renderIssueProcessRows, renderListView, renderLogsSection, renderMetaReviewDetailView, renderMetaReviewListView, } from "./views.js";
7
7
  // VA-389: phase spans we surface on the detail page's timeline.
@@ -239,6 +239,14 @@ async function handleOtlpLogs(req, res, storage, events) {
239
239
  writeError(res, 400, "invalid_json", asMessage(err));
240
240
  return;
241
241
  }
242
+ // VA-455: surface in-flight drains the moment the runway process
243
+ // emits its `drain.started` log marker — well before the first
244
+ // processIssue span ends. `markDrainActive` is no-op when the
245
+ // drain has already closed, so reordering across OTLP retries is
246
+ // safe.
247
+ for (const m of extractActiveDrainMarkers(payload)) {
248
+ storage.markDrainActive(m);
249
+ }
242
250
  for (const r of projectLogs(payload)) {
243
251
  storage.appendLogRecord(r);
244
252
  // VA-391: the SSE detail-pane stream live-tails the Logs section
@@ -1,4 +1,11 @@
1
1
  import { DatabaseSync } from "node:sqlite";
2
+ /**
3
+ * VA-455: how stale a drain's last_seen can be before getActiveDrain
4
+ * treats it as crashed and hides it. Runway emits a heartbeat log
5
+ * every 30s while drainQueue is active; 90s gives 3× headroom so a
6
+ * single dropped heartbeat doesn't toggle the dashboard card.
7
+ */
8
+ export const ACTIVE_DRAIN_STALENESS_NANOS = 90n * 1000000000n;
2
9
  /**
3
10
  * VA-406: named constants for the `meta_reviews.kind` alphabet.
4
11
  * Used by the IRA passes when stamping rows + by gateway queries
@@ -23,11 +30,17 @@ export const META_RUN_REVIEW_COMPLETION_KINDS = [
23
30
  META_REVIEW_KIND.FAILED,
24
31
  ];
25
32
  const SCHEMA = `
33
+ -- VA-455: end_time_unix_nano is nullable so an "active drain" row
34
+ -- inserted by the drain.started log marker can exist before the
35
+ -- drainQueue span has actually ended. last_seen_unix_nano carries
36
+ -- the most recent log record's timestamp for the trace so the
37
+ -- dashboard's active-drain query can age out crashed drains.
26
38
  CREATE TABLE IF NOT EXISTS drains (
27
39
  trace_id TEXT NOT NULL,
28
40
  span_id TEXT NOT NULL,
29
41
  start_time_unix_nano TEXT NOT NULL,
30
- end_time_unix_nano TEXT NOT NULL,
42
+ end_time_unix_nano TEXT,
43
+ last_seen_unix_nano TEXT,
31
44
  attempts INTEGER,
32
45
  opened INTEGER,
33
46
  hitl INTEGER,
@@ -165,6 +178,59 @@ const DEFAULT_AGGREGATE_WINDOW = 30;
165
178
  * of `hitl_escape_rate` — a review rejection routes to HITL, so both
166
179
  * rates count the same row.
167
180
  */
181
/**
 * VA-455: relax the `end_time_unix_nano TEXT NOT NULL` constraint on
 * pre-VA-455 `drains` tables so the log-driven active-drain row can
 * land with end_time = NULL. SQLite has no `ALTER COLUMN`, so the
 * table is rebuilt (create `drains_v2`, copy, drop, rename) — guarded
 * by PRAGMA so databases whose column is already nullable, or that
 * have no `drains` table / column at all, skip the work.
 *
 * The copy does not assume `last_seen_unix_nano` already exists on
 * the legacy table: when it is missing the new column is filled with
 * NULL, so the rebuild does not depend on a separate ADD COLUMN
 * migration having succeeded first.
 *
 * NOTE(review): if a view that references `drains` already exists in
 * the database when this runs, recent SQLite versions may reject the
 * final RENAME (the view is re-validated while `drains` is absent).
 * Presumably the read-model view is (re)installed after this step —
 * confirm it cannot pre-exist on a legacy database.
 *
 * @param db database handle exposing `prepare(sql)` with `.all()` /
 *   `.run()` on the returned statement.
 * @throws rethrows the first error raised by any rebuild statement,
 *   after attempting a ROLLBACK.
 */
function relaxDrainsEndTimeNotNull(db) {
    const columns = db.prepare("PRAGMA table_info('drains')").all();
    const endTime = columns.find((c) => c.name === "end_time_unix_nano");
    // Number(...) also covers handles configured to return BigInt.
    if (!endTime || Number(endTime.notnull) === 0) {
        return;
    }
    const hasLastSeen = columns.some((c) => c.name === "last_seen_unix_nano");
    // Fixed literal chosen from two constants — never caller input.
    const lastSeenSource = hasLastSeen
        ? "last_seen_unix_nano"
        : "NULL AS last_seen_unix_nano";
    const run = (sql) => db.prepare(sql).run();
    // Single transaction so the `drains` name is never absent from the
    // committed schema between DROP and RENAME.
    run("BEGIN");
    try {
        run(`CREATE TABLE drains_v2 (
            trace_id TEXT NOT NULL,
            span_id TEXT NOT NULL,
            start_time_unix_nano TEXT NOT NULL,
            end_time_unix_nano TEXT,
            last_seen_unix_nano TEXT,
            attempts INTEGER,
            opened INTEGER,
            hitl INTEGER,
            errored INTEGER,
            status_code INTEGER,
            status_message TEXT,
            inserted_at TEXT NOT NULL DEFAULT (datetime('now')),
            PRIMARY KEY (trace_id, span_id)
        )`);
        run(`INSERT INTO drains_v2 (
            trace_id, span_id, start_time_unix_nano, end_time_unix_nano,
            last_seen_unix_nano, attempts, opened, hitl, errored,
            status_code, status_message, inserted_at
        )
        SELECT
            trace_id, span_id, start_time_unix_nano, end_time_unix_nano,
            ${lastSeenSource}, attempts, opened, hitl, errored,
            status_code, status_message, inserted_at
        FROM drains`);
        run("DROP TABLE drains");
        run("ALTER TABLE drains_v2 RENAME TO drains");
        run("COMMIT");
    }
    catch (err) {
        // Some failures (disk full, I/O error, ...) make SQLite roll the
        // transaction back on its own; the explicit ROLLBACK then fails
        // with "no transaction is active". That secondary failure is
        // harmless and must not replace the error that actually broke
        // the migration.
        try {
            run("ROLLBACK");
        }
        catch {
            // Transaction already gone — nothing left to undo.
        }
        throw err;
    }
}
168
234
  function aggregatesViewDdl(windowDrains) {
169
235
  // windowDrains is the only spot we interpolate rather than
170
236
  // parameter-bind (CREATE VIEW can't take params). Coerce to a
@@ -299,6 +365,10 @@ export function createStorage(path, opts = {}) {
299
365
  // filtering. Old rows decode to "" — the poller only filters on
300
366
  // known type values so pre-migration rows behave unchanged.
301
367
  `ALTER TABLE linear_snapshots ADD COLUMN status_type TEXT`,
368
+ // VA-455: per-trace heartbeat timestamp for the log-driven active
369
+ // drain query (see SCHEMA above for semantics). Older DBs need
370
+ // the column added in-place.
371
+ `ALTER TABLE drains ADD COLUMN last_seen_unix_nano TEXT`,
302
372
  ]) {
303
373
  try {
304
374
  db.exec(sql);
@@ -308,6 +378,12 @@ export function createStorage(path, opts = {}) {
308
378
  // dashboard boot ran the same migration.
309
379
  }
310
380
  }
381
+ // VA-455: SQLite has no `ALTER COLUMN`, so relaxing the original
382
+ // `end_time_unix_nano TEXT NOT NULL` constraint on legacy DBs
383
+ // requires a table-rebuild. Only fire when PRAGMA reports the
384
+ // column is still NOT NULL — fresh installs go through the new
385
+ // SCHEMA above and skip this branch.
386
+ relaxDrainsEndTimeNotNull(db);
311
387
  // VA-399: install the evaluator-facing read-model view after the
312
388
  // base tables exist (and after VA-387's column migrations above),
313
389
  // but before any prepared statement is created — a
@@ -317,17 +393,58 @@ export function createStorage(path, opts = {}) {
317
393
  const insertDrain = db.prepare(`
318
394
  INSERT INTO drains (
319
395
  trace_id, span_id, start_time_unix_nano, end_time_unix_nano,
396
+ last_seen_unix_nano,
320
397
  attempts, opened, hitl, errored, status_code, status_message
321
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
398
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
322
399
  ON CONFLICT (trace_id, span_id) DO UPDATE SET
323
400
  start_time_unix_nano = excluded.start_time_unix_nano,
324
401
  end_time_unix_nano = excluded.end_time_unix_nano,
402
+ last_seen_unix_nano = excluded.last_seen_unix_nano,
325
403
  attempts = excluded.attempts,
326
404
  opened = excluded.opened,
327
405
  hitl = excluded.hitl,
328
406
  errored = excluded.errored,
329
407
  status_code = excluded.status_code,
330
408
  status_message = excluded.status_message
409
+ `);
410
+ // VA-455: insert (or refresh last_seen on) the active-drain row
411
+ // when runway emits its `drain.started` log marker. ON CONFLICT
412
+ // guards against two cases:
413
+ // 1. The drainQueue span has already ended (end_time NOT NULL)
414
+ // — a late-arriving marker can't reanimate a closed drain,
415
+ // so we leave the row untouched.
416
+ // 2. Repeated markers (shouldn't happen, but if they do we just
417
+ // bump last_seen forward; monotonic via the MAX comparison).
418
+ const markActiveDrainStmt = db.prepare(`
419
+ INSERT INTO drains (
420
+ trace_id, span_id, start_time_unix_nano,
421
+ end_time_unix_nano, last_seen_unix_nano,
422
+ attempts, opened, hitl, errored,
423
+ status_code, status_message
424
+ ) VALUES (?, ?, ?, NULL, ?, 0, 0, 0, 0, NULL, NULL)
425
+ ON CONFLICT (trace_id, span_id) DO UPDATE SET
426
+ last_seen_unix_nano = excluded.last_seen_unix_nano
427
+ WHERE drains.end_time_unix_nano IS NULL
428
+ AND (
429
+ drains.last_seen_unix_nano IS NULL
430
+ OR CAST(drains.last_seen_unix_nano AS INTEGER)
431
+ < CAST(excluded.last_seen_unix_nano AS INTEGER)
432
+ )
433
+ `);
434
+ // VA-455: every log record carrying a trace_id pushes that
435
+ // trace's active drain's last_seen forward, so the active-drain
436
+ // query can age out crashed drains. UPDATE-only (never inserts)
437
+ // so a stray log record from a trace without a `drain.started`
438
+ // marker doesn't fabricate an active drain.
439
+ const bumpDrainLastSeenStmt = db.prepare(`
440
+ UPDATE drains
441
+ SET last_seen_unix_nano = ?
442
+ WHERE trace_id = ?
443
+ AND end_time_unix_nano IS NULL
444
+ AND (
445
+ last_seen_unix_nano IS NULL
446
+ OR CAST(last_seen_unix_nano AS INTEGER) < CAST(? AS INTEGER)
447
+ )
331
448
  `);
332
449
  const insertIssueProcess = db.prepare(`
333
450
  INSERT INTO issue_processes (
@@ -439,33 +556,64 @@ export function createStorage(path, opts = {}) {
439
556
  project_id, project_name, status_type
440
557
  FROM linear_snapshots
441
558
  `);
442
- // VA-391: "active drain" = a trace_id with one or more
443
- // issue_processes rows but no row in `drains`. The drain span
444
- // hasn't been emitted yet (BatchSpanProcessor flushes on end), so
445
- // we infer in-flight by the absence of the parent. Earliest
446
- // start_time across the trace's issue_processes is the drain
447
- // start; the counters slice by outcome_kind. LIMIT 1 picks the
448
- // most-recently-started in-flight drain v1 only runs one drain
449
- // at a time but the SQL is robust to a future parallel mode.
559
+ // VA-391 + VA-455: "active drain" comes from two paths today.
560
+ //
561
+ // (a) VA-455 log-driven: a `drains` row with end_time NULL and a
562
+ // fresh `last_seen_unix_nano` (within the staleness window).
563
+ // runway emits `Effect.logInfo("drain.started")` at the top
564
+ // of the drainQueue span; the projector recognises that body
565
+ // and inserts the row via `markDrainActive`. Subsequent log
566
+ // records bump last_seen.
567
+ //
568
+ // (b) VA-391 legacy fallback: a trace_id with issue_processes
569
+ // rows but no `drains` row at all. Kept so dashboards running
570
+ // against pre-VA-455 runway binaries still light up — once
571
+ // the first processIssue ends.
572
+ //
573
+ // The two paths are mutually exclusive by construction (path (a)
574
+ // writes a `drains` row, which excludes the trace from path (b)).
575
+ // Among candidates, most-recently-started wins; v1 only runs one
576
+ // drain at a time but the SQL is robust to a future parallel mode.
577
+ //
578
+ // Bind param: the staleness floor in unix-nanos as a TEXT-encoded
579
+ // integer. Pass `String(BigInt(Date.now()) * 1_000_000n - staleness)`
580
+ // at call time so the query stays a static prepared statement.
450
581
  const getActiveDrainStmt = db.prepare(`
451
- WITH active AS (
452
- SELECT trace_id
582
+ WITH active_candidates AS (
583
+ SELECT
584
+ trace_id,
585
+ start_time_unix_nano AS started_at_unix_nano
586
+ FROM drains
587
+ WHERE end_time_unix_nano IS NULL
588
+ AND last_seen_unix_nano IS NOT NULL
589
+ AND CAST(last_seen_unix_nano AS INTEGER) > CAST(? AS INTEGER)
590
+
591
+ UNION ALL
592
+
593
+ SELECT
594
+ trace_id,
595
+ CAST(MIN(CAST(start_time_unix_nano AS INTEGER)) AS TEXT)
596
+ AS started_at_unix_nano
453
597
  FROM issue_processes
454
598
  WHERE trace_id NOT IN (SELECT trace_id FROM drains)
455
599
  GROUP BY trace_id
456
- ORDER BY MIN(CAST(start_time_unix_nano AS INTEGER)) DESC
600
+ ),
601
+ chosen AS (
602
+ SELECT trace_id, started_at_unix_nano
603
+ FROM active_candidates
604
+ ORDER BY CAST(started_at_unix_nano AS INTEGER) DESC
457
605
  LIMIT 1
458
606
  )
459
607
  SELECT
460
- ip.trace_id,
461
- MIN(ip.start_time_unix_nano) AS started_at_unix_nano,
462
- COUNT(*) AS issue_count,
463
- SUM(CASE WHEN ip.outcome_kind = 'opened' THEN 1 ELSE 0 END) AS opened_count,
464
- SUM(CASE WHEN ip.outcome_kind = 'hitl' THEN 1 ELSE 0 END) AS hitl_count,
465
- SUM(CASE WHEN ip.outcome_kind = 'errored' THEN 1 ELSE 0 END) AS errored_count
466
- FROM issue_processes ip
467
- WHERE ip.trace_id = (SELECT trace_id FROM active)
468
- GROUP BY ip.trace_id
608
+ c.trace_id,
609
+ c.started_at_unix_nano,
610
+ COUNT(ip.span_id) AS issue_count,
611
+ COALESCE(SUM(CASE WHEN ip.outcome_kind = 'opened' THEN 1 ELSE 0 END), 0) AS opened_count,
612
+ COALESCE(SUM(CASE WHEN ip.outcome_kind = 'hitl' THEN 1 ELSE 0 END), 0) AS hitl_count,
613
+ COALESCE(SUM(CASE WHEN ip.outcome_kind = 'errored' THEN 1 ELSE 0 END), 0) AS errored_count
614
+ FROM chosen c
615
+ LEFT JOIN issue_processes ip ON ip.trace_id = c.trace_id
616
+ GROUP BY c.trace_id, c.started_at_unix_nano
469
617
  `);
470
618
  const insertLogRecord = db.prepare(`
471
619
  INSERT INTO log_records (
@@ -507,7 +655,15 @@ export function createStorage(path, opts = {}) {
507
655
  ORDER BY CAST(timestamp_unix_nano AS INTEGER) ASC, span_id ASC
508
656
  `);
509
657
  const saveDrain = (d) => {
510
- insertDrain.run(d.traceId, d.spanId, d.startTimeUnixNano, d.endTimeUnixNano, asInt(d.attempts), asInt(d.opened), asInt(d.hitl), asInt(d.errored), asInt(d.statusCode), d.statusMessage);
658
+ insertDrain.run(d.traceId, d.spanId, d.startTimeUnixNano, d.endTimeUnixNano, d.lastSeenUnixNano, asInt(d.attempts), asInt(d.opened), asInt(d.hitl), asInt(d.errored), asInt(d.statusCode), d.statusMessage);
659
+ };
660
+ // VA-455: log-marker-driven insert for in-flight drains. Called
661
+ // from server.ts when `projectLogs` surfaces a `drain.started`
662
+ // record (see `extractActiveDrainMarkers`). The first call lands
663
+ // the row with end_time NULL; subsequent calls (or any log on the
664
+ // trace flowing through `appendLogRecord`) push last_seen forward.
665
+ const markDrainActive = (m) => {
666
+ markActiveDrainStmt.run(m.traceId, m.spanId, m.startTimeUnixNano, m.lastSeenUnixNano);
511
667
  };
512
668
  const saveIssueProcess = (p) => {
513
669
  insertIssueProcess.run(p.traceId, p.spanId, p.parentSpanId, p.issueIdentifier, p.issueId, p.issueTitle,
@@ -647,10 +803,20 @@ export function createStorage(path, opts = {}) {
647
803
  Object.keys(r.attributes).length === 0
648
804
  ? null
649
805
  : JSON.stringify(r.attributes));
806
+ // VA-455: piggy-back on every log record to keep the trace's
807
+ // active-drain row "alive" — drives the staleness TTL in
808
+ // getActiveDrain. No-op when no active drain exists for the
809
+ // trace (UPDATE matches zero rows).
810
+ bumpDrainLastSeenStmt.run(r.timestampUnixNano, r.traceId, r.timestampUnixNano);
650
811
  };
651
812
  const streamLogsFor = (traceId) => listLogsByTrace.all(traceId).map(rowToLogRecord);
652
813
  const getActiveDrain = () => {
653
- const row = getActiveDrainStmt.get();
814
+ // VA-455: staleness floor — drains whose most recent log record
815
+ // is older than this no longer count as active. Matches the 30s
816
+ // heartbeat cadence on the runway side with a 3× safety margin
817
+ // so a single dropped heartbeat doesn't toggle the card to Idle.
818
+ const stalenessFloorNanos = BigInt(Date.now()) * 1000000n - ACTIVE_DRAIN_STALENESS_NANOS;
819
+ const row = getActiveDrainStmt.get(String(stalenessFloorNanos));
654
820
  if (!row || row.trace_id == null)
655
821
  return null;
656
822
  return {
@@ -758,6 +924,7 @@ export function createStorage(path, opts = {}) {
758
924
  };
759
925
  return {
760
926
  saveDrain,
927
+ markDrainActive,
761
928
  saveIssueProcess,
762
929
  saveAgentIteration,
763
930
  saveRawSpan,
@@ -1,6 +1,6 @@
1
1
  import { existsSync } from "node:fs";
2
2
  import { join } from "node:path";
3
- import { Effect } from "effect";
3
+ import { Duration, Effect, Schedule } from "effect";
4
4
  import { detectBaseBranch, pruneStaleAgentBranch } from "./git.js";
5
5
  import { loadPolicy } from "./policy.js";
6
6
  import { flagHitl, handleProcessFailure } from "./hitl.js";
@@ -41,6 +41,19 @@ export function assertSandcastleInitialised(cwd) {
41
41
  export const drainQueue = (deps, opts = {}) => Effect.gen(function* () {
42
42
  const { config, linear } = deps;
43
43
  const max = opts.max ?? Number.POSITIVE_INFINITY;
44
+ // VA-455: announce the drain to the dashboard before any other
45
+ // work. The log carries the drainQueue span's trace_id/span_id
46
+ // (we're already inside `Effect.withSpan("drainQueue")`), and the
47
+ // dashboard's projector matches on the literal body string —
48
+ // keep it in lock-step with `DRAIN_STARTED_LOG` in projector.ts.
49
+ yield* Effect.logInfo("drain.started");
50
+ // VA-455: keep the dashboard's "still alive" signal warm during
51
+ // long impl/review phases. `Effect.fork` ties the heartbeat fiber
52
+ // to this gen's lifetime, so it's interrupted automatically when
53
+ // drainQueue completes (success, failure, or interrupt). The 30s
54
+ // cadence matches `ACTIVE_DRAIN_STALENESS_NANOS / 3` on the
55
+ // dashboard so a single dropped flush doesn't toggle the card.
56
+ yield* Effect.fork(Effect.logInfo("drain.heartbeat").pipe(Effect.repeat(Schedule.spaced(Duration.seconds(30)))));
44
57
  // Resolve the base branch once at startup so every issue in the
45
58
  // drain sees the same answer (and so a misconfigured repo fails
46
59
  // fast, before we touch any Linear state).
@@ -1,4 +1,4 @@
1
- import { existsSync, readFileSync, writeFileSync } from "node:fs";
1
+ import { existsSync, readdirSync, readFileSync, writeFileSync } from "node:fs";
2
2
  import { join } from "node:path";
3
3
  import { AUTH_MODE_ENV_VAR, TEMPLATES_DIR, } from "./scaffolder.js";
4
4
  import { buildAgentImage } from "./scaffolder-image.js";
@@ -80,7 +80,36 @@ export function parseOpRefs(schema, opts) {
80
80
  // ---------------------------------------------------------------------------
81
81
  // Render: Dockerfile
82
82
  // ---------------------------------------------------------------------------
83
- function renderDockerfile(cwd, tier, opts, enforceManualEditGuard) {
83
/**
 * VA-466: read every historical snapshot under `templates/history/`
 * that matches the base Dockerfile naming pattern and flatten them to
 * the list of lines that have ever shipped as canonical content.
 *
 * Files are named `Dockerfile.claude-code.base.v<n>` (where `<n>` is
 * the runway version that owned that exact content). The detector
 * doesn't care about version ordering — it just needs the union of
 * lines so a target Dockerfile rendered at any prior version doesn't
 * trigger false-positive "manual edit" reports.
 *
 * A missing or empty `history/` directory returns an empty list — both
 * are valid states. Sub-directories are skipped even when their name
 * matches the prefix: reading one would throw and abort the whole
 * Dockerfile render over a stray folder.
 *
 * @returns {string[]} every line (split on "\n", blanks included) of
 *   every matching snapshot, in directory-listing order; duplicates
 *   are not removed.
 */
function historicalBaseTemplateLines() {
    const historyDir = join(TEMPLATES_DIR, "history");
    if (!existsSync(historyDir)) {
        return [];
    }
    return readdirSync(historyDir, { withFileTypes: true })
        .filter((entry) => !entry.isDirectory()
            && entry.name.startsWith("Dockerfile.claude-code.base"))
        .flatMap((entry) => readFileSync(join(historyDir, entry.name), "utf8").split("\n"));
}
112
+ export function renderDockerfile(cwd, tier, opts, enforceManualEditGuard) {
84
113
  const dockerfilePath = join(cwd, ".sandcastle", "Dockerfile");
85
114
  const before = readFileSync(dockerfilePath, "utf8");
86
115
  const base = readFileSync(join(TEMPLATES_DIR, "Dockerfile.claude-code.base"), "utf8");
@@ -98,8 +127,22 @@ function renderDockerfile(cwd, tier, opts, enforceManualEditGuard) {
98
127
  }
99
128
  // Detect manual user edits: any line in `before` that isn't in the
100
129
  // expected re-rendered output is foreign. Warn loudly unless --force.
130
+ //
131
+ // VA-466: expectedLines unions the current template with every
132
+ // historical canonical template snapshot under `templates/history/`.
133
+ // Without this, a target repo's Dockerfile rendered by an older
134
+ // runway version flags every cross-version line delta as a "manual
135
+ // edit" — false-positive that forces operators to pass `--force`
136
+ // and lose the detector's real signal. Each release that mutates
137
+ // `Dockerfile.claude-code.base` snapshots the prior version into
138
+ // `templates/history/Dockerfile.claude-code.base.v<n>` BEFORE the
139
+ // edit lands; the detector unions all of them so any line that
140
+ // ever shipped as canonical stays trusted.
101
141
  if (before !== after && !opts.force && enforceManualEditGuard) {
102
142
  const expectedLines = new Set(after.split("\n"));
143
+ for (const line of historicalBaseTemplateLines()) {
144
+ expectedLines.add(line);
145
+ }
103
146
  const foreign = before
104
147
  .split("\n")
105
148
  .filter((l) => l.trim().length && !expectedLines.has(l));
package/dist/review.js CHANGED
@@ -79,7 +79,14 @@ export const runReviewPass = (issue, deps, branch) => Effect.gen(function* () {
79
79
  sandbox: docker({ env: dockerEnv(config) }),
80
80
  cwd,
81
81
  prompt: reviewPrompt,
82
- branchStrategy: { type: "head" },
82
+ // VA-456: review must check out the impl agent's branch
83
+ // explicitly. The previous `{ type: "head" }` inherited
84
+ // whatever branch the operator happened to be on in the
85
+ // main checkout — when that wasn't `branch`, the reviewer
86
+ // saw a working tree that didn't reconcile with the diff in
87
+ // its prompt and stalled silently until sandcastle's idle
88
+ // timeout fired (10 min of dead air, then INFRA_ERROR).
89
+ branchStrategy: { type: "branch", branch },
83
90
  maxIterations: 1,
84
91
  name: `review-${issue.identifier}`,
85
92
  });
@@ -1,5 +1,55 @@
1
1
  import { run } from "@ai-hero/sandcastle";
2
2
  import { Effect, Redacted } from "effect";
3
import { closeSync, fstatSync, openSync, readdirSync, readFileSync, readSync, statSync } from "node:fs";
4
+ import { join } from "node:path";
5
/**
 * VA-454: when sandcastle throws, its `Error.message` is opaque
 * (`claude-code exited with code 1:` with no body — the colon is
 * literal, the stderr text goes only to the log file). To stop
 * operators from having to hunt `.sandcastle/logs/` for the real
 * cause on every failure, we read the tail of the most-recently
 * modified `*.log` file in the project's `.sandcastle/logs/`
 * directory so the caller can append it to the error message.
 *
 * Only the last `SANDCASTLE_LOG_TAIL_MAX_BYTES` of the file are read
 * from disk (positioned read, so huge logs stay cheap), then trimmed
 * to the last `maxLines` lines. When that byte window starts in the
 * middle of a line, the partial first line is discarded so the tail
 * never opens with a fragment or a split multi-byte character.
 *
 * A missing directory or any read error degrades silently to `null` —
 * the error path must not be the source of a NEW error. Entries that
 * cannot be stat'ed, or that are not regular files, are skipped
 * individually instead of discarding the whole result.
 *
 * @param {string} cwd project root containing `.sandcastle/logs/`.
 * @param {number} [maxLines] maximum number of trailing lines to
 *   return; values below 1 (or NaN) yield `null`.
 * @returns {string | null} the tail text (possibly "" for an empty
 *   log), or `null` when no log could be read.
 */
const SANDCASTLE_LOG_TAIL_LINES = 20;
const SANDCASTLE_LOG_TAIL_MAX_BYTES = 16 * 1024;
export function readLatestSandcastleLogTail(cwd, maxLines = SANDCASTLE_LOG_TAIL_LINES) {
    // `slice(-0)` is `slice(0)` and would return the whole window, so a
    // non-positive limit has to be rejected up front.
    const lineLimit = Math.floor(maxLines);
    if (!(lineLimit >= 1)) {
        return null;
    }
    try {
        const dir = join(cwd, ".sandcastle", "logs");
        let newestPath = null;
        let newestMtime = -Infinity;
        for (const name of readdirSync(dir)) {
            if (!name.endsWith(".log")) {
                continue;
            }
            const path = join(dir, name);
            let stats;
            try {
                stats = statSync(path);
            }
            catch {
                // Vanished between readdir and stat, dangling symlink, ...
                continue;
            }
            if (!stats.isFile()) {
                continue;
            }
            if (stats.mtimeMs > newestMtime) {
                newestMtime = stats.mtimeMs;
                newestPath = path;
            }
        }
        if (!newestPath) {
            return null;
        }
        let text;
        let startsMidFile;
        const fd = openSync(newestPath, "r");
        try {
            // Size and read go through the same descriptor so they describe
            // the same file even if the path is rotated underneath us.
            const size = fstatSync(fd).size;
            const offset = Math.max(0, size - SANDCASTLE_LOG_TAIL_MAX_BYTES);
            const window = Buffer.alloc(size - offset);
            const bytesRead = readSync(fd, window, 0, window.length, offset);
            text = window.subarray(0, bytesRead).toString("utf8");
            startsMidFile = offset > 0;
        }
        finally {
            closeSync(fd);
        }
        if (startsMidFile) {
            // Drop the (probably partial) first line. If the window holds
            // no newline at all, keep it — a fragment beats nothing.
            const firstBreak = text.indexOf("\n");
            if (firstBreak !== -1) {
                text = text.slice(firstBreak + 1);
            }
        }
        // Trim trailing newlines BEFORE splitting so a `…line 200\n` file
        // doesn't yield an empty final element that eats one slot in the
        // `slice(-lineLimit)` window.
        const lines = text.replace(/\n+$/, "").split("\n");
        return lines.slice(-lineLimit).join("\n");
    }
    catch {
        return null;
    }
}
3
53
  /**
4
54
  * VA-358: thin Effect wrapper around `sandcastle.run`. The agent run
5
55
  * happens inside Docker — sandcastle doesn't (yet) expose a kill
@@ -12,9 +62,18 @@ import { Effect, Redacted } from "effect";
12
62
  */
13
63
  export const runSandcastle = (args) => Effect.tryPromise({
14
64
  try: () => run(args),
15
- catch: (err) => ({
16
- message: err instanceof Error ? err.message : String(err),
17
- }),
65
+ catch: (err) => {
66
+ const base = err instanceof Error ? err.message : String(err);
67
+ // VA-454: append the tail of the most-recent `.sandcastle/logs/`
68
+ // file so the operator sees the real cause (e.g. "Invalid API
69
+ // key · Fix external API key") on the same line as the
70
+ // INFRA_ERROR — instead of having to scroll back to the run-
71
+ // start banner for the log path and open it.
72
+ const tail = readLatestSandcastleLogTail(args.cwd ?? process.cwd());
73
+ return {
74
+ message: tail ? `${base}\n${tail}` : base,
75
+ };
76
+ },
18
77
  });
19
78
  /**
20
79
  * Env vars to inject into every sandcastle container. Today this is
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@valescoagency/runway",
3
- "version": "0.13.0",
3
+ "version": "0.14.1",
4
4
  "description": "Linear-driven orchestrator + scaffolder for coding agents on Sandcastle. `runway init` scaffolds a target repo (sandcastle + varlock + 1Password); `runway run` drains a Linear queue against it; `runway doctor`, `runway upgrade`, `runway upgrade-repo` round out the lifecycle.",
5
5
  "license": "MIT",
6
6
  "author": {
@@ -49,19 +49,29 @@ ENV HOME=/home/agent
49
49
  ENV XDG_CACHE_HOME=/home/agent/.cache
50
50
  ENV TURBO_CACHE_DIR=/tmp/turbo-cache
51
51
  ENV pnpm_config_cache=/home/agent/.cache/pnpm
52
+ # VA-457: pin corepack's data dir under the agent's cache. `corepack
53
+ # prepare` below runs as root (before the USER switch), so without
54
+ # COREPACK_HOME it would write the cached pnpm tarball to /root's
55
+ # default and the agent UID couldn't read it. Anything that resolves
56
+ # `pnpm` through the corepack shim after `USER` — git hooks invoked
57
+ # by lefthook in particular — then fails or hangs trying to refetch.
58
+ ENV COREPACK_HOME=/home/agent/.cache/corepack
52
59
 
53
60
  # Pre-create cache dirs with agent ownership so the first pnpm/turbo
54
61
  # run doesn't have to chown them. Both are inside paths the agent owns
55
62
  # anyway; this just makes them exist.
56
- RUN mkdir -p /home/agent/.cache /home/agent/.cache/pnpm /tmp/turbo-cache \
63
+ RUN mkdir -p /home/agent/.cache /home/agent/.cache/pnpm /home/agent/.cache/corepack /tmp/turbo-cache \
57
64
  && chown -R $AGENT_UID:$AGENT_GID /home/agent/.cache /tmp/turbo-cache
58
65
 
59
66
  # Bake pnpm via corepack at build time so `pnpm` is on PATH inside the
60
67
  # container before any agent command runs. Pin a default; target repos
61
68
  # can override at runtime via `packageManager` in package.json +
62
- # `corepack use`.
69
+ # `corepack use`. COREPACK_HOME is set above so the data dir lands
70
+ # under /home/agent/.cache/corepack; the trailing chown re-asserts
71
+ # agent ownership over the files root just wrote there.
63
72
  RUN corepack enable \
64
- && corepack prepare pnpm@11.1.1 --activate
73
+ && corepack prepare pnpm@11.1.1 --activate \
74
+ && chown -R $AGENT_UID:$AGENT_GID /home/agent/.cache/corepack
65
75
 
66
76
  USER ${AGENT_UID}:${AGENT_GID}
67
77
 
@@ -0,0 +1,79 @@
1
+ # Canonical claude-code Dockerfile — vendored from
2
+ # @ai-hero/sandcastle's InitService.ts (CLAUDE_CODE_DOCKERFILE constant).
3
+ # Kept here so `runway init` can write it directly, without invoking
4
+ # `sandcastle init` (which has interactive prompts that hang in
5
+ # non-TTY environments like CI / Mac Mini cron).
6
+ #
7
+ # Drift policy: when sandcastle bumps its claude-code Dockerfile,
8
+ # refresh this file. The diff should be tiny — runway's tier 2 layer
9
+ # patches AFTER this base, so adopters re-run `runway init --force`
10
+ # to roll forward.
11
+
12
+ FROM node:24-bookworm
13
+
14
+ # Install system dependencies
15
+ RUN apt-get update && apt-get install -y \
16
+ git \
17
+ curl \
18
+ jq \
19
+ && rm -rf /var/lib/apt/lists/*
20
+
21
+ # Build-args for UID/GID alignment: defaults match the host user's
22
+ # UID/GID at build time so image-built files and bind-mounted files
23
+ # share an owner without runtime chown.
24
+ ARG AGENT_UID=1000
25
+ ARG AGENT_GID=1000
26
+
27
+ # Rename the base image's "node" user to "agent" and align UID/GID.
28
+ #
29
+ # Divergence from sandcastle's stock Dockerfile: stock runs
30
+ # `groupmod -g $AGENT_GID node` unconditionally, which fails on macOS
31
+ # hosts where the host GID is 20 (`staff`) — Debian's `dialout` group
32
+ # already has GID 20, and `groupmod` refuses to assign a duplicate
33
+ # GID. We guard with `getent group` so groupmod only runs if the
34
+ # target GID is unused; if it's already taken, we point the agent
35
+ # user at the pre-existing group via `usermod -g <gid>` and the
36
+ # image still works (the in-image group name is irrelevant — only the
37
+ # numeric GID matters for bind-mount permissions).
38
+ RUN if ! getent group $AGENT_GID >/dev/null; then \
39
+ groupmod -g $AGENT_GID node; \
40
+ fi \
41
+ && usermod -u $AGENT_UID -g $AGENT_GID -d /home/agent -m -l agent node
42
+
43
+ # VA-351: bake the container env up front so agents don't manually
44
+ # work around host-path leaks, missing pnpm, or unset HOME on every
45
+ # iteration. Without these, every agent run repeats the same
46
+ # corepack/TURBO_CACHE_DIR/HOME setup commands — see VA-312's run log
47
+ # for the receipts.
48
+ ENV HOME=/home/agent
49
+ ENV XDG_CACHE_HOME=/home/agent/.cache
50
+ ENV TURBO_CACHE_DIR=/tmp/turbo-cache
51
+ ENV pnpm_config_cache=/home/agent/.cache/pnpm
52
+
53
+ # Pre-create cache dirs with agent ownership so the first pnpm/turbo
54
+ # run doesn't have to chown them. Both are inside paths the agent owns
55
+ # anyway; this just makes them exist.
56
+ RUN mkdir -p /home/agent/.cache /home/agent/.cache/pnpm /tmp/turbo-cache \
57
+ && chown -R $AGENT_UID:$AGENT_GID /home/agent/.cache /tmp/turbo-cache
58
+
59
+ # Bake pnpm via corepack at build time so `pnpm` is on PATH inside the
60
+ # container before any agent command runs. Pin a default; target repos
61
+ # can override at runtime via `packageManager` in package.json +
62
+ # `corepack use`.
63
+ RUN corepack enable \
64
+ && corepack prepare pnpm@11.1.1 --activate
65
+
66
+ USER ${AGENT_UID}:${AGENT_GID}
67
+
68
+ # Install Claude Code CLI
69
+ RUN curl -fsSL https://claude.ai/install.sh | bash
70
+
71
+ # Add Claude to PATH
72
+ ENV PATH="/home/agent/.local/bin:$PATH"
73
+
74
+ WORKDIR /home/agent
75
+
76
+ # In worktree sandbox mode, Sandcastle bind-mounts the git worktree at
77
+ # the sandbox repo dir and overrides the working directory to that dir
78
+ # at container start.
79
+ ENTRYPOINT ["sleep", "infinity"]