atris 3.16.0 → 3.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -392,6 +392,41 @@ function askHuman(taskTitle) {
392
392
  });
393
393
  }
394
394
 
395
/**
 * Decide whether a child_process failure looks like a timeout or a kill.
 *
 * A timed-out execSync error carries `code: 'ETIMEDOUT'` and `signal` but not
 * `killed`, so a `killed`-only guard never fires on the error it was written
 * for; all three markers are therefore checked
 * (lesson: etimedout-error-shape, 2026-06-10).
 *
 * @param {*} err - Value caught from a child_process call; may be nullish.
 * @returns {boolean} True when `killed` is truthy, `code` is 'ETIMEDOUT', or
 *   `signal` is truthy; false otherwise (including for a nullish `err`).
 */
function isPhaseTimeoutError(err) {
  if (!err) return false;
  if (err.killed) return true;
  if (err.code === 'ETIMEDOUT') return true;
  return Boolean(err.signal);
}
404
+
405
/**
 * execSync with the phase-timeout orphan fix. Node's sync-exec timeout signals
 * only the direct child pid — the `/bin/sh -c` wrapper — so the `claude` it
 * spawned kept committing 160–296s past the 600s wall (lesson:
 * etimedout-error-shape, 2026-06-10). `detached: true` makes the wrapper a
 * process-group leader; on timeout the whole group is swept via
 * `process.kill(-pid, 'SIGKILL')`.
 *
 * The sweep is strictly best-effort: whatever it throws (ESRCH because the
 * group already died, EPERM, an unsupported negative pid, ...) is discarded so
 * that the ORIGINAL execSync error is always the one rethrown. Call sites rely
 * on that error's shape (`code`, `signal`, `stdout` passthrough) to recognise
 * a timeout; letting a sweep failure replace it would hide the timeout.
 *
 * @param {string} cmd - Shell command line handed to execSync.
 * @param {object} [opts={}] - execSync options; `detached` is always forced
 *   to true.
 * @returns {string|Buffer} Whatever execSync returns for the given options.
 * @throws The untouched execSync error when the command fails or times out.
 */
function execPhaseCommandSync(cmd, opts = {}) {
  try {
    return execSync(cmd, { ...opts, detached: true });
  } catch (err) {
    if (isPhaseTimeoutError(err) && err.pid) {
      try {
        // Negative pid targets the process group led by the wrapper.
        process.kill(-err.pid, 'SIGKILL');
      } catch {
        // Best-effort cleanup only: never let a failed sweep mask the
        // timeout error the caller needs to see.
      }
    }
    throw err;
  }
}
429
+
395
430
  /**
396
431
  * Run a phase via claude -p subprocess.
397
432
  */
@@ -403,10 +438,11 @@ function executePhaseDetailed(phase, context, options = {}) {
403
438
  fs.writeFileSync(tmpFile, prompt);
404
439
 
405
440
  try {
406
- const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Write,Edit,Glob,Grep"`;
441
+ const cmd = options.cmdOverride
442
+ || `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Write,Edit,Glob,Grep"`;
407
443
  const env = { ...process.env };
408
444
  delete env.CLAUDECODE;
409
- const output = execSync(cmd, {
445
+ const output = execPhaseCommandSync(cmd, {
410
446
  cwd: process.cwd(),
411
447
  encoding: 'utf8',
412
448
  timeout,
@@ -419,7 +455,9 @@ function executePhaseDetailed(phase, context, options = {}) {
419
455
  return { prompt, output: output || '' };
420
456
  } catch (err) {
421
457
  try { fs.unlinkSync(tmpFile); } catch {}
422
- if (err.killed) throw new Error(`${phase} timed out after ${timeout / 1000}s`);
458
+ if (isPhaseTimeoutError(err)) {
459
+ throw new Error(`${phase} phase timed out after ${timeout / 1000}s (claude -p hit the wall; any work it committed survives — reconcile from pre-tick HEADs)`);
460
+ }
423
461
  if (err.stdout) {
424
462
  return { prompt, output: err.stdout };
425
463
  }
@@ -452,6 +490,16 @@ function getContextFiles(phase, options = {}) {
452
490
  return [...new Set(files.filter(Boolean))].map((f) => `- ${f}`).join('\n');
453
491
  }
454
492
 
493
// T35a (endgame loop-self-repair): the shared-checkout git-safety contract.
// Lesson 39: a concurrent tick's `git reset` destroyed a sibling repo's
// uncommitted work. Sibling-repo edits therefore ride per-tick worktrees (the
// same ../repo siblings snapshotRepoHeads tracks), and destructive git on the
// shared checkout is forbidden (COORDINATION.md Rule 4). The text is
// interpolated into the default and self-heal do prompts — never the benchmark
// prompt, which never commits.
const SHARED_CHECKOUT_GIT_CONTRACT = [
  // Bullet 1: route sibling-repo edits through a per-tick worktree.
  `- Shared-checkout git safety (COORDINATION.md Rule 4): edits to any repo OTHER than this tick's cwd (../atrisos-backend-style sibling repos) go through a per-tick worktree — start with \`atris worktree start --member <member> --task "<task>"\`, land with \`atris worktree ship --message "<msg>" --verify "<cmd>"\`. Never edit a sibling repo's shared checkout directly.`,
  // Bullet 2: ban destructive git on the shared checkout.
  `- On a shared checkout, \`git reset\`, \`git checkout --\`, \`git clean\`, and stashing other agents' work are FORBIDDEN — concurrent ticks' uncommitted work lives there.`,
].join('\n');
502
+
455
503
  /**
456
504
  * Build the right prompt for each phase, adapting to the kind of work.
457
505
  */
@@ -648,6 +696,7 @@ Rules:
648
696
  - Execute ONE step at a time. Verify each step before moving on.
649
697
  - Check MAP.md for file locations before grepping.
650
698
  - Stay in scope. Only fix the bug described in the lesson — no side quests.
699
+ ${SHARED_CHECKOUT_GIT_CONTRACT}
651
700
 
652
701
  Read these files first:
653
702
  ${readFiles}
@@ -672,6 +721,7 @@ Rules:
672
721
  - Check MAP.md for file locations before grepping.
673
722
  - If you hit two errors on the same step, stop and flag for re-scope.
674
723
  - Stay in scope. Don't touch files outside the task boundary.
724
+ ${SHARED_CHECKOUT_GIT_CONTRACT}
675
725
 
676
726
  Read these files first:
677
727
  ${readFiles}
@@ -742,6 +792,27 @@ If broken beyond quick fix, reply: failed — [reason].`;
742
792
  return '';
743
793
  }
744
794
 
795
/**
 * Turn free text into a clean kebab-case lesson slug.
 *
 * Every run of characters outside `a-z0-9` (after lowercasing) becomes a
 * single dash, so punctuation such as em-dashes cannot leak into the slug.
 * Slugs longer than `maxLen` are shortened at a word boundary rather than
 * mid-word (the old `.slice(0, 40)` produced
 * `verify-fail-per-member-model-selection-—-the-member-`).
 *
 * @param {*} text - Source text; falsy values are treated as 'unknown'.
 * @param {number} [maxLen=40] - Maximum slug length.
 * @returns {string} The slug, or 'unknown' when nothing usable remains.
 */
function lessonSlug(text, maxLen = 40) {
  // Splitting on non-alphanumeric runs and re-joining collapses separators
  // and drops leading/trailing dashes in one pass.
  const slug = String(text || 'unknown')
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter(Boolean)
    .join('-');

  if (slug === '') return 'unknown';
  if (slug.length <= maxLen) return slug;

  const head = slug.slice(0, maxLen);
  let kept = head;
  // When the character just past the cut is not a dash, the cut landed
  // mid-word: back up to the last complete word, if there is one.
  if (slug[maxLen] !== '-') {
    const dashAt = head.lastIndexOf('-');
    if (dashAt > 0) kept = head.slice(0, dashAt);
  }
  return kept.replace(/-+$/g, '') || 'unknown';
}
815
+
745
816
  /**
746
817
  * Write a lesson to atris/lessons.md
747
818
  * Appends a line in format: - **[YYYY-MM-DD] slug** — pass/fail — explanation
@@ -884,6 +955,38 @@ function shouldAdoptPlannedVerify(kind) {
884
955
  return ['staleness', 'docs', 'review', 'inbox', 'cleanup', 'feature', 'lessons', 'imagined'].includes(kind);
885
956
  }
886
957
 
958
// Task-plane status vocabulary lint. `atris task list/queue/current --status <s>`
// only matches raw stored statuses (commands/task.js); `ready` is a TRANSITION
// (`atris task ready` moves a task to review), so `--status ready` always
// returns "(no tasks)" — a verify built on it is an unreachable gate; the
// matching listable form is --status review (lessons.md
// verify-status-vocabulary, 3rd occurrence 2026-06-10).
const LISTABLE_TASK_STATUSES = ['open', 'claimed', 'review', 'done', 'failed'];
const STATUS_CORRECTIONS = { ready: 'review' };

/**
 * Flag a verify command that filters tasks by a status that can never match.
 *
 * Scans every `atris task list|queue|current` segment of the command
 * (compound verifies chain with `&&`, `||`, `;` or `|`), reads the segment's
 * `--status` value when present, and reports the first value outside
 * LISTABLE_TASK_STATUSES.
 *
 * @param {string} text - The verify command line.
 * @returns {{ok: false, reason: string}|null} A failure describing the first
 *   unlistable status, or null when no segment uses one.
 */
function lintVerifyTaskStatusVocabulary(text) {
  // Fresh /g regex per call, so no lastIndex state leaks between calls.
  const segmentRe = /\batris\s+task\s+(?:list|queue|current)\b([^|&;]*)/g;
  let segment;
  while ((segment = segmentRe.exec(text)) !== null) {
    const statusMatch = /--status[=\s]+["']?([A-Za-z0-9_-]+)["']?/.exec(segment[1]);
    if (!statusMatch) continue;
    const status = statusMatch[1];
    if (LISTABLE_TASK_STATUSES.includes(status)) continue;
    const vocabulary = LISTABLE_TASK_STATUSES.join('|');
    // Own-property lookup only: a status such as `constructor` or `toString`
    // must not resolve to a method inherited from Object.prototype.
    const corrected = Object.prototype.hasOwnProperty.call(STATUS_CORRECTIONS, status)
      ? STATUS_CORRECTIONS[status]
      : undefined;
    const suggestion = corrected
      ? `use --status ${corrected} instead (atris task ${status} is a transition that lands tasks in ${corrected}, so --status ${status} never matches)`
      : `use one of --status ${vocabulary}`;
    return {
      ok: false,
      reason: `Verify uses unlistable task status "--status ${status}" — the listable vocabulary is ${vocabulary}; ${suggestion}`,
    };
  }
  return null;
}
989
+
887
990
  function validateVerifyCommandShape(cmd) {
888
991
  const text = String(cmd || '').trim();
889
992
  if (!text) return { ok: true };
@@ -893,6 +996,8 @@ function validateVerifyCommandShape(cmd) {
893
996
  if (/\b(returns?|shows?|equals?|should|must)\b/i.test(text)) {
894
997
  return { ok: false, reason: 'Verify contains prose expectations instead of shell operators/assertions' };
895
998
  }
999
+ const statusLint = lintVerifyTaskStatusVocabulary(text);
1000
+ if (statusLint) return statusLint;
896
1001
  return { ok: true };
897
1002
  }
898
1003
 
@@ -1083,7 +1188,7 @@ function defaultPlanReviewExecutor(prompt, { cwd, timeout = 180000 } = {}) {
1083
1188
  const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Grep,Glob"`;
1084
1189
  const env = { ...process.env };
1085
1190
  delete env.CLAUDECODE;
1086
- const output = execSync(cmd, {
1191
+ const output = execPhaseCommandSync(cmd, {
1087
1192
  cwd,
1088
1193
  encoding: 'utf8',
1089
1194
  timeout,
@@ -1112,7 +1217,18 @@ function defaultCodexExecutor(prompt, { cwd, timeout = 180000 } = {}) {
1112
1217
  timeout,
1113
1218
  stdio: 'pipe',
1114
1219
  maxBuffer: 10 * 1024 * 1024,
1220
+ detached: true,
1115
1221
  });
1222
+ // No sh wrapper here, but codex spawns its own children — sweep the group
1223
+ // on timeout so they cannot outlive the wall (same orphan class as the
1224
+ // claude sites; ESRCH means the tree is already dead).
1225
+ if (proc.pid && ((proc.error && proc.error.code === 'ETIMEDOUT') || proc.signal)) {
1226
+ try {
1227
+ process.kill(-proc.pid, 'SIGKILL');
1228
+ } catch (sweepErr) {
1229
+ if (sweepErr.code !== 'ESRCH') throw sweepErr;
1230
+ }
1231
+ }
1116
1232
  if (proc.status !== 0 && !proc.stdout) {
1117
1233
  throw new Error(`codex exited with status ${proc.status}: ${proc.stderr || 'no output'}`);
1118
1234
  }
@@ -1258,6 +1374,216 @@ function appendPlanRejection(cwd, context, review) {
1258
1374
  }
1259
1375
  }
1260
1376
 
1377
+ // ── Timeout reconciliation (T33, endgame loop-self-repair) ─────────────────
1378
+ // A do-phase wall-clock timeout kills the reporter, not the work: 12 of 13
1379
+ // ETIMEDOUT halts in the 2026-06-10 RSI audit had real commits landed with no
1380
+ // receipt, no checked bullet, and a human halt (lessons: executor-timeout-wall,
1381
+ // tick-must-mark-own-bullet). These helpers let the tick reconcile from
1382
+ // pre-tick HEADs instead of halting when work provably landed.
1383
+
1384
/**
 * Locate today's journal file for a workspace.
 *
 * The date comes from the local-time getters of `Date`, zero-padded to
 * `YYYY-MM-DD`.
 *
 * @param {string} cwd - Workspace root.
 * @returns {{logFile: string, dateFormatted: string}} `logFile` is
 *   `<cwd>/atris/logs/<YYYY>/<YYYY-MM-DD>.md`; `dateFormatted` is the
 *   `YYYY-MM-DD` string.
 */
function todayJournalPath(cwd) {
  const today = new Date();
  const year = today.getFullYear();
  const twoDigits = (value) => String(value).padStart(2, '0');
  // getMonth() is zero-based, hence the +1.
  const dateFormatted = `${year}-${twoDigits(today.getMonth() + 1)}-${twoDigits(today.getDate())}`;
  const logFile = path.join(cwd, 'atris', 'logs', String(year), `${dateFormatted}.md`);
  return { logFile, dateFormatted };
}
1394
+
1395
/**
 * Reduce text to lowercase, single-spaced alphanumeric words for fuzzy
 * task-title matching.
 *
 * Steps, in order: lowercase; blank out backtick code spans; blank out
 * bracketed tags such as `[bug-1]`; turn every remaining run of characters
 * outside `a-z0-9` into a space; trim; collapse whitespace.
 *
 * @param {*} text - Any value; falsy values normalize to ''.
 * @returns {string} The normalized text.
 */
function normalizeForMatch(text) {
  const lowered = String(text || '').toLowerCase();
  const withoutCodeSpans = lowered.replace(/`[^`]*`/g, ' ');
  const withoutTags = withoutCodeSpans.replace(/\[[\w-]+\]/g, ' ');
  const wordsOnly = withoutTags.replace(/[^a-z0-9]+/g, ' ').trim();
  return wordsOnly.replace(/\s+/g, ' ');
}
1408
+
1409
/**
 * Build the search needle for a task: the normalized title, shortened to at
 * most `maxLen` characters. A shortened needle has its trailing
 * whitespace-plus-word fragment removed so it never ends mid-word. Used to
 * find the task's TODO bullet and journal receipts without exact-string
 * fragility.
 *
 * @param {*} taskTitle - Raw task title.
 * @param {number} [maxLen=60] - Maximum needle length.
 * @returns {string} The needle, or '' when the title normalizes to nothing.
 */
function taskMatchNeedle(taskTitle, maxLen = 60) {
  const normalized = normalizeForMatch(taskTitle);
  if (!normalized) return '';
  if (normalized.length > maxLen) {
    // Drop the last space and whatever partial or whole word follows it.
    return normalized.slice(0, maxLen).replace(/\s+\S*$/, '');
  }
  return normalized;
}
1419
+
1420
/**
 * Read the current HEAD commit of the git repository at `dir`.
 *
 * @param {string} dir - Directory to run `git rev-parse HEAD` in.
 * @returns {string|null} The trimmed command output, or null when the command
 *   fails for any reason.
 */
function gitHeadAt(dir) {
  const execOptions = {
    cwd: dir,
    stdio: ['ignore', 'pipe', 'pipe'],
    encoding: 'utf8',
  };
  try {
    const stdout = execSync('git rev-parse HEAD', execOptions);
    return stdout.trim();
  } catch {
    return null;
  }
}
1427
+
1428
/**
 * Snapshot HEAD of the workspace repo plus any sibling repos named in the
 * task text — both explicit `../atris-cli`-style refs (the journal convention)
 * and bare sibling-directory names like `atris-cli` that resolve to a git
 * repo next to cwd.
 *
 * Names are tried verbatim first and then with trailing `.`, `_` and `-`
 * stripped, because the token patterns greedily swallow sentence punctuation
 * ("see atris-cli." captures `atris-cli.`), which would otherwise make the
 * sibling invisible. Repeated tokens are de-duplicated so each name hits the
 * filesystem once.
 *
 * @param {string} cwd - Workspace root; always the first entry, labelled '.'.
 * @param {string} [taskText=''] - Free text that may name sibling repos.
 * @returns {Array<{label: string, dir: string, head: (string|null)}>} One
 *   entry per repo, workspace first, then siblings in order of first mention
 *   (explicit `../` refs before bare names).
 */
function snapshotRepoHeads(cwd, taskText = '') {
  const root = path.resolve(cwd);
  const repos = new Map([[root, '.']]);
  const text = String(taskText || '');

  // Resolve a sibling directory name to a git repo next to cwd, or null.
  const findSibling = (name, minLen) => {
    const candidates = new Set([name, name.replace(/[._-]+$/, '')]);
    for (const candidate of candidates) {
      if (candidate.length < minLen) continue;
      const dir = path.resolve(cwd, '..', candidate);
      if (dir !== root && fs.existsSync(path.join(dir, '.git'))) {
        return { dir, label: `../${candidate}` };
      }
    }
    return null;
  };

  // Explicit refs: strip the leading '../' and resolve the remainder.
  const explicitRefs = new Set(text.match(/\.\.\/[A-Za-z0-9._-]+/g) || []);
  for (const ref of explicitRefs) {
    const hit = findSibling(ref.slice(3), 1);
    if (hit) repos.set(hit.dir, hit.label);
  }

  // Bare names: keep the 3-character minimum of the token pattern even after
  // punctuation is stripped, and never relabel an already-known repo.
  const bareTokens = new Set(text.match(/[A-Za-z][A-Za-z0-9._-]{2,}/g) || []);
  for (const tok of bareTokens) {
    const hit = findSibling(tok, 3);
    if (hit && !repos.has(hit.dir)) repos.set(hit.dir, hit.label);
  }

  return [...repos].map(([dir, label]) => ({ label, dir, head: gitHeadAt(dir) }));
}
1450
+
1451
/**
 * Compare a prior HEAD snapshot against the repos' current HEADs.
 *
 * Entries that are falsy or have no recorded `head` are ignored, as are repos
 * whose HEAD can no longer be read or has not changed.
 *
 * @param {Array<{label: string, dir: string, head: (string|null)}>|null} snapshot
 *   Snapshot entries; a nullish snapshot is treated as empty.
 * @returns {Array<{label: string, dir: string, before: string, after: string}>}
 *   One entry per repo whose HEAD differs from the snapshot.
 */
function diffAdvancedRepoHeads(snapshot) {
  return [...(snapshot || [])]
    .filter((entry) => entry && entry.head)
    .map((entry) => ({ entry, after: gitHeadAt(entry.dir) }))
    .filter(({ entry, after }) => after && after !== entry.head)
    .map(({ entry, after }) => ({
      label: entry.label,
      dir: entry.dir,
      before: entry.head,
      after,
    }));
}
1466
+
1467
/**
 * Recognise the do-phase timeout message thrown by executePhaseDetailed
 * (T31). Plan/review timeouts stay human halts — only the do phase commits
 * work worth reconciling.
 *
 * @param {*} message - An error message; falsy values never match.
 * @returns {boolean} True when the text contains the whole-word phrase
 *   "do phase timed out after".
 */
function isDoPhaseTimeoutMessage(message) {
  const text = String(message || '');
  return text.search(/\bdo phase timed out after\b/) !== -1;
}
1475
+
1476
/**
 * Mark the task's TODO bullet `[x]`. Matches the first un-checked,
 * un-struck bullet whose normalized text contains the normalized title
 * prefix; `- **T33:** …` becomes `- [x] **T33:** …`, `- [ ]` becomes `- [x]`.
 *
 * Bullets already checked with either `[x]` or `[X]` are skipped, and a
 * trailing `\r` on a line (CRLF files) is set aside while matching and put
 * back on the rewritten line, so Windows-style files are handled and keep
 * their line endings.
 *
 * Only the first of `atris/TODO.md` / `atris/todo.md` that exists is
 * examined.
 *
 * @param {string} cwd - Workspace root containing the `atris` directory.
 * @param {string} taskTitle - Title of the task to mark.
 * @returns {boolean} True if a bullet was marked and the file rewritten.
 */
function markTodoBulletDone(cwd, taskTitle) {
  const needle = taskMatchNeedle(taskTitle);
  if (!needle) return false;
  for (const name of ['TODO.md', 'todo.md']) {
    const todoPath = path.join(cwd, 'atris', name);
    if (!fs.existsSync(todoPath)) continue;
    const lines = fs.readFileSync(todoPath, 'utf8').split('\n');
    for (let i = 0; i < lines.length; i++) {
      // `.` never matches '\r', so peel it off before matching.
      const lineEnd = lines[i].endsWith('\r') ? '\r' : '';
      const line = lineEnd ? lines[i].slice(0, -1) : lines[i];
      const bullet = line.match(/^(\s*)- (?:\[( |x|X)\]\s+)?(.*)$/);
      if (!bullet) continue;
      if (bullet[2] === 'x' || bullet[2] === 'X') continue; // already checked
      if (bullet[3].startsWith('~~')) continue; // struck-through bullet
      if (!normalizeForMatch(line).includes(needle)) continue;
      lines[i] = `${bullet[1]}- [x] ${bullet[3]}${lineEnd}`;
      fs.writeFileSync(todoPath, lines.join('\n'));
      return true;
    }
    // The file exists but holds no matching bullet: stop here rather than
    // probing the other filename.
    return false;
  }
  return false;
}
1503
+
1504
/**
 * Insert a block directly under today's journal `## Notes` heading, creating
 * the journal file (via createLogFile) if the tick dies before any other
 * writer got to it. Never throws.
 *
 * Placement:
 *  - no `## Notes` text in the file: a new `## Notes` section holding the
 *    block is appended after the trimmed content;
 *  - heading found and followed by a newline: the block goes on the line
 *    right after the heading, ahead of existing notes;
 *  - heading is the final line with no trailing newline: the block is
 *    appended after it (rather than ending up at the top of the file).
 *
 * @param {string} cwd - Workspace root.
 * @param {string} block - Markdown text to insert.
 * @returns {boolean} True on success, false if any filesystem step failed.
 */
function appendUnderNotes(cwd, block) {
  try {
    const { logFile, dateFormatted } = todayJournalPath(cwd);
    if (!fs.existsSync(logFile)) {
      const dir = path.dirname(logFile);
      if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
      createLogFile(logFile, dateFormatted);
    }
    const content = fs.readFileSync(logFile, 'utf8');
    const notesIdx = content.indexOf('## Notes');
    let updated;
    if (notesIdx === -1) {
      updated = content.replace(/\s*$/, '') + `\n\n## Notes\n${block}\n`;
    } else {
      const eol = content.indexOf('\n', notesIdx);
      if (eol === -1) {
        // Heading sits on the last line without a newline: terminate the
        // heading line, then add the block after it.
        updated = `${content}\n${block}`;
      } else {
        updated = content.slice(0, eol + 1) + block + content.slice(eol + 1);
      }
    }
    fs.writeFileSync(logFile, updated);
    return true;
  } catch {
    // Deliberate best-effort: journal bookkeeping must not crash the tick.
    return false;
  }
}
1530
+
1531
/**
 * Write the "work-landed-receipt-died" journal entry for a do-phase timeout
 * whose commits did land.
 *
 * The entry is stamped with the first 16 characters of the current ISO
 * timestamp (`YYYY-MM-DD HH:MM`) and lists each advanced repo as
 * `label: <before7> → <after7>`.
 *
 * @param {string} cwd - Workspace root.
 * @param {{task: string, advanced: Array<{label: string, before: string, after: string}>}} details
 *   The task title and the repos whose HEAD advanced.
 * @returns {boolean} The result of appendUnderNotes.
 */
function appendTimeoutReconciliation(cwd, { task, advanced }) {
  const stamp = new Date().toISOString().slice(0, 16).replace('T', ' ');
  const shortSha = (sha) => String(sha).slice(0, 7);
  const landed = (advanced || []).map(
    (repo) => `- ${repo.label}: ${shortSha(repo.before)} → ${shortSha(repo.after)}`
  );
  const block = [
    `\n### Timeout reconciliation — ${stamp} — work-landed-receipt-died\n\n`,
    `**Task:** ${task}\n`,
    `**What happened:** the do-phase wall killed the reporter, but commits landed:\n`,
    `${landed.join('\n')}\n`,
    `Receipt auto-written and the TODO bullet marked; no human halt required.\n`,
  ].join('');
  return appendUnderNotes(cwd, block);
}
1544
+
1545
/**
 * Write the "advanced-already-done" journal entry: verify passed before any
 * work started and a completion receipt for the task was already present.
 *
 * @param {string} cwd - Workspace root.
 * @param {string} task - Task title.
 * @param {string} receiptLine - The journal line that served as the receipt.
 * @returns {boolean} The result of appendUnderNotes.
 */
function appendCheckAndAdvance(cwd, task, receiptLine) {
  // Same `YYYY-MM-DD HH:MM` stamp shape: first 16 chars of the ISO timestamp.
  const stamp = new Date().toISOString().slice(0, 16).replace('T', ' ');
  const block = [
    `\n### Check-and-advance — ${stamp} — advanced-already-done\n\n`,
    `**Task:** ${task}\n`,
    `**What happened:** verify passed before work started AND today's journal already carries a completion receipt — the work shipped on a prior tick whose reporter died before bookkeeping. Bullet marked, picker advanced.\n`,
    `**Receipt:** ${receiptLine}\n`,
  ].join('');
  return appendUnderNotes(cwd, block);
}
1554
+
1555
/**
 * Look through today's journal for a completion receipt naming the task.
 *
 * A line counts as receipt-shaped when it contains a `**C<digits>:**` marker,
 * a `**Task:**` marker, or the word "reconciliation" (any case). The first
 * receipt-shaped line whose normalized text contains the task needle wins.
 *
 * @param {string} cwd - Workspace root.
 * @param {string} taskTitle - Title of the task to look for.
 * @returns {string|null} The matching line, trimmed, or null when the journal
 *   is missing, the title yields no needle, or no line matches.
 */
function findCompletionReceipt(cwd, taskTitle) {
  const { logFile } = todayJournalPath(cwd);
  if (!fs.existsSync(logFile)) return null;
  const needle = taskMatchNeedle(taskTitle);
  if (!needle) return null;

  const receiptShapes = [/\*\*C\d+:\*\*/, /\*\*Task:\*\*/, /reconciliation/i];
  const match = fs
    .readFileSync(logFile, 'utf8')
    .split('\n')
    .find(
      (line) =>
        receiptShapes.some((shape) => shape.test(line)) &&
        normalizeForMatch(line).includes(needle)
    );
  return match === undefined ? null : match.trim();
}
1572
+
1573
/**
 * Reconcile a do-phase timeout against the pre-tick HEAD snapshot.
 *
 * When at least one repo advanced, the journal reconciliation receipt is
 * written first, then the task's TODO bullet is marked, and the outcome
 * `work-landed-receipt-died` is reported. When nothing advanced, nothing is
 * written and the caller halts exactly as before.
 *
 * @param {string} cwd - Workspace root.
 * @param {Array|null} snapshot - Pre-tick snapshot from snapshotRepoHeads.
 * @param {string} taskTitle - Title of the task that timed out.
 * @returns {{reconciled: false, advanced: Array}|{reconciled: true, outcome: string, advanced: Array, bulletMarked: boolean}}
 */
function reconcileTimedOutTick(cwd, snapshot, taskTitle) {
  const advanced = diffAdvancedRepoHeads(snapshot);
  if (advanced.length > 0) {
    // Receipt first, bullet second — same order as the bookkeeping it replaces.
    appendTimeoutReconciliation(cwd, { task: taskTitle, advanced });
    const bulletMarked = markTodoBulletDone(cwd, taskTitle);
    return {
      reconciled: true,
      outcome: 'work-landed-receipt-died',
      advanced,
      bulletMarked,
    };
  }
  return { reconciled: false, advanced: [] };
}
1586
+
1261
1587
  function runTaskOnce(context, options = {}) {
1262
1588
  const { verbose = false, cwd = process.cwd() } = options;
1263
1589
 
@@ -1318,6 +1644,25 @@ function runTaskOnce(context, options = {}) {
1318
1644
  if (!skipFalsifiability && verifyResult.explicit && context.kind === 'endgame' && verifyCmd) {
1319
1645
  try {
1320
1646
  execSync(verifyCmd, { cwd, stdio: 'pipe', timeout: 300000 });
1647
+ // T33b (lesson: tick-must-mark-own-bullet): a pre-work verify pass WITH
1648
+ // a completion receipt already in today's journal means the work shipped
1649
+ // but the reporter died before bookkeeping. Check the bullet and advance
1650
+ // instead of wedging the picker on verify-not-falsifiable.
1651
+ const receipt = findCompletionReceipt(cwd, context.task);
1652
+ if (receipt) {
1653
+ const bulletMarked = markTodoBulletDone(cwd, context.task);
1654
+ appendCheckAndAdvance(cwd, context.task, receipt);
1655
+ return {
1656
+ outcome: 'advanced-already-done',
1657
+ reason: 'advanced-already-done',
1658
+ receipt,
1659
+ bulletMarked,
1660
+ phaseResults: {},
1661
+ elapsedSeconds: 0,
1662
+ verifyRan: true,
1663
+ verifyPass: true,
1664
+ };
1665
+ }
1321
1666
  writeLesson(cwd, 'verify-not-falsifiable', 'fail',
1322
1667
  `Verify \`${verifyCmd}\` passed before work started on "${context.task}". Either the rubric is trivial or the task is already done. Tick halted.`);
1323
1668
  return {
@@ -1436,7 +1781,7 @@ function runTaskOnce(context, options = {}) {
1436
1781
  elapsedSeconds: verifyTime,
1437
1782
  };
1438
1783
  try {
1439
- const slug = (context.task || 'unknown').replace(/\s+/g, '-').toLowerCase().slice(0, 40);
1784
+ const slug = lessonSlug(context.task);
1440
1785
  writeLesson(cwd, `verify-fail-${slug}`, 'fail', `Verify command \`${verifyCmd}\` failed: ${e.message.split('\n')[0]}`);
1441
1786
  } catch { /* lesson write must not crash the tick */ }
1442
1787
  }
@@ -2540,7 +2885,7 @@ Reply with the JSON array and nothing else.`;
2540
2885
  const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')"`;
2541
2886
  const env = { ...process.env };
2542
2887
  delete env.CLAUDECODE;
2543
- output = execSync(cmd, {
2888
+ output = execPhaseCommandSync(cmd, {
2544
2889
  cwd,
2545
2890
  encoding: 'utf8',
2546
2891
  timeout: PHASE_TIMEOUT,
@@ -2548,6 +2893,11 @@ Reply with the JSON array and nothing else.`;
2548
2893
  maxBuffer: 10 * 1024 * 1024,
2549
2894
  env
2550
2895
  }).toString();
2896
+ } catch (err) {
2897
+ if (isPhaseTimeoutError(err)) {
2898
+ throw new Error(`horizon-proposal phase timed out after ${PHASE_TIMEOUT / 1000}s`);
2899
+ }
2900
+ throw err;
2551
2901
  } finally {
2552
2902
  try { fs.unlinkSync(tmpFile); } catch {}
2553
2903
  }
@@ -2835,6 +3185,17 @@ async function autopilotAtris(description, options = {}) {
2835
3185
  };
2836
3186
  const startingEndgame = readEndgameState(cwd);
2837
3187
 
3188
+ // T33a: snapshot pre-tick HEADs (cwd + sibling repos named in the task)
3189
+ // so a do-phase timeout can be reconciled against what actually landed.
3190
+ let preTickHeads = null;
3191
+ try {
3192
+ const verifyHint = getVerifyCommand(cwd, suggestion.task).cmd || '';
3193
+ preTickHeads = snapshotRepoHeads(
3194
+ cwd,
3195
+ [suggestion.task, ...(suggestion.files || []), verifyHint].join(' ')
3196
+ );
3197
+ } catch { /* snapshot failure must not block the tick */ }
3198
+
2838
3199
  try {
2839
3200
  if (verbose) {
2840
3201
  console.log('');
@@ -2868,6 +3229,26 @@ async function autopilotAtris(description, options = {}) {
2868
3229
  break;
2869
3230
  }
2870
3231
 
3232
+ // T33b: the falsifiability gate found a completion receipt — the work
3233
+ // already shipped, the bullet is checked, move straight to the next pick.
3234
+ if (execution.outcome === 'advanced-already-done') {
3235
+ completed++;
3236
+ tickOutcome = 'built';
3237
+ tickOutcomeText = `"${lastTaskTitle}" was already done — verify passed pre-work and today's journal carries its completion receipt, so I checked the bullet and advanced.`;
3238
+ tickNextStep = 'pick the next endgame task';
3239
+ if (verbose) {
3240
+ console.log(' already done (journal receipt found). bullet checked, advancing.');
3241
+ } else {
3242
+ printPlainBlock([
3243
+ 'That task was already done — verify passed before work and a completion receipt exists in today\'s journal.',
3244
+ 'I checked the bullet and advanced.',
3245
+ '',
3246
+ 'Next I will look for the next task.'
3247
+ ].join('\n'));
3248
+ }
3249
+ continue;
3250
+ }
3251
+
2871
3252
  const planTime = execution.phaseResults.plan.elapsedSeconds;
2872
3253
  if (verbose) console.log(` planned (${planTime}s)`);
2873
3254
 
@@ -2929,7 +3310,7 @@ async function autopilotAtris(description, options = {}) {
2929
3310
  // Record commit hash + verify command for retroactive regression checks
2930
3311
  try {
2931
3312
  const commitHash = execSync('git rev-parse HEAD', { cwd, encoding: 'utf8' }).trim();
2932
- const taskSlug = (suggestion.task || 'unknown').replace(/\s+/g, '-').toLowerCase().slice(0, 40);
3313
+ const taskSlug = lessonSlug(suggestion.task);
2933
3314
  recordTickCommit(cwd, commitHash, execution.verifyCmd || '', taskSlug);
2934
3315
 
2935
3316
  // Every 10th tick, run retroactive regression check
@@ -2976,6 +3357,36 @@ async function autopilotAtris(description, options = {}) {
2976
3357
  }
2977
3358
 
2978
3359
  } catch (err) {
3360
+ // T33a: a do-phase timeout with commits landed is a dead reporter, not
3361
+ // dead work — write the reconciliation receipt, mark the bullet, and
3362
+ // record work-landed-receipt-died instead of halting for a human.
3363
+ let reconciliation = null;
3364
+ if (isDoPhaseTimeoutMessage(err.message)) {
3365
+ try {
3366
+ reconciliation = reconcileTimedOutTick(cwd, preTickHeads, lastTaskTitle || suggestion.task);
3367
+ } catch { reconciliation = null; }
3368
+ }
3369
+ if (reconciliation && reconciliation.reconciled) {
3370
+ completed++;
3371
+ const landed = reconciliation.advanced
3372
+ .map((r) => `${r.label} ${String(r.before).slice(0, 7)} → ${String(r.after).slice(0, 7)}`)
3373
+ .join(', ');
3374
+ tickOutcome = 'work-landed-receipt-died';
3375
+ tickOutcomeText = `"${lastTaskTitle}" hit the do-phase wall but commits landed (${landed}). I wrote the reconciliation receipt and marked the bullet — work-landed-receipt-died, no human halt.`;
3376
+ tickNextStep = 'pick the next task';
3377
+ if (verbose) {
3378
+ console.log(` do phase timed out, but work landed (${landed}). reconciled — no human halt.`);
3379
+ } else {
3380
+ printPlainBlock([
3381
+ 'The do phase timed out, but commits landed before the wall.',
3382
+ `Landed: ${landed}.`,
3383
+ 'I wrote the reconciliation receipt and marked the task bullet.',
3384
+ '',
3385
+ 'Next tick will pick the next task.'
3386
+ ].join('\n'));
3387
+ }
3388
+ break;
3389
+ }
2979
3390
  tickOutcome = 'halted';
2980
3391
  tickOutcomeText = `I hit an error while running "${lastTaskTitle || 'a task'}": ${err.message}`;
2981
3392
  tickNextStep = 'stop until a human looks at the error';
@@ -3159,7 +3570,7 @@ Search the codebase to verify. Reply: YES <reason> or NO <reason>`;
3159
3570
  const env = { ...process.env };
3160
3571
  delete env.CLAUDECODE;
3161
3572
  const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Glob,Grep"`;
3162
- const output = execSync(cmd, {
3573
+ const output = execPhaseCommandSync(cmd, {
3163
3574
  cwd,
3164
3575
  encoding: 'utf8',
3165
3576
  timeout: 60000,
@@ -3192,6 +3603,13 @@ async function autopilotFromTodo(options = {}) {
3192
3603
 
3193
3604
  module.exports = {
3194
3605
  appendTickSummary,
3606
+ snapshotRepoHeads,
3607
+ diffAdvancedRepoHeads,
3608
+ reconcileTimedOutTick,
3609
+ markTodoBulletDone,
3610
+ findCompletionReceipt,
3611
+ isDoPhaseTimeoutMessage,
3612
+ validateVerifyCommandShape,
3195
3613
  askHuman,
3196
3614
  askModel,
3197
3615
  autopilotAtris,
@@ -3233,5 +3651,9 @@ module.exports = {
3233
3651
  scoreEndgameCandidates,
3234
3652
  suggestNextTask,
3235
3653
  shouldSkipAutoHumanGate,
3236
- writeLesson
3654
+ writeLesson,
3655
+ isPhaseTimeoutError,
3656
+ execPhaseCommandSync,
3657
+ executePhaseDetailed,
3658
+ lessonSlug
3237
3659
  };