atris 3.15.57 → 3.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/AGENTS.md +2 -2
  2. package/GETTING_STARTED.md +1 -1
  3. package/PERSONA.md +4 -4
  4. package/README.md +12 -11
  5. package/atris/skills/copy-editor/SKILL.md +30 -4
  6. package/atris/skills/improve/SKILL.md +18 -20
  7. package/atris/wiki/concepts/agent-activation-contract.md +5 -3
  8. package/atris/wiki/concepts/workspace-initialization-contract.md +4 -4
  9. package/atris/wiki/index.md +1 -0
  10. package/ax +522 -73
  11. package/bin/atris.js +78 -44
  12. package/commands/align.js +0 -14
  13. package/commands/apps.js +102 -1
  14. package/commands/autopilot.js +628 -31
  15. package/commands/brain.js +219 -34
  16. package/commands/brainstorm.js +0 -829
  17. package/commands/compile.js +569 -0
  18. package/commands/computer.js +0 -60
  19. package/commands/improve.js +501 -0
  20. package/commands/integrations.js +233 -71
  21. package/commands/lesson.js +44 -0
  22. package/commands/member.js +4498 -226
  23. package/commands/mission.js +302 -27
  24. package/commands/now.js +89 -1
  25. package/commands/probe.js +366 -0
  26. package/commands/radar.js +181 -56
  27. package/commands/recap.js +203 -0
  28. package/commands/skill.js +6 -2
  29. package/commands/soul.js +0 -4
  30. package/commands/task.js +5587 -499
  31. package/commands/terminal.js +14 -10
  32. package/commands/wiki.js +87 -1
  33. package/commands/workflow.js +288 -73
  34. package/commands/worktree.js +52 -15
  35. package/commands/xp.js +6 -65
  36. package/lib/auto-accept-certified.js +294 -0
  37. package/lib/file-ops.js +0 -184
  38. package/lib/member-alive.js +232 -0
  39. package/lib/policy-lessons.js +280 -0
  40. package/lib/receipt-evidence.js +64 -0
  41. package/lib/state-detection.js +75 -1
  42. package/lib/task-db.js +568 -16
  43. package/lib/task-proof.js +43 -0
  44. package/package.json +1 -1
  45. package/utils/auth.js +13 -4
  46. package/commands/research.js +0 -52
  47. package/lib/section-merge.js +0 -196
@@ -25,6 +25,46 @@ const pkg = require('../package.json');
25
25
 
26
26
  const PHASE_TIMEOUT = 600000; // 10 min per phase
27
27
 
28
/**
 * Report whether a task's `claimed` value names a human rather than an agent.
 *
 * The value is stringified and lower-cased, then matched as whole words
 * against the owner's name ("keshav" / "keshavrao") and the generic roles
 * "owner", "human" and "operator".
 *
 * @param {*} claimed - Raw `claimed` field of a task; nullish/falsy is treated as empty.
 * @returns {boolean} true when any owner pattern matches.
 */
function looksOwnerClaimed(claimed) {
  const normalized = String(claimed || '').toLowerCase();
  const ownerPatterns = [
    /\bkeshav(?:rao)?\b/,
    /\b(owner|human|operator)\b/,
  ];
  return ownerPatterns.some((pattern) => pattern.test(normalized));
}
32
+
33
/**
 * Report whether a task title reads like work that needs a human decision
 * (owner/human approval, input or gate; a manual send; a route or destination
 * confirmation).
 *
 * @param {*} title - Task title; nullish/falsy is treated as empty.
 * @returns {boolean} true when the lower-cased title matches any gate phrase.
 */
function looksOwnerGatedTitle(title) {
  const normalized = String(title || '').toLowerCase();
  const gatePhrases = [
    /\bowner[- ](?:approval|input|gate|gated)\b/,
    /\bhuman[- ](?:approval|input|gate|gated)\b/,
    /\bmanual send\b/,
    /\broute confirmation\b/,
    /\bconfirm pallet destination\b/,
    /\bconfirm .+ destination before .+ approval\b/,
    /\bapprove and manually send\b/,
  ];
  return gatePhrases.some((phrase) => phrase.test(normalized));
}
45
+
46
/**
 * Decide whether a task should be passed over because it belongs to a human:
 * either its `claimed` field names the owner or its title (falling back to
 * `task.task`) reads as owner-gated.
 *
 * @param {{claimed?: *, title?: *, task?: *}|null|undefined} task
 * @returns {boolean} false for a missing task, otherwise the combined check.
 */
function shouldSkipAutoHumanGate(task) {
  if (!task) return false;
  if (looksOwnerClaimed(task.claimed)) return true;
  return looksOwnerGatedTitle(task.title || task.task);
}
50
+
51
/**
 * Run the repo's own `scripts/audit_map_refs.py` (when present) and report
 * whether it declares zero broken references.
 *
 * Returns false when the script is missing, when it does not exit with
 * status 0 (this includes a timeout or spawn failure, where status is not 0),
 * or when its combined stdout/stderr has no "Total broken references: N" line.
 *
 * @param {string} cwd - Repository root to look in and to run the script from.
 * @returns {boolean} true only when the audit ran cleanly and reported 0.
 */
function repoMapAuditReportsClean(cwd) {
  const script = path.join(cwd, 'scripts', 'audit_map_refs.py');
  if (!fs.existsSync(script)) return false;

  const { status, stdout, stderr } = spawnSync('python3', [script], {
    cwd,
    encoding: 'utf8',
    timeout: 120000,
    maxBuffer: 1024 * 1024
  });
  if (status !== 0) return false;

  const combined = `${stdout || ''}\n${stderr || ''}`;
  const found = /Total broken references:\s*(\d+)/i.exec(combined);
  if (!found) return false;
  return Number(found[1]) === 0;
}
67
+
28
68
  /**
29
69
  * Scan workspace for the next thing worth doing.
30
70
  * Returns { task, why, kind } or null.
@@ -54,7 +94,7 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
54
94
  // --- Resume interrupted work ---
55
95
  if (todo.inProgress.length > 0) {
56
96
  const t = todo.inProgress[0];
57
- if (!(t.tags && t.tags.includes('unverified')) && !skipped.has(t.title)) {
97
+ if (!(t.tags && t.tags.includes('unverified')) && !skipped.has(t.title) && !(auto && shouldSkipAutoHumanGate(t))) {
58
98
  suggestions.push({
59
99
  task: t.title,
60
100
  why: `This was already started${t.claimed ? ` by ${t.claimed}` : ''} but never finished.`,
@@ -75,6 +115,7 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
75
115
  why: `"${sp.staleSource}" changed on ${sp.sourceDate} but the page was last compiled ${sp.compiledDate}. The content may be wrong.`,
76
116
  kind: 'staleness',
77
117
  priority: 2,
118
+ files: [pageName, sp.staleSource],
78
119
  skipKey: key
79
120
  });
80
121
  break;
@@ -95,7 +136,9 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
95
136
  }
96
137
 
97
138
  // --- Broken MAP.md references ---
98
- const { unhealable } = healBrokenMapRefs(cwd, atrisDir, true); // dry-run
139
+ const { unhealable } = repoMapAuditReportsClean(cwd)
140
+ ? { unhealable: [] }
141
+ : healBrokenMapRefs(cwd, atrisDir, true); // dry-run
99
142
  if (unhealable.length > 0 && !skipped.has('fix-map-refs')) {
100
143
  const sample = unhealable.slice(0, 3).map(r => `${r.file}:${r.line}`).join(', ');
101
144
  suggestions.push({
@@ -127,6 +170,7 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
127
170
  for (const t of todo.backlog) {
128
171
  if (t.tags && t.tags.includes('unverified')) continue;
129
172
  if (shouldSkipEndgameAtPicker(cwd, t)) continue;
173
+ if (auto && shouldSkipAutoHumanGate(t)) continue;
130
174
  if (skipped.has(t.title)) continue;
131
175
  const remaining = todo.backlog.filter(b => !(b.tags && b.tags.includes('unverified'))).length;
132
176
  suggestions.push({
@@ -348,6 +392,41 @@ function askHuman(taskTitle) {
348
392
  });
349
393
  }
350
394
 
395
/**
 * Classify a child_process error as a timeout/kill.
 *
 * Accepts three shapes: `killed` set, `code === 'ETIMEDOUT'`, or any `signal`.
 * Per the lesson this guard was written for (etimedout-error-shape,
 * 2026-06-10), Node's execSync reports a timeout via `code: 'ETIMEDOUT'` plus
 * `signal` and does NOT set `killed`, so checking `killed` alone never fires
 * on the very error it was meant to catch.
 *
 * @param {*} err - Error thrown by a child_process call (may be nullish).
 * @returns {boolean} true when the error looks like a timeout or signal kill.
 */
function isPhaseTimeoutError(err) {
  if (!err) return false;
  const timedOut = err.code === 'ETIMEDOUT';
  return Boolean(err.killed) || timedOut || Boolean(err.signal);
}
404
+
405
/**
 * execSync with the phase-timeout orphan fix.
 *
 * Node's sync-exec timeout signals only the direct child pid — the
 * `/bin/sh -c` wrapper — so the `claude` it spawned kept committing 160–296s
 * past the 600s wall (lesson: etimedout-error-shape, 2026-06-10).
 * `detached: true` makes the wrapper a process-group leader; on a
 * timeout/kill error the whole group is swept with
 * `process.kill(-pid, 'SIGKILL')`.
 *
 * The ORIGINAL exec error is always the one rethrown, so every call site
 * keeps its existing catch contract (`err.stdout` passthrough and timeout
 * classification included). A failed sweep never replaces it:
 *  - ESRCH means the group already died and is ignored;
 *  - any other sweep failure (e.g. EPERM) is attached as `err.sweepError`
 *    for diagnostics instead of being thrown in place of the exec error.
 *
 * @param {string} cmd - Shell command line passed to execSync.
 * @param {object} [opts] - execSync options; `detached: true` is always forced.
 * @returns {string|Buffer} Whatever execSync returns for the given options.
 * @throws The error raised by execSync, unchanged apart from the optional
 *   `sweepError` property.
 */
function execPhaseCommandSync(cmd, opts = {}) {
  try {
    return execSync(cmd, { ...opts, detached: true });
  } catch (err) {
    if (isPhaseTimeoutError(err) && err.pid) {
      try {
        // Negative pid targets the whole process group led by the wrapper.
        process.kill(-err.pid, 'SIGKILL');
      } catch (sweepErr) {
        // Keep the sweep failure visible without masking the exec error.
        if (sweepErr.code !== 'ESRCH') err.sweepError = sweepErr;
      }
    }
    throw err;
  }
}
429
+
351
430
  /**
352
431
  * Run a phase via claude -p subprocess.
353
432
  */
@@ -359,10 +438,11 @@ function executePhaseDetailed(phase, context, options = {}) {
359
438
  fs.writeFileSync(tmpFile, prompt);
360
439
 
361
440
  try {
362
- const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Write,Edit,Glob,Grep"`;
441
+ const cmd = options.cmdOverride
442
+ || `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Write,Edit,Glob,Grep"`;
363
443
  const env = { ...process.env };
364
444
  delete env.CLAUDECODE;
365
- const output = execSync(cmd, {
445
+ const output = execPhaseCommandSync(cmd, {
366
446
  cwd: process.cwd(),
367
447
  encoding: 'utf8',
368
448
  timeout,
@@ -375,7 +455,9 @@ function executePhaseDetailed(phase, context, options = {}) {
375
455
  return { prompt, output: output || '' };
376
456
  } catch (err) {
377
457
  try { fs.unlinkSync(tmpFile); } catch {}
378
- if (err.killed) throw new Error(`${phase} timed out after ${timeout / 1000}s`);
458
+ if (isPhaseTimeoutError(err)) {
459
+ throw new Error(`${phase} phase timed out after ${timeout / 1000}s (claude -p hit the wall; any work it committed survives — reconcile from pre-tick HEADs)`);
460
+ }
379
461
  if (err.stdout) {
380
462
  return { prompt, output: err.stdout };
381
463
  }
@@ -383,10 +465,6 @@ function executePhaseDetailed(phase, context, options = {}) {
383
465
  }
384
466
  }
385
467
 
386
- function executePhase(phase, context, options = {}) {
387
- return executePhaseDetailed(phase, context, options).output;
388
- }
389
-
390
468
  /**
391
469
  * Build context-aware file list for prompts.
392
470
  */
@@ -412,6 +490,16 @@ function getContextFiles(phase, options = {}) {
412
490
  return [...new Set(files.filter(Boolean))].map((f) => `- ${f}`).join('\n');
413
491
  }
414
492
 
493
+ // T35a (endgame loop-self-repair): shared-checkout git-safety contract.
494
+ // Lesson 39: a concurrent tick's `git reset` destroyed a sibling repo's
495
+ // uncommitted work. Sibling-repo edits ride per-tick worktrees (the same
496
+ // ../repo siblings snapshotRepoHeads tracks); destructive git on the shared
497
+ // checkout is forbidden (COORDINATION.md Rule 4). Interpolated into the
498
+ // default and self-heal do prompts — never the benchmark prompt (it never
499
+ // commits).
500
+ const SHARED_CHECKOUT_GIT_CONTRACT = `- Shared-checkout git safety (COORDINATION.md Rule 4): edits to any repo OTHER than this tick's cwd (../atrisos-backend-style sibling repos) go through a per-tick worktree — start with \`atris worktree start --member <member> --task "<task>"\`, land with \`atris worktree ship --message "<msg>" --verify "<cmd>"\`. Never edit a sibling repo's shared checkout directly.
501
+ - On a shared checkout, \`git reset\`, \`git checkout --\`, \`git clean\`, and stashing other agents' work are FORBIDDEN — concurrent ticks' uncommitted work lives there.`;
502
+
415
503
  /**
416
504
  * Build the right prompt for each phase, adapting to the kind of work.
417
505
  */
@@ -422,7 +510,13 @@ function buildPrompt(phase, context, options = {}) {
422
510
  contextNote = '',
423
511
  runnerName = '',
424
512
  } = options;
425
- const readFiles = getContextFiles(phase, options);
513
+ const readFiles = getContextFiles(phase, {
514
+ ...options,
515
+ extraReadFiles: [
516
+ ...(options.extraReadFiles || []),
517
+ ...(Array.isArray(context.files) ? context.files : []),
518
+ ],
519
+ });
426
520
  const benchmarkProtocol = benchmarkStrategy === 'stack'
427
521
  ? 'coordinated stack run'
428
522
  : (benchmarkStrategy === 'single' ? 'pinned single-model baseline run' : '');
@@ -478,12 +572,24 @@ When done, reply: done.`;
478
572
  }
479
573
 
480
574
  if (kind === 'staleness' || kind === 'docs' || kind === 'review') {
575
+ const fileList = Array.isArray(context.files) && context.files.length
576
+ ? context.files.map((file) => `- ${file}`).join('\n')
577
+ : '- target page or MAP entry from the task title\n- source file(s) that changed';
481
578
  return `${baseRules}
482
579
 
483
580
  Maintenance task: ${task}
484
581
 
485
- Figure out what needs to change and why. Create focused tasks in atris/TODO.md.
582
+ Relevant files:
583
+ ${fileList}
584
+
585
+ Figure out what needs to change and why. Create exactly one focused task in atris/TODO.md unless the drift truly requires separate commits.
486
586
  For stale pages, read both the page and its sources to understand the drift.
587
+ The task row must include these fields so plan-review can prove it is executable:
588
+ - **Files:** concrete target page plus source file paths
589
+ - **Exit:** the observable post-update state
590
+ - **Verify:** one raw shell command that checks concrete facts and rejects stale phrases; use shell operators like \`&&\`, \`grep -q\`, or \`test\`, not Markdown backticks or English like "returns 1" / "shows today's date"
591
+ - **Rollback:** git checkout -- <changed-files> before commit, or git revert HEAD --no-edit after commit
592
+ Do not write tasks without Verify and Rollback. Do not use \`true\`, \`echo ok\`, or vague "review manually" verification.
487
593
 
488
594
  When done, reply: done.`;
489
595
  }
@@ -590,6 +696,7 @@ Rules:
590
696
  - Execute ONE step at a time. Verify each step before moving on.
591
697
  - Check MAP.md for file locations before grepping.
592
698
  - Stay in scope. Only fix the bug described in the lesson — no side quests.
699
+ ${SHARED_CHECKOUT_GIT_CONTRACT}
593
700
 
594
701
  Read these files first:
595
702
  ${readFiles}
@@ -614,6 +721,7 @@ Rules:
614
721
  - Check MAP.md for file locations before grepping.
615
722
  - If you hit two errors on the same step, stop and flag for re-scope.
616
723
  - Stay in scope. Don't touch files outside the task boundary.
724
+ ${SHARED_CHECKOUT_GIT_CONTRACT}
617
725
 
618
726
  Read these files first:
619
727
  ${readFiles}
@@ -684,6 +792,27 @@ If broken beyond quick fix, reply: failed — [reason].`;
684
792
  return '';
685
793
  }
686
794
 
795
/**
 * Turn free text into a clean kebab-case lesson slug.
 *
 * Every run of non-alphanumerics (em-dashes included) collapses to a single
 * dash and edge dashes are trimmed. Over-long slugs are cut at a word
 * boundary rather than mid-word; the old `.slice(0, 40)` produced slugs such
 * as `verify-fail-per-member-model-selection-—-the-member-`.
 *
 * @param {*} text - Source text; falsy input becomes 'unknown'.
 * @param {number} [maxLen=40] - Maximum slug length.
 * @returns {string} The slug, or 'unknown' when nothing usable remains.
 */
function lessonSlug(text, maxLen = 40) {
  const kebab = String(text || 'unknown')
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '');
  if (!kebab) return 'unknown';
  if (kebab.length <= maxLen) return kebab;

  let head = kebab.slice(0, maxLen);
  // When the character just past the cut is not a dash, the cut landed inside
  // a word: back up to the last complete word (if there is one).
  if (kebab[maxLen] !== '-') {
    const dashAt = head.lastIndexOf('-');
    if (dashAt > 0) head = head.slice(0, dashAt);
  }
  return head.replace(/-+$/g, '') || 'unknown';
}
815
+
687
816
  /**
688
817
  * Write a lesson to atris/lessons.md
689
818
  * Appends a line in format: - **[YYYY-MM-DD] slug** — pass/fail — explanation
@@ -802,6 +931,90 @@ function getVerifyCommand(cwd, taskTitle) {
802
931
  return { cmd: detectDefaultVerify(cwd), explicit: false };
803
932
  }
804
933
 
934
/**
 * List every task in `atris/TODO.md` that carries an explicit `verify` value.
 *
 * Tasks are read in section order — in-progress, review (when the parser
 * provides it), backlog, completed — and each result gets a `key` of
 * `title\0verify` so callers can compare two listings.
 *
 * @param {string} cwd - Workspace root containing `atris/TODO.md`.
 * @returns {Array<{title: *, verify: *, key: string}>} Empty when the TODO file is missing.
 */
function collectExplicitVerifyTasks(cwd) {
  const todoFile = path.join(cwd, 'atris', 'TODO.md');
  if (!fs.existsSync(todoFile)) return [];

  const todo = parseTodo(todoFile);
  const everyTask = [...todo.inProgress, ...(todo.review || []), ...todo.backlog, ...todo.completed];
  const collected = [];
  for (const entry of everyTask) {
    if (!entry || !entry.verify) continue;
    collected.push({
      title: entry.title,
      verify: entry.verify,
      key: `${entry.title}\0${entry.verify}`,
    });
  }
  return collected;
}
946
+
947
/**
 * Find the single explicit-verify task that appeared since an earlier listing.
 *
 * @param {string} cwd - Workspace root.
 * @param {Set<string>|Iterable<string>|null|undefined} beforeKeys - Keys
 *   (as produced by collectExplicitVerifyTasks) that existed earlier.
 * @returns {{cmd: *, explicit: true, task: *}|null} The new task's verify
 *   command, or null unless exactly one new task was added.
 */
function findNewExplicitVerifyCommand(cwd, beforeKeys) {
  const known = beforeKeys instanceof Set ? beforeKeys : new Set(beforeKeys || []);
  const fresh = collectExplicitVerifyTasks(cwd).filter(({ key }) => !known.has(key));
  // Zero or several additions are ambiguous — adopt nothing.
  if (fresh.length !== 1) return null;
  const [only] = fresh;
  return { cmd: only.verify, explicit: true, task: only.title };
}
953
+
954
/**
 * Tell whether a task kind is one of those for which a verify command written
 * during planning may be adopted.
 *
 * @param {*} kind - Task kind.
 * @returns {boolean} true for staleness, docs, review, inbox, cleanup,
 *   feature, lessons and imagined; false for anything else.
 */
function shouldAdoptPlannedVerify(kind) {
  switch (kind) {
    case 'staleness':
    case 'docs':
    case 'review':
    case 'inbox':
    case 'cleanup':
    case 'feature':
    case 'lessons':
    case 'imagined':
      return true;
    default:
      return false;
  }
}
957
+
958
// Task-plane status vocabulary lint. `atris task list/queue/current --status <s>`
// only matches raw stored statuses (commands/task.js); `ready` is a TRANSITION
// (`atris task ready` moves a task to review), so `--status ready` always
// returns "(no tasks)" — a verify built on it is an unreachable gate; the
// matching listable form is --status review (lessons.md
// verify-status-vocabulary, 3rd occurrence 2026-06-10).
const LISTABLE_TASK_STATUSES = ['open', 'claimed', 'review', 'done', 'failed'];
const STATUS_CORRECTIONS = { ready: 'review' };

/**
 * Flag a verify command that filters tasks by a status that can never match.
 *
 * @param {string} text - Raw verify command (may chain with && / || / ;).
 * @returns {{ok: false, reason: string}|null} A failure for the first
 *   unlistable `--status` value found, or null when every value is listable.
 */
function lintVerifyTaskStatusVocabulary(text) {
  // Scan every `atris task list|queue|current` segment (compound verifies
  // chain with && / || / ;), then pull its --status value if present.
  const segmentRe = /\batris\s+task\s+(?:list|queue|current)\b([^|&;]*)/g;
  let segment;
  while ((segment = segmentRe.exec(text)) !== null) {
    const statusMatch = /--status[=\s]+["']?([A-Za-z0-9_-]+)["']?/.exec(segment[1]);
    if (!statusMatch) continue;
    const status = statusMatch[1];
    if (LISTABLE_TASK_STATUSES.includes(status)) continue;
    const vocabulary = LISTABLE_TASK_STATUSES.join('|');
    // Own-property lookup only: a status such as "constructor" must not pick
    // up an inherited Object.prototype member as its "correction".
    const corrected = Object.prototype.hasOwnProperty.call(STATUS_CORRECTIONS, status)
      ? STATUS_CORRECTIONS[status]
      : undefined;
    const suggestion = corrected
      ? `use --status ${corrected} instead (atris task ${status} is a transition that lands tasks in ${corrected}, so --status ${status} never matches)`
      : `use one of --status ${vocabulary}`;
    return {
      ok: false,
      reason: `Verify uses unlistable task status "--status ${status}" — the listable vocabulary is ${vocabulary}; ${suggestion}`,
    };
  }
  return null;
}

/**
 * Check that a verify command looks like a raw shell command rather than
 * Markdown or an English expectation.
 *
 * Rejects: any backtick; prose verbs (return(s), show(s), equal(s), should,
 * must) appearing OUTSIDE quoted arguments; and unlistable `--status` values.
 * Quoted arguments are data, not prose — a verify such as
 * `grep -q "should" README.md` is a legitimate shell assertion — so single-
 * and double-quoted spans are blanked before the prose check.
 *
 * @param {*} cmd - Verify command; empty/nullish is accepted as-is.
 * @returns {{ok: true}|{ok: false, reason: string}}
 */
function validateVerifyCommandShape(cmd) {
  const text = String(cmd || '').trim();
  if (!text) return { ok: true };
  if (text.includes('`')) {
    return { ok: false, reason: 'Verify contains markdown backticks instead of a raw shell command' };
  }
  const unquoted = text.replace(/"(?:\\.|[^"\\])*"|'[^']*'/g, ' ');
  if (/\b(returns?|shows?|equals?|should|must)\b/i.test(unquoted)) {
    return { ok: false, reason: 'Verify contains prose expectations instead of shell operators/assertions' };
  }
  const statusLint = lintVerifyTaskStatusVocabulary(text);
  if (statusLint) return statusLint;
  return { ok: true };
}
1003
+
1004
/**
 * Record a `verify-not-runnable` lesson and build the halted-tick result for
 * a verify command that failed shape validation.
 *
 * @param {string} cwd - Workspace root the lesson is written under.
 * @param {{task: *}} context - Tick context; `context.task` is quoted in the lesson.
 * @param {*} verifyCmd - The rejected verify command.
 * @param {string} reason - Why the command was rejected.
 * @param {number} startedAt - Tick start time in epoch milliseconds.
 * @param {object} [phaseResults={}] - Phase results gathered so far.
 * @returns {object} Result with outcome 'halted' and verifyRan/verifyPass false.
 */
function haltInvalidVerify(cwd, context, verifyCmd, reason, startedAt, phaseResults = {}) {
  const explanation =
    `Verify \`${verifyCmd}\` for "${context.task}" is not a runnable shell command: ${reason}. Tick halted.`;
  writeLesson(cwd, 'verify-not-runnable', 'fail', explanation);

  const elapsedSeconds = Math.round((Date.now() - startedAt) / 1000);
  return {
    outcome: 'halted',
    reason: 'verify-not-runnable',
    phaseResults,
    elapsedSeconds,
    verifyRan: false,
    verifyPass: false,
    verifyCmd,
  };
}
1017
+
805
1018
  /**
806
1019
  * Infer a default verify command from the repo shape. Order matters:
807
1020
  * package.json with a non-stub test script → `npm test`; then pytest/python;
@@ -878,7 +1091,7 @@ Read from disk:
878
1091
  - atris/lessons.md (recent failures — last 20 lines)
879
1092
 
880
1093
  Decide if the plan is safe to execute. Check:
881
- 1. Verify points at a falsifiable rubric or test (not \`true\`, \`echo ok\`, or similar).
1094
+ 1. Verify points at a falsifiable raw shell command or rubric (not \`true\`, \`echo ok\`, Markdown backticks, or English like "returns 1" / "shows today's date").
882
1095
  Prefer \`atris verify <slug> --section <name>\`.
883
1096
  2. Files are explicitly declared (not empty, not vague).
884
1097
  3. Rollback is named (commit, checkpoint, or \`git revert\`).
@@ -975,7 +1188,7 @@ function defaultPlanReviewExecutor(prompt, { cwd, timeout = 180000 } = {}) {
975
1188
  const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Grep,Glob"`;
976
1189
  const env = { ...process.env };
977
1190
  delete env.CLAUDECODE;
978
- const output = execSync(cmd, {
1191
+ const output = execPhaseCommandSync(cmd, {
979
1192
  cwd,
980
1193
  encoding: 'utf8',
981
1194
  timeout,
@@ -1004,7 +1217,18 @@ function defaultCodexExecutor(prompt, { cwd, timeout = 180000 } = {}) {
1004
1217
  timeout,
1005
1218
  stdio: 'pipe',
1006
1219
  maxBuffer: 10 * 1024 * 1024,
1220
+ detached: true,
1007
1221
  });
1222
+ // No sh wrapper here, but codex spawns its own children — sweep the group
1223
+ // on timeout so they cannot outlive the wall (same orphan class as the
1224
+ // claude sites; ESRCH means the tree is already dead).
1225
+ if (proc.pid && ((proc.error && proc.error.code === 'ETIMEDOUT') || proc.signal)) {
1226
+ try {
1227
+ process.kill(-proc.pid, 'SIGKILL');
1228
+ } catch (sweepErr) {
1229
+ if (sweepErr.code !== 'ESRCH') throw sweepErr;
1230
+ }
1231
+ }
1008
1232
  if (proc.status !== 0 && !proc.stdout) {
1009
1233
  throw new Error(`codex exited with status ${proc.status}: ${proc.stderr || 'no output'}`);
1010
1234
  }
@@ -1150,6 +1374,216 @@ function appendPlanRejection(cwd, context, review) {
1150
1374
  }
1151
1375
  }
1152
1376
 
1377
+ // ── Timeout reconciliation (T33, endgame loop-self-repair) ─────────────────
1378
+ // A do-phase wall-clock timeout kills the reporter, not the work: 12 of 13
1379
+ // ETIMEDOUT halts in the 2026-06-10 RSI audit had real commits landed with no
1380
+ // receipt, no checked bullet, and a human halt (lessons: executor-timeout-wall,
1381
+ // tick-must-mark-own-bullet). These helpers let the tick reconcile from
1382
+ // pre-tick HEADs instead of halting when work provably landed.
1383
+
1384
/**
 * Resolve today's journal file, using the local-time calendar date.
 *
 * @param {string} cwd - Workspace root.
 * @returns {{logFile: string, dateFormatted: string}} `logFile` is
 *   `<cwd>/atris/logs/<YYYY>/<YYYY-MM-DD>.md`; `dateFormatted` is `YYYY-MM-DD`.
 */
function todayJournalPath(cwd) {
  const today = new Date();
  const year = today.getFullYear();
  const twoDigits = (value) => String(value).padStart(2, '0');
  const dateFormatted = `${year}-${twoDigits(today.getMonth() + 1)}-${twoDigits(today.getDate())}`;
  return {
    logFile: path.join(cwd, 'atris', 'logs', String(year), `${dateFormatted}.md`),
    dateFormatted,
  };
}
1394
+
1395
/**
 * Reduce text to lower-case, single-spaced words for fuzzy title matching.
 * Backtick code spans and bracketed tags like `[urgent]` are dropped first,
 * then every remaining run of non-alphanumerics becomes one space.
 *
 * @param {*} text - Any value; nullish/falsy is treated as empty.
 * @returns {string} Normalized words, with no leading or trailing space.
 */
function normalizeForMatch(text) {
  const lowered = String(text || '').toLowerCase();
  const withoutCode = lowered.replace(/`[^`]*`/g, ' ');
  const withoutTags = withoutCode.replace(/\[[\w-]+\]/g, ' ');
  const wordsOnly = withoutTags.replace(/[^a-z0-9]+/g, ' ');
  return wordsOnly.trim().replace(/\s+/g, ' ');
}
1408
+
1409
/**
 * Build the search needle for a task: the normalized title, shortened to at
 * most `maxLen` characters with the trailing whitespace-delimited fragment
 * dropped. Used to locate the task's TODO bullet and journal receipts without
 * depending on an exact string match.
 *
 * @param {*} taskTitle - Task title.
 * @param {number} [maxLen=60] - Maximum needle length.
 * @returns {string} The needle, or '' when the title normalizes to nothing.
 */
function taskMatchNeedle(taskTitle, maxLen = 60) {
  const normalized = normalizeForMatch(taskTitle);
  if (!normalized) return '';
  return normalized.length > maxLen
    ? normalized.slice(0, maxLen).replace(/\s+\S*$/, '')
    : normalized;
}
1419
+
1420
/**
 * Read the current HEAD commit of the git repository at `dir`.
 *
 * @param {string} dir - Directory to run `git rev-parse HEAD` in.
 * @returns {string|null} The trimmed command output, or null when the
 *   command fails for any reason.
 */
function gitHeadAt(dir) {
  const execOptions = { cwd: dir, stdio: ['ignore', 'pipe', 'pipe'], encoding: 'utf8' };
  try {
    const stdout = execSync('git rev-parse HEAD', execOptions);
    return stdout.trim();
  } catch {
    return null;
  }
}
1427
+
1428
/**
 * Snapshot HEAD for the workspace repo and for every sibling repo the task
 * text mentions.
 *
 * Two mention styles are recognised: explicit `../name` references, and bare
 * tokens (a letter followed by at least two more name characters) that
 * resolve to a directory next to `cwd` containing `.git`. The workspace is
 * always included first, labelled `.`.
 *
 * @param {string} cwd - Workspace root.
 * @param {string} [taskText=''] - Task text to scan for sibling repo names.
 * @returns {Array<{label: string, dir: string, head: string|null}>}
 */
function snapshotRepoHeads(cwd, taskText = '') {
  const root = path.resolve(cwd);
  const tracked = new Map([[root, '.']]);
  const text = String(taskText || '');
  const isSiblingRepo = (dir) => dir !== root && fs.existsSync(path.join(dir, '.git'));

  const explicitRefs = text.match(/\.\.\/[A-Za-z0-9._-]+/g) || [];
  for (const ref of explicitRefs) {
    const dir = path.resolve(cwd, ref);
    if (isSiblingRepo(dir)) tracked.set(dir, ref);
  }

  const bareTokens = text.match(/[A-Za-z][A-Za-z0-9._-]{2,}/g) || [];
  for (const token of bareTokens) {
    const dir = path.resolve(cwd, '..', token);
    // Explicit refs keep their label; bare tokens only add repos not yet tracked.
    if (dir !== root && !tracked.has(dir) && fs.existsSync(path.join(dir, '.git'))) {
      tracked.set(dir, `../${token}`);
    }
  }

  return Array.from(tracked, ([dir, label]) => ({ label, dir, head: gitHeadAt(dir) }));
}
1450
+
1451
/**
 * Compare a prior HEAD snapshot against the repos' current HEADs.
 *
 * Entries that are missing or had no recorded head are ignored, as are repos
 * whose HEAD can no longer be read or has not changed.
 *
 * @param {Array<{label: string, dir: string, head: string|null}>|null|undefined} snapshot
 * @returns {Array<{label: string, dir: string, before: string, after: string}>}
 *   One entry per repo whose HEAD differs from the snapshot.
 */
function diffAdvancedRepoHeads(snapshot) {
  const moved = [];
  for (const entry of snapshot || []) {
    const recorded = entry && entry.head;
    if (!recorded) continue;
    const current = gitHeadAt(entry.dir);
    const changed = Boolean(current) && current !== recorded;
    if (changed) {
      moved.push({ label: entry.label, dir: entry.dir, before: recorded, after: current });
    }
  }
  return moved;
}
1466
+
1467
/**
 * Recognise the do-phase timeout message (the "<phase> phase timed out
 * after …" text with phase "do"). Timeouts of other phases do not match, so
 * they remain human halts — only the do phase commits work worth reconciling.
 *
 * @param {*} message - Error message; nullish/falsy is treated as empty.
 * @returns {boolean} true when the message contains "do phase timed out after".
 */
function isDoPhaseTimeoutMessage(message) {
  const text = String(message || '');
  const doPhaseTimeout = /\bdo phase timed out after\b/;
  return doPhaseTimeout.test(text);
}
1475
+
1476
/**
 * Tick the task's bullet in the workspace TODO file.
 *
 * Only the first existing file of `atris/TODO.md`, `atris/todo.md` is
 * considered. Within it, the first bullet that is not already `[x]`, is not
 * struck through (`~~`), and whose normalized text contains the normalized
 * title prefix is rewritten: `- **T33:** …` becomes `- [x] **T33:** …` and
 * `- [ ] …` becomes `- [x] …`.
 *
 * @param {string} cwd - Workspace root.
 * @param {*} taskTitle - Title of the task to mark.
 * @returns {boolean} true when a bullet was rewritten and the file saved.
 */
function markTodoBulletDone(cwd, taskTitle) {
  const needle = taskMatchNeedle(taskTitle);
  if (!needle) return false;

  const todoPath = ['TODO.md', 'todo.md']
    .map((name) => path.join(cwd, 'atris', name))
    .find((candidate) => fs.existsSync(candidate));
  if (!todoPath) return false;

  const bulletShape = /^(\s*)- (?:\[( |x)\]\s+)?(.*)$/;
  const lines = fs.readFileSync(todoPath, 'utf8').split('\n');
  const target = lines.findIndex((line) => {
    const parts = line.match(bulletShape);
    if (!parts) return false;
    if (parts[2] === 'x') return false;
    if (parts[3].startsWith('~~')) return false;
    return normalizeForMatch(line).includes(needle);
  });
  if (target === -1) return false;

  const [, indent, , rest] = lines[target].match(bulletShape);
  lines[target] = `${indent}- [x] ${rest}`;
  fs.writeFileSync(todoPath, lines.join('\n'));
  return true;
}
1503
+
1504
/**
 * Insert a block directly under today's journal `## Notes` heading, creating
 * the journal file first if the tick dies before any other writer got to it.
 * Never throws.
 *
 * Placement:
 *  - no `## Notes` heading: one is appended at the end, followed by the block;
 *  - heading followed by a newline: the block goes right after that line,
 *    ahead of any existing notes;
 *  - heading is the very last line with no trailing newline: a newline is
 *    added and the block appended. Without this case the "end of heading
 *    line" lookup yields -1 and the block would be spliced in at offset 0,
 *    i.e. at the top of the journal.
 *
 * @param {string} cwd - Workspace root.
 * @param {string} block - Markdown to insert.
 * @returns {boolean} true when the journal was written, false on any error.
 */
function appendUnderNotes(cwd, block) {
  try {
    const { logFile, dateFormatted } = todayJournalPath(cwd);
    if (!fs.existsSync(logFile)) {
      const dir = path.dirname(logFile);
      if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
      createLogFile(logFile, dateFormatted);
    }
    let content = fs.readFileSync(logFile, 'utf8');
    const notesIdx = content.indexOf('## Notes');
    if (notesIdx === -1) {
      content = content.replace(/\s*$/, '') + `\n\n## Notes\n${block}\n`;
    } else {
      const eol = content.indexOf('\n', notesIdx);
      if (eol === -1) {
        // Heading is the final line and unterminated: close it, then append.
        content = `${content}\n${block}`;
      } else {
        content = content.slice(0, eol + 1) + block + content.slice(eol + 1);
      }
    }
    fs.writeFileSync(logFile, content);
    return true;
  } catch {
    // Best-effort by contract: journal bookkeeping must not crash the tick.
    return false;
  }
}
1530
+
1531
/**
 * Write a "Timeout reconciliation" receipt into today's journal notes,
 * listing each repo whose HEAD moved as `label: before → after` with the
 * values shortened to 7 characters. The heading timestamp is the UTC
 * `YYYY-MM-DD HH:MM` taken from `toISOString()`.
 *
 * @param {string} cwd - Workspace root.
 * @param {{task: *, advanced: Array<{label: string, before: *, after: *}>}} details
 * @returns {boolean} Result of appendUnderNotes.
 */
function appendTimeoutReconciliation(cwd, { task, advanced }) {
  const stamp = new Date().toISOString().slice(0, 16).replace('T', ' ');
  const shortSha = (sha) => String(sha).slice(0, 7);
  const repoLines = (advanced || [])
    .map((repo) => `- ${repo.label}: ${shortSha(repo.before)} → ${shortSha(repo.after)}`)
    .join('\n');
  const block = [
    `\n### Timeout reconciliation — ${stamp} — work-landed-receipt-died\n\n`,
    `**Task:** ${task}\n`,
    `**What happened:** the do-phase wall killed the reporter, but commits landed:\n`,
    `${repoLines}\n`,
    `Receipt auto-written and the TODO bullet marked; no human halt required.\n`,
  ].join('');
  return appendUnderNotes(cwd, block);
}
1544
+
1545
/**
 * Write a "Check-and-advance" entry into today's journal notes, recording
 * that the task was found already done and quoting the receipt line that
 * proved it. The heading timestamp is the UTC `YYYY-MM-DD HH:MM` taken from
 * `toISOString()`.
 *
 * @param {string} cwd - Workspace root.
 * @param {*} task - Task title.
 * @param {*} receiptLine - The journal line that served as the completion receipt.
 * @returns {boolean} Result of appendUnderNotes.
 */
function appendCheckAndAdvance(cwd, task, receiptLine) {
  const stamp = new Date().toISOString().slice(0, 16).replace('T', ' ');
  const block = [
    `\n### Check-and-advance — ${stamp} — advanced-already-done\n\n`,
    `**Task:** ${task}\n`,
    `**What happened:** verify passed before work started AND today's journal already carries a completion receipt — the work shipped on a prior tick whose reporter died before bookkeeping. Bullet marked, picker advanced.\n`,
    `**Receipt:** ${receiptLine}\n`,
  ].join('');
  return appendUnderNotes(cwd, block);
}
1554
+
1555
/**
 * Look through today's journal for a receipt-shaped line that names the task.
 *
 * A line is receipt-shaped when it contains a `**C<n>:**` marker, a
 * `**Task:**` marker, or the word "reconciliation" (any case). It names the
 * task when its normalized text contains the task's match needle.
 *
 * @param {string} cwd - Workspace root.
 * @param {*} taskTitle - Task title to look for.
 * @returns {string|null} The first matching line, trimmed, or null when the
 *   journal is missing, the title yields no needle, or nothing matches.
 */
function findCompletionReceipt(cwd, taskTitle) {
  const { logFile } = todayJournalPath(cwd);
  if (!fs.existsSync(logFile)) return null;
  const needle = taskMatchNeedle(taskTitle);
  if (!needle) return null;

  const receiptMarkers = [/\*\*C\d+:\*\*/, /\*\*Task:\*\*/, /reconciliation/i];
  const journalLines = fs.readFileSync(logFile, 'utf8').split('\n');
  const hit = journalLines.find(
    (line) =>
      receiptMarkers.some((marker) => marker.test(line)) &&
      normalizeForMatch(line).includes(needle)
  );
  return hit === undefined ? null : hit.trim();
}
1572
+
1573
/**
 * Reconcile a tick whose do phase timed out by diffing the pre-tick HEAD
 * snapshot.
 *
 * When at least one repo advanced, a reconciliation receipt is written to the
 * journal, the task's TODO bullet is marked, and the outcome is
 * `work-landed-receipt-died`. When nothing advanced the result says so and
 * the caller halts as before.
 *
 * @param {string} cwd - Workspace root.
 * @param {Array<{label: string, dir: string, head: string|null}>} snapshot -
 *   HEADs captured before the tick.
 * @param {*} taskTitle - Title of the task the tick was working on.
 * @returns {{reconciled: false, advanced: []}|{reconciled: true, outcome: string, advanced: Array, bulletMarked: boolean}}
 */
function reconcileTimedOutTick(cwd, snapshot, taskTitle) {
  const advanced = diffAdvancedRepoHeads(snapshot);
  const nothingLanded = advanced.length === 0;
  if (nothingLanded) {
    return { reconciled: false, advanced: [] };
  }

  // Receipt first, then the bullet — same order as the bookkeeping it replaces.
  appendTimeoutReconciliation(cwd, { task: taskTitle, advanced });
  const bulletMarked = markTodoBulletDone(cwd, taskTitle);
  return {
    reconciled: true,
    outcome: 'work-landed-receipt-died',
    advanced,
    bulletMarked,
  };
}
1586
+
1153
1587
  function runTaskOnce(context, options = {}) {
1154
1588
  const { verbose = false, cwd = process.cwd() } = options;
1155
1589
 
@@ -1170,8 +1604,15 @@ function runTaskOnce(context, options = {}) {
1170
1604
 
1171
1605
  const phaseResults = {};
1172
1606
  const startedAt = Date.now();
1173
- const verifyResult = getVerifyCommand(cwd, context.task);
1174
- const verifyCmd = verifyResult.cmd;
1607
+ let verifyResult = getVerifyCommand(cwd, context.task);
1608
+ let verifyCmd = verifyResult.cmd;
1609
+ const explicitVerifyBefore = new Set(
1610
+ collectExplicitVerifyTasks(cwd).map((task) => task.key)
1611
+ );
1612
+ const initialVerifyShape = validateVerifyCommandShape(verifyCmd);
1613
+ if (!initialVerifyShape.ok) {
1614
+ return haltInvalidVerify(cwd, context, verifyCmd, initialVerifyShape.reason, startedAt, phaseResults);
1615
+ }
1175
1616
 
1176
1617
  // Guard: endgame tasks must have an explicit Verify field.
1177
1618
  // Reactive signals (inbox, staleness, imagined) use npm test as default.
@@ -1203,6 +1644,25 @@ function runTaskOnce(context, options = {}) {
1203
1644
  if (!skipFalsifiability && verifyResult.explicit && context.kind === 'endgame' && verifyCmd) {
1204
1645
  try {
1205
1646
  execSync(verifyCmd, { cwd, stdio: 'pipe', timeout: 300000 });
1647
+ // T33b (lesson: tick-must-mark-own-bullet): a pre-work verify pass WITH
1648
+ // a completion receipt already in today's journal means the work shipped
1649
+ // but the reporter died before bookkeeping. Check the bullet and advance
1650
+ // instead of wedging the picker on verify-not-falsifiable.
1651
+ const receipt = findCompletionReceipt(cwd, context.task);
1652
+ if (receipt) {
1653
+ const bulletMarked = markTodoBulletDone(cwd, context.task);
1654
+ appendCheckAndAdvance(cwd, context.task, receipt);
1655
+ return {
1656
+ outcome: 'advanced-already-done',
1657
+ reason: 'advanced-already-done',
1658
+ receipt,
1659
+ bulletMarked,
1660
+ phaseResults: {},
1661
+ elapsedSeconds: 0,
1662
+ verifyRan: true,
1663
+ verifyPass: true,
1664
+ };
1665
+ }
1206
1666
  writeLesson(cwd, 'verify-not-falsifiable', 'fail',
1207
1667
  `Verify \`${verifyCmd}\` passed before work started on "${context.task}". Either the rubric is trivial or the task is already done. Tick halted.`);
1208
1668
  return {
@@ -1264,6 +1724,18 @@ function runTaskOnce(context, options = {}) {
1264
1724
  }
1265
1725
  }
1266
1726
 
1727
+ if (!verifyResult.explicit && shouldAdoptPlannedVerify(context.kind)) {
1728
+ const plannedVerify = findNewExplicitVerifyCommand(cwd, explicitVerifyBefore);
1729
+ if (plannedVerify) {
1730
+ verifyResult = plannedVerify;
1731
+ verifyCmd = plannedVerify.cmd;
1732
+ }
1733
+ }
1734
+ const plannedVerifyShape = validateVerifyCommandShape(verifyCmd);
1735
+ if (!plannedVerifyShape.ok) {
1736
+ return haltInvalidVerify(cwd, context, verifyCmd, plannedVerifyShape.reason, startedAt, phaseResults);
1737
+ }
1738
+
1267
1739
  // Phase: do
1268
1740
  {
1269
1741
  const t0 = Date.now();
@@ -1309,7 +1781,7 @@ function runTaskOnce(context, options = {}) {
1309
1781
  elapsedSeconds: verifyTime,
1310
1782
  };
1311
1783
  try {
1312
- const slug = (context.task || 'unknown').replace(/\s+/g, '-').toLowerCase().slice(0, 40);
1784
+ const slug = lessonSlug(context.task);
1313
1785
  writeLesson(cwd, `verify-fail-${slug}`, 'fail', `Verify command \`${verifyCmd}\` failed: ${e.message.split('\n')[0]}`);
1314
1786
  } catch { /* lesson write must not crash the tick */ }
1315
1787
  }
@@ -1975,12 +2447,13 @@ function findCodeTodos(cwd) {
1975
2447
  try {
1976
2448
  const out = execFileSync('git', [
1977
2449
  'grep', '-n', '-I', '-E', '(TODO|FIXME)',
1978
- '--', ':!test/', ':!node_modules/', ':!atris/', ':!**/*.md'
2450
+ '--', ':!test/', ':!node_modules/', ':!atris/', ':!**/_archive/**', ':!**/*.md'
1979
2451
  ], { cwd, encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] });
1980
2452
  const results = [];
1981
2453
  for (const raw of out.split('\n').filter(Boolean)) {
1982
2454
  const m = raw.match(/^([^:]+):(\d+):(.*)$/);
1983
2455
  if (!m) continue;
2456
+ if (m[1].split(/[\\/]/).includes('_archive')) continue;
1984
2457
  const line = m[3];
1985
2458
  // A real TODO is a comment marker at the start of the line (allowing
1986
2459
  // leading indent) followed by TODO/FIXME and at least one word. This
@@ -2160,16 +2633,63 @@ function isLessonResolved(lessonLine, cwd, options = {}) {
2160
2633
  if (!slugMatch) return false;
2161
2634
  const slug = slugMatch[1];
2162
2635
 
2636
+ if (isCleanMapBrokenRefFailLesson(lessonLine, cwd)) return true;
2637
+
2163
2638
  // Detector-backed check (typed lesson sidecar)
2164
2639
  const meta = options.meta || loadLessonMetadata(cwd)[slug];
2165
2640
  if (meta && meta.detector) {
2166
2641
  return runLessonDetector(meta.detector, cwd, options.detectorTimeout);
2167
2642
  }
2168
2643
 
2644
+ if (inlinePythonVerifyFailureNowPasses(lessonLine, cwd, options.detectorTimeout)) return true;
2645
+
2169
2646
  // Legacy fallback: keyword grep against referenced files.
2170
2647
  return isLessonResolvedLegacy(lessonLine, cwd);
2171
2648
  }
2172
2649
 
2650
+ function isCleanMapBrokenRefFailLesson(lessonLine, cwd) {
2651
+ const text = String(lessonLine || '').toLowerCase();
2652
+ if (!/fix \d+ broken references? in map\.md/.test(text)) return false;
2653
+ return repoMapAuditReportsClean(cwd);
2654
+ }
2655
+
2656
+ function extractInlinePythonVerifyFailure(lessonLine) {
2657
+ const commandMatch = String(lessonLine || '').match(/Verify command\s+``([\s\S]*?)``\s+failed/i);
2658
+ if (!commandMatch) return null;
2659
+ const matches = [...commandMatch[1].matchAll(/\b(python3?)\s+-c\s+(["'])([\s\S]*?)\2/g)];
2660
+ const match = matches[matches.length - 1];
2661
+ if (!match) return null;
2662
+ return {
2663
+ executable: match[1],
2664
+ code: match[3].replace(/\\"/g, '"').replace(/\\'/g, "'")
2665
+ };
2666
+ }
2667
+
2668
+ function inlinePythonVerifyFailureNowPasses(lessonLine, cwd, timeout = 10000) {
2669
+ const parsed = extractInlinePythonVerifyFailure(lessonLine);
2670
+ if (!parsed) return false;
2671
+ const result = spawnSync(parsed.executable, ['-c', parsed.code], {
2672
+ cwd,
2673
+ encoding: 'utf8',
2674
+ timeout,
2675
+ stdio: ['ignore', 'ignore', 'ignore']
2676
+ });
2677
+ return result.status === 0;
2678
+ }
2679
+
2680
+ function legacyLessonFileRefs(lessonLine) {
2681
+ const fileRefs = [];
2682
+ const filePattern = /`([a-zA-Z0-9_/./-]+\.[a-zA-Z]+(?::\d+(?:-\d+)?)?)`/g;
2683
+ let m;
2684
+ while ((m = filePattern.exec(lessonLine)) !== null) {
2685
+ const ref = m[1].replace(/:\d+(-\d+)?$/, '');
2686
+ if (ref.includes('/') || ref.endsWith('.js') || ref.endsWith('.md') || ref.endsWith('.ts')) {
2687
+ fileRefs.push(ref);
2688
+ }
2689
+ }
2690
+ return fileRefs;
2691
+ }
2692
+
2173
2693
  /**
2174
2694
  * The pre-v3.8 resolver — kept as an internal fallback for prose-only lessons
2175
2695
  * that don't have detector metadata yet. Never auto-promotes a prose lesson to
@@ -2182,16 +2702,7 @@ function isLessonResolvedLegacy(lessonLine, cwd) {
2182
2702
  if (!slugMatch) return false;
2183
2703
  const slug = slugMatch[1];
2184
2704
 
2185
- // Extract file paths: patterns like `commands/autopilot.js:116` or `commands/run.js:157`
2186
- const fileRefs = [];
2187
- const filePattern = /`([a-zA-Z0-9_/./-]+\.[a-zA-Z]+(?::\d+(?:-\d+)?)?)`/g;
2188
- let m;
2189
- while ((m = filePattern.exec(lessonLine)) !== null) {
2190
- const ref = m[1].replace(/:\d+(-\d+)?$/, ''); // strip line numbers
2191
- if (ref.includes('/') || ref.endsWith('.js') || ref.endsWith('.md') || ref.endsWith('.ts')) {
2192
- fileRefs.push(ref);
2193
- }
2194
- }
2705
+ const fileRefs = legacyLessonFileRefs(lessonLine);
2195
2706
 
2196
2707
  if (fileRefs.length === 0) return false;
2197
2708
 
@@ -2274,6 +2785,9 @@ function pickUnresolvedFailLesson(cwd) {
2274
2785
  const candidates = [];
2275
2786
  for (const lesson of lessons) {
2276
2787
  if (lesson.verdict !== 'fail') continue;
2788
+ if (lesson.id === 'verify-not-falsifiable') continue;
2789
+ if (lesson.id === 'no-verify-field') continue;
2790
+ if (lesson.id === 'verify-failed' && lesson.legacy) continue;
2277
2791
  if (lesson.resolvedTag) continue;
2278
2792
  // Typed lesson with explicit status wins — respect the sidecar.
2279
2793
  // `resolved` = done. `observed` = process rule, not a fixable code state.
@@ -2284,6 +2798,7 @@ function pickUnresolvedFailLesson(cwd) {
2284
2798
  if (s === 'resolved' || s === 'observed') continue;
2285
2799
  if (s === 'attempted' && (lesson.meta.attempts || 0) >= MAX_ATTEMPTS) continue;
2286
2800
  }
2801
+ if (lesson.legacy && legacyLessonFileRefs(lesson.line).length === 0) continue;
2287
2802
  // Detector-backed or legacy grep check.
2288
2803
  if (isLessonResolved(lesson.line, cwd, { meta: lesson.meta })) continue;
2289
2804
 
@@ -2370,7 +2885,7 @@ Reply with the JSON array and nothing else.`;
2370
2885
  const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')"`;
2371
2886
  const env = { ...process.env };
2372
2887
  delete env.CLAUDECODE;
2373
- output = execSync(cmd, {
2888
+ output = execPhaseCommandSync(cmd, {
2374
2889
  cwd,
2375
2890
  encoding: 'utf8',
2376
2891
  timeout: PHASE_TIMEOUT,
@@ -2378,6 +2893,11 @@ Reply with the JSON array and nothing else.`;
2378
2893
  maxBuffer: 10 * 1024 * 1024,
2379
2894
  env
2380
2895
  }).toString();
2896
+ } catch (err) {
2897
+ if (isPhaseTimeoutError(err)) {
2898
+ throw new Error(`horizon-proposal phase timed out after ${PHASE_TIMEOUT / 1000}s`);
2899
+ }
2900
+ throw err;
2381
2901
  } finally {
2382
2902
  try { fs.unlinkSync(tmpFile); } catch {}
2383
2903
  }
@@ -2658,12 +3178,24 @@ async function autopilotAtris(description, options = {}) {
2658
3178
  const context = {
2659
3179
  task: suggestion.task,
2660
3180
  kind: suggestion.kind,
3181
+ ...(suggestion.files ? { files: suggestion.files } : {}),
2661
3182
  ...(suggestion.lessonLine ? { lessonLine: suggestion.lessonLine } : {}),
2662
3183
  ...(suggestion.lessonSlug ? { lessonSlug: suggestion.lessonSlug } : {}),
2663
3184
  ...(suggestion.lessonDate ? { lessonDate: suggestion.lessonDate } : {})
2664
3185
  };
2665
3186
  const startingEndgame = readEndgameState(cwd);
2666
3187
 
3188
+ // T33a: snapshot pre-tick HEADs (cwd + sibling repos named in the task)
3189
+ // so a do-phase timeout can be reconciled against what actually landed.
3190
+ let preTickHeads = null;
3191
+ try {
3192
+ const verifyHint = getVerifyCommand(cwd, suggestion.task).cmd || '';
3193
+ preTickHeads = snapshotRepoHeads(
3194
+ cwd,
3195
+ [suggestion.task, ...(suggestion.files || []), verifyHint].join(' ')
3196
+ );
3197
+ } catch { /* snapshot failure must not block the tick */ }
3198
+
2667
3199
  try {
2668
3200
  if (verbose) {
2669
3201
  console.log('');
@@ -2697,6 +3229,26 @@ async function autopilotAtris(description, options = {}) {
2697
3229
  break;
2698
3230
  }
2699
3231
 
3232
+ // T33b: the falsifiability gate found a completion receipt — the work
3233
+ // already shipped, the bullet is checked, move straight to the next pick.
3234
+ if (execution.outcome === 'advanced-already-done') {
3235
+ completed++;
3236
+ tickOutcome = 'built';
3237
+ tickOutcomeText = `"${lastTaskTitle}" was already done — verify passed pre-work and today's journal carries its completion receipt, so I checked the bullet and advanced.`;
3238
+ tickNextStep = 'pick the next endgame task';
3239
+ if (verbose) {
3240
+ console.log(' already done (journal receipt found). bullet checked, advancing.');
3241
+ } else {
3242
+ printPlainBlock([
3243
+ 'That task was already done — verify passed before work and a completion receipt exists in today\'s journal.',
3244
+ 'I checked the bullet and advanced.',
3245
+ '',
3246
+ 'Next I will look for the next task.'
3247
+ ].join('\n'));
3248
+ }
3249
+ continue;
3250
+ }
3251
+
2700
3252
  const planTime = execution.phaseResults.plan.elapsedSeconds;
2701
3253
  if (verbose) console.log(` planned (${planTime}s)`);
2702
3254
 
@@ -2758,7 +3310,7 @@ async function autopilotAtris(description, options = {}) {
2758
3310
  // Record commit hash + verify command for retroactive regression checks
2759
3311
  try {
2760
3312
  const commitHash = execSync('git rev-parse HEAD', { cwd, encoding: 'utf8' }).trim();
2761
- const taskSlug = (suggestion.task || 'unknown').replace(/\s+/g, '-').toLowerCase().slice(0, 40);
3313
+ const taskSlug = lessonSlug(suggestion.task);
2762
3314
  recordTickCommit(cwd, commitHash, execution.verifyCmd || '', taskSlug);
2763
3315
 
2764
3316
  // Every 10th tick, run retroactive regression check
@@ -2805,6 +3357,36 @@ async function autopilotAtris(description, options = {}) {
2805
3357
  }
2806
3358
 
2807
3359
  } catch (err) {
3360
+ // T33a: a do-phase timeout with commits landed is a dead reporter, not
3361
+ // dead work — write the reconciliation receipt, mark the bullet, and
3362
+ // record work-landed-receipt-died instead of halting for a human.
3363
+ let reconciliation = null;
3364
+ if (isDoPhaseTimeoutMessage(err.message)) {
3365
+ try {
3366
+ reconciliation = reconcileTimedOutTick(cwd, preTickHeads, lastTaskTitle || suggestion.task);
3367
+ } catch { reconciliation = null; }
3368
+ }
3369
+ if (reconciliation && reconciliation.reconciled) {
3370
+ completed++;
3371
+ const landed = reconciliation.advanced
3372
+ .map((r) => `${r.label} ${String(r.before).slice(0, 7)} → ${String(r.after).slice(0, 7)}`)
3373
+ .join(', ');
3374
+ tickOutcome = 'work-landed-receipt-died';
3375
+ tickOutcomeText = `"${lastTaskTitle}" hit the do-phase wall but commits landed (${landed}). I wrote the reconciliation receipt and marked the bullet — work-landed-receipt-died, no human halt.`;
3376
+ tickNextStep = 'pick the next task';
3377
+ if (verbose) {
3378
+ console.log(` do phase timed out, but work landed (${landed}). reconciled — no human halt.`);
3379
+ } else {
3380
+ printPlainBlock([
3381
+ 'The do phase timed out, but commits landed before the wall.',
3382
+ `Landed: ${landed}.`,
3383
+ 'I wrote the reconciliation receipt and marked the task bullet.',
3384
+ '',
3385
+ 'Next tick will pick the next task.'
3386
+ ].join('\n'));
3387
+ }
3388
+ break;
3389
+ }
2808
3390
  tickOutcome = 'halted';
2809
3391
  tickOutcomeText = `I hit an error while running "${lastTaskTitle || 'a task'}": ${err.message}`;
2810
3392
  tickNextStep = 'stop until a human looks at the error';
@@ -2988,7 +3570,7 @@ Search the codebase to verify. Reply: YES <reason> or NO <reason>`;
2988
3570
  const env = { ...process.env };
2989
3571
  delete env.CLAUDECODE;
2990
3572
  const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Glob,Grep"`;
2991
- const output = execSync(cmd, {
3573
+ const output = execPhaseCommandSync(cmd, {
2992
3574
  cwd,
2993
3575
  encoding: 'utf8',
2994
3576
  timeout: 60000,
@@ -3021,6 +3603,13 @@ async function autopilotFromTodo(options = {}) {
3021
3603
 
3022
3604
  module.exports = {
3023
3605
  appendTickSummary,
3606
+ snapshotRepoHeads,
3607
+ diffAdvancedRepoHeads,
3608
+ reconcileTimedOutTick,
3609
+ markTodoBulletDone,
3610
+ findCompletionReceipt,
3611
+ isDoPhaseTimeoutMessage,
3612
+ validateVerifyCommandShape,
3024
3613
  askHuman,
3025
3614
  askModel,
3026
3615
  autopilotAtris,
@@ -3052,11 +3641,19 @@ module.exports = {
3052
3641
  proposeCandidateHorizons,
3053
3642
  recordTickCommit,
3054
3643
  regressionCheck,
3644
+ repoMapAuditReportsClean,
3645
+ isCleanMapBrokenRefFailLesson,
3646
+ inlinePythonVerifyFailureNowPasses,
3055
3647
  runPlanReview,
3056
3648
  runTaskOnce,
3057
3649
  buildPlanReviewPrompt,
3058
3650
  parseVerdict,
3059
3651
  scoreEndgameCandidates,
3060
3652
  suggestNextTask,
3061
- writeLesson
3653
+ shouldSkipAutoHumanGate,
3654
+ writeLesson,
3655
+ isPhaseTimeoutError,
3656
+ execPhaseCommandSync,
3657
+ executePhaseDetailed,
3658
+ lessonSlug
3062
3659
  };