atris 3.15.57 → 3.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +2 -2
- package/GETTING_STARTED.md +1 -1
- package/PERSONA.md +4 -4
- package/README.md +12 -11
- package/atris/skills/copy-editor/SKILL.md +30 -4
- package/atris/skills/improve/SKILL.md +18 -20
- package/atris/wiki/concepts/agent-activation-contract.md +5 -3
- package/atris/wiki/concepts/workspace-initialization-contract.md +4 -4
- package/atris/wiki/index.md +1 -0
- package/ax +522 -73
- package/bin/atris.js +78 -44
- package/commands/align.js +0 -14
- package/commands/apps.js +102 -1
- package/commands/autopilot.js +628 -31
- package/commands/brain.js +219 -34
- package/commands/brainstorm.js +0 -829
- package/commands/compile.js +569 -0
- package/commands/computer.js +0 -60
- package/commands/improve.js +501 -0
- package/commands/integrations.js +233 -71
- package/commands/lesson.js +44 -0
- package/commands/member.js +4498 -226
- package/commands/mission.js +302 -27
- package/commands/now.js +89 -1
- package/commands/probe.js +366 -0
- package/commands/radar.js +181 -56
- package/commands/recap.js +203 -0
- package/commands/skill.js +6 -2
- package/commands/soul.js +0 -4
- package/commands/task.js +5587 -499
- package/commands/terminal.js +14 -10
- package/commands/wiki.js +87 -1
- package/commands/workflow.js +288 -73
- package/commands/worktree.js +52 -15
- package/commands/xp.js +6 -65
- package/lib/auto-accept-certified.js +294 -0
- package/lib/file-ops.js +0 -184
- package/lib/member-alive.js +232 -0
- package/lib/policy-lessons.js +280 -0
- package/lib/receipt-evidence.js +64 -0
- package/lib/state-detection.js +75 -1
- package/lib/task-db.js +568 -16
- package/lib/task-proof.js +43 -0
- package/package.json +1 -1
- package/utils/auth.js +13 -4
- package/commands/research.js +0 -52
- package/lib/section-merge.js +0 -196
package/commands/autopilot.js
CHANGED
|
@@ -25,6 +25,46 @@ const pkg = require('../package.json');
|
|
|
25
25
|
|
|
26
26
|
const PHASE_TIMEOUT = 600000; // 10 min per phase
|
|
27
27
|
|
|
28
|
+
/**
 * Heuristic: does a task's `claimed` value name a human owner?
 *
 * The value is stringified and lower-cased, then checked for the whole word
 * `keshav` / `keshavrao`, or one of `owner`, `human`, `operator`.
 *
 * @param {*} claimed - Raw `claimed` field of a task (any falsy value reads as '').
 * @returns {boolean} true when any owner pattern matches.
 */
function looksOwnerClaimed(claimed) {
  const claimant = String(claimed || '').toLowerCase();
  const ownerPatterns = [
    /\bkeshav(?:rao)?\b/,
    /\b(owner|human|operator)\b/,
  ];
  return ownerPatterns.some((pattern) => pattern.test(claimant));
}
|
|
32
|
+
|
|
33
|
+
/**
 * Heuristic: does a task title describe a step that is gated on a person?
 *
 * The title is stringified and lower-cased, then tested against a fixed list
 * of phrases (owner/human approval, input or gate; manual send; route or
 * destination confirmation).
 *
 * @param {*} title - Task title (any falsy value reads as '').
 * @returns {boolean} true when at least one gating phrase is present.
 */
function looksOwnerGatedTitle(title) {
  const lowered = String(title || '').toLowerCase();
  const gatePhrases = [
    /\bowner[- ](?:approval|input|gate|gated)\b/,
    /\bhuman[- ](?:approval|input|gate|gated)\b/,
    /\bmanual send\b/,
    /\broute confirmation\b/,
    /\bconfirm pallet destination\b/,
    /\bconfirm .+ destination before .+ approval\b/,
    /\bapprove and manually send\b/,
  ];
  for (const phrase of gatePhrases) {
    if (phrase.test(lowered)) return true;
  }
  return false;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Decide whether a task should be skipped as human-gated.
 *
 * A task qualifies when its `claimed` field looks owner-claimed or when its
 * title (falling back to `task.task`) looks owner-gated.
 *
 * @param {object|null|undefined} task - Parsed task; reads `claimed`, `title`, `task`.
 * @returns {boolean} false for a missing task, otherwise the combined heuristic.
 */
function shouldSkipAutoHumanGate(task) {
  if (!task) {
    return false;
  }
  if (looksOwnerClaimed(task.claimed)) {
    return true;
  }
  return looksOwnerGatedTitle(task.title || task.task);
}
|
|
50
|
+
|
|
51
|
+
/**
 * Run the repo's own `scripts/audit_map_refs.py` (when present) and report
 * whether it declares zero broken references.
 *
 * @param {string} cwd - Workspace root; the script is run with this as cwd.
 * @returns {boolean} true only when the script exists, exits with status 0,
 *   and its combined stdout/stderr contains `Total broken references: 0`.
 */
function repoMapAuditReportsClean(cwd) {
  const auditPath = path.join(cwd, 'scripts', 'audit_map_refs.py');
  if (!fs.existsSync(auditPath)) return false;

  const { status, stdout, stderr } = spawnSync('python3', [auditPath], {
    cwd,
    encoding: 'utf8',
    timeout: 120000,
    maxBuffer: 1024 * 1024
  });
  // Anything other than a clean exit (non-zero or null status) counts as "not clean".
  if (status !== 0) return false;

  const combined = `${stdout || ''}\n${stderr || ''}`;
  const summary = combined.match(/Total broken references:\s*(\d+)/i);
  if (!summary) return false;
  return Number(summary[1]) === 0;
}
|
|
67
|
+
|
|
28
68
|
/**
|
|
29
69
|
* Scan workspace for the next thing worth doing.
|
|
30
70
|
* Returns { task, why, kind } or null.
|
|
@@ -54,7 +94,7 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
|
|
|
54
94
|
// --- Resume interrupted work ---
|
|
55
95
|
if (todo.inProgress.length > 0) {
|
|
56
96
|
const t = todo.inProgress[0];
|
|
57
|
-
if (!(t.tags && t.tags.includes('unverified')) && !skipped.has(t.title)) {
|
|
97
|
+
if (!(t.tags && t.tags.includes('unverified')) && !skipped.has(t.title) && !(auto && shouldSkipAutoHumanGate(t))) {
|
|
58
98
|
suggestions.push({
|
|
59
99
|
task: t.title,
|
|
60
100
|
why: `This was already started${t.claimed ? ` by ${t.claimed}` : ''} but never finished.`,
|
|
@@ -75,6 +115,7 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
|
|
|
75
115
|
why: `"${sp.staleSource}" changed on ${sp.sourceDate} but the page was last compiled ${sp.compiledDate}. The content may be wrong.`,
|
|
76
116
|
kind: 'staleness',
|
|
77
117
|
priority: 2,
|
|
118
|
+
files: [pageName, sp.staleSource],
|
|
78
119
|
skipKey: key
|
|
79
120
|
});
|
|
80
121
|
break;
|
|
@@ -95,7 +136,9 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
|
|
|
95
136
|
}
|
|
96
137
|
|
|
97
138
|
// --- Broken MAP.md references ---
|
|
98
|
-
const { unhealable } =
|
|
139
|
+
const { unhealable } = repoMapAuditReportsClean(cwd)
|
|
140
|
+
? { unhealable: [] }
|
|
141
|
+
: healBrokenMapRefs(cwd, atrisDir, true); // dry-run
|
|
99
142
|
if (unhealable.length > 0 && !skipped.has('fix-map-refs')) {
|
|
100
143
|
const sample = unhealable.slice(0, 3).map(r => `${r.file}:${r.line}`).join(', ');
|
|
101
144
|
suggestions.push({
|
|
@@ -127,6 +170,7 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
|
|
|
127
170
|
for (const t of todo.backlog) {
|
|
128
171
|
if (t.tags && t.tags.includes('unverified')) continue;
|
|
129
172
|
if (shouldSkipEndgameAtPicker(cwd, t)) continue;
|
|
173
|
+
if (auto && shouldSkipAutoHumanGate(t)) continue;
|
|
130
174
|
if (skipped.has(t.title)) continue;
|
|
131
175
|
const remaining = todo.backlog.filter(b => !(b.tags && b.tags.includes('unverified'))).length;
|
|
132
176
|
suggestions.push({
|
|
@@ -348,6 +392,41 @@ function askHuman(taskTitle) {
|
|
|
348
392
|
});
|
|
349
393
|
}
|
|
350
394
|
|
|
395
|
+
/**
 * Classify a child_process error as a timeout/kill.
 *
 * Node's execSync reports a timeout with `code: 'ETIMEDOUT'` plus `signal`;
 * it does NOT set `killed`, so checking `killed` alone would never fire on
 * that error (lesson: etimedout-error-shape, 2026-06-10). All three markers
 * are accepted here.
 *
 * @param {*} err - Error thrown by a child_process call (may be null/undefined).
 * @returns {boolean} true when `killed`, `code === 'ETIMEDOUT'`, or `signal` is set.
 */
function isPhaseTimeoutError(err) {
  if (!err) return false;
  if (err.killed) return true;
  if (err.code === 'ETIMEDOUT') return true;
  return Boolean(err.signal);
}
|
|
404
|
+
|
|
405
|
+
/**
 * execSync wrapper that cleans up orphaned grandchildren after a phase timeout.
 *
 * Node's sync-exec timeout signals only the direct child pid (the
 * `/bin/sh -c` wrapper), so the `claude` process it spawned kept committing
 * 160–296s past the 600s wall (lesson: etimedout-error-shape, 2026-06-10).
 * The command is therefore run with `detached: true` so the wrapper leads its
 * own process group, and on a timeout-shaped error the whole group is killed
 * via `process.kill(-pid, 'SIGKILL')`.
 *
 * @param {string} cmd - Shell command to run.
 * @param {object} [opts] - execSync options; `detached` is always forced to true.
 * @returns {*} Whatever execSync returns on success.
 * @throws The original execSync error, untouched (so `err.stdout` etc. remain
 *   available to callers). A failure of the group kill itself is thrown
 *   instead, unless it is ESRCH (the group is already gone).
 */
function execPhaseCommandSync(cmd, opts = {}) {
  const spawnOpts = { ...opts, detached: true };
  try {
    return execSync(cmd, spawnOpts);
  } catch (err) {
    const shouldSweepGroup = isPhaseTimeoutError(err) && err.pid;
    if (shouldSweepGroup) {
      try {
        // Negative pid addresses the whole process group.
        process.kill(-err.pid, 'SIGKILL');
      } catch (sweepErr) {
        if (sweepErr.code !== 'ESRCH') throw sweepErr;
      }
    }
    throw err;
  }
}
|
|
429
|
+
|
|
351
430
|
/**
|
|
352
431
|
* Run a phase via claude -p subprocess.
|
|
353
432
|
*/
|
|
@@ -359,10 +438,11 @@ function executePhaseDetailed(phase, context, options = {}) {
|
|
|
359
438
|
fs.writeFileSync(tmpFile, prompt);
|
|
360
439
|
|
|
361
440
|
try {
|
|
362
|
-
const cmd =
|
|
441
|
+
const cmd = options.cmdOverride
|
|
442
|
+
|| `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Write,Edit,Glob,Grep"`;
|
|
363
443
|
const env = { ...process.env };
|
|
364
444
|
delete env.CLAUDECODE;
|
|
365
|
-
const output =
|
|
445
|
+
const output = execPhaseCommandSync(cmd, {
|
|
366
446
|
cwd: process.cwd(),
|
|
367
447
|
encoding: 'utf8',
|
|
368
448
|
timeout,
|
|
@@ -375,7 +455,9 @@ function executePhaseDetailed(phase, context, options = {}) {
|
|
|
375
455
|
return { prompt, output: output || '' };
|
|
376
456
|
} catch (err) {
|
|
377
457
|
try { fs.unlinkSync(tmpFile); } catch {}
|
|
378
|
-
if (err
|
|
458
|
+
if (isPhaseTimeoutError(err)) {
|
|
459
|
+
throw new Error(`${phase} phase timed out after ${timeout / 1000}s (claude -p hit the wall; any work it committed survives — reconcile from pre-tick HEADs)`);
|
|
460
|
+
}
|
|
379
461
|
if (err.stdout) {
|
|
380
462
|
return { prompt, output: err.stdout };
|
|
381
463
|
}
|
|
@@ -383,10 +465,6 @@ function executePhaseDetailed(phase, context, options = {}) {
|
|
|
383
465
|
}
|
|
384
466
|
}
|
|
385
467
|
|
|
386
|
-
function executePhase(phase, context, options = {}) {
|
|
387
|
-
return executePhaseDetailed(phase, context, options).output;
|
|
388
|
-
}
|
|
389
|
-
|
|
390
468
|
/**
|
|
391
469
|
* Build context-aware file list for prompts.
|
|
392
470
|
*/
|
|
@@ -412,6 +490,16 @@ function getContextFiles(phase, options = {}) {
|
|
|
412
490
|
return [...new Set(files.filter(Boolean))].map((f) => `- ${f}`).join('\n');
|
|
413
491
|
}
|
|
414
492
|
|
|
493
|
+
// T35a (endgame loop-self-repair): git-safety contract for shared checkouts.
// Background (Lesson 39): a concurrent tick ran `git reset` and wiped a
// sibling repo's uncommitted work. The rule (COORDINATION.md Rule 4) is that
// sibling-repo edits go through per-tick worktrees — the same ../repo
// siblings snapshotRepoHeads tracks — and destructive git on the shared
// checkout is forbidden. This text is interpolated into the default and
// self-heal do prompts, never the benchmark prompt (which never commits).
const SHARED_CHECKOUT_GIT_CONTRACT = [
  `- Shared-checkout git safety (COORDINATION.md Rule 4): edits to any repo OTHER than this tick's cwd (../atrisos-backend-style sibling repos) go through a per-tick worktree — start with \`atris worktree start --member <member> --task "<task>"\`, land with \`atris worktree ship --message "<msg>" --verify "<cmd>"\`. Never edit a sibling repo's shared checkout directly.`,
  `- On a shared checkout, \`git reset\`, \`git checkout --\`, \`git clean\`, and stashing other agents' work are FORBIDDEN — concurrent ticks' uncommitted work lives there.`,
].join('\n');
|
|
502
|
+
|
|
415
503
|
/**
|
|
416
504
|
* Build the right prompt for each phase, adapting to the kind of work.
|
|
417
505
|
*/
|
|
@@ -422,7 +510,13 @@ function buildPrompt(phase, context, options = {}) {
|
|
|
422
510
|
contextNote = '',
|
|
423
511
|
runnerName = '',
|
|
424
512
|
} = options;
|
|
425
|
-
const readFiles = getContextFiles(phase,
|
|
513
|
+
const readFiles = getContextFiles(phase, {
|
|
514
|
+
...options,
|
|
515
|
+
extraReadFiles: [
|
|
516
|
+
...(options.extraReadFiles || []),
|
|
517
|
+
...(Array.isArray(context.files) ? context.files : []),
|
|
518
|
+
],
|
|
519
|
+
});
|
|
426
520
|
const benchmarkProtocol = benchmarkStrategy === 'stack'
|
|
427
521
|
? 'coordinated stack run'
|
|
428
522
|
: (benchmarkStrategy === 'single' ? 'pinned single-model baseline run' : '');
|
|
@@ -478,12 +572,24 @@ When done, reply: done.`;
|
|
|
478
572
|
}
|
|
479
573
|
|
|
480
574
|
if (kind === 'staleness' || kind === 'docs' || kind === 'review') {
|
|
575
|
+
const fileList = Array.isArray(context.files) && context.files.length
|
|
576
|
+
? context.files.map((file) => `- ${file}`).join('\n')
|
|
577
|
+
: '- target page or MAP entry from the task title\n- source file(s) that changed';
|
|
481
578
|
return `${baseRules}
|
|
482
579
|
|
|
483
580
|
Maintenance task: ${task}
|
|
484
581
|
|
|
485
|
-
|
|
582
|
+
Relevant files:
|
|
583
|
+
${fileList}
|
|
584
|
+
|
|
585
|
+
Figure out what needs to change and why. Create exactly one focused task in atris/TODO.md unless the drift truly requires separate commits.
|
|
486
586
|
For stale pages, read both the page and its sources to understand the drift.
|
|
587
|
+
The task row must include these fields so plan-review can prove it is executable:
|
|
588
|
+
- **Files:** concrete target page plus source file paths
|
|
589
|
+
- **Exit:** the observable post-update state
|
|
590
|
+
- **Verify:** one raw shell command that checks concrete facts and rejects stale phrases; use shell operators like \`&&\`, \`grep -q\`, or \`test\`, not Markdown backticks or English like "returns 1" / "shows today's date"
|
|
591
|
+
- **Rollback:** git checkout -- <changed-files> before commit, or git revert HEAD --no-edit after commit
|
|
592
|
+
Do not write tasks without Verify and Rollback. Do not use \`true\`, \`echo ok\`, or vague "review manually" verification.
|
|
487
593
|
|
|
488
594
|
When done, reply: done.`;
|
|
489
595
|
}
|
|
@@ -590,6 +696,7 @@ Rules:
|
|
|
590
696
|
- Execute ONE step at a time. Verify each step before moving on.
|
|
591
697
|
- Check MAP.md for file locations before grepping.
|
|
592
698
|
- Stay in scope. Only fix the bug described in the lesson — no side quests.
|
|
699
|
+
${SHARED_CHECKOUT_GIT_CONTRACT}
|
|
593
700
|
|
|
594
701
|
Read these files first:
|
|
595
702
|
${readFiles}
|
|
@@ -614,6 +721,7 @@ Rules:
|
|
|
614
721
|
- Check MAP.md for file locations before grepping.
|
|
615
722
|
- If you hit two errors on the same step, stop and flag for re-scope.
|
|
616
723
|
- Stay in scope. Don't touch files outside the task boundary.
|
|
724
|
+
${SHARED_CHECKOUT_GIT_CONTRACT}
|
|
617
725
|
|
|
618
726
|
Read these files first:
|
|
619
727
|
${readFiles}
|
|
@@ -684,6 +792,27 @@ If broken beyond quick fix, reply: failed — [reason].`;
|
|
|
684
792
|
return '';
|
|
685
793
|
}
|
|
686
794
|
|
|
795
|
+
/**
 * Turn free text into a clean kebab-case lesson slug.
 *
 * Every run of non-alphanumeric characters collapses to a single dash (so
 * em-dashes no longer leak into slugs), and over-long slugs are cut at a word
 * boundary rather than mid-word — the old `.slice(0, 40)` produced
 * `verify-fail-per-member-model-selection-—-the-member-`.
 *
 * @param {*} text - Source text; falsy input is treated as 'unknown'.
 * @param {number} [maxLen=40] - Maximum slug length.
 * @returns {string} The slug, or 'unknown' when nothing usable remains.
 */
function lessonSlug(text, maxLen = 40) {
  const slug = String(text || 'unknown')
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '');
  if (!slug) return 'unknown';
  if (slug.length <= maxLen) return slug;

  const head = slug.slice(0, maxLen);
  let kept = head;
  // If the character right after the cut is not a dash, the cut landed inside
  // a word: back up to the last complete word (when there is one).
  if (slug[maxLen] !== '-') {
    const dashAt = head.lastIndexOf('-');
    if (dashAt > 0) kept = head.slice(0, dashAt);
  }
  return kept.replace(/-+$/g, '') || 'unknown';
}
|
|
815
|
+
|
|
687
816
|
/**
|
|
688
817
|
* Write a lesson to atris/lessons.md
|
|
689
818
|
* Appends a line in format: - **[YYYY-MM-DD] slug** — pass/fail — explanation
|
|
@@ -802,6 +931,90 @@ function getVerifyCommand(cwd, taskTitle) {
|
|
|
802
931
|
return { cmd: detectDefaultVerify(cwd), explicit: false };
|
|
803
932
|
}
|
|
804
933
|
|
|
934
|
+
/**
 * List every task in `atris/TODO.md` that carries an explicit `verify` value.
 *
 * Sections are read in the order inProgress, review (optional), backlog,
 * completed. Each result also gets a `key` combining title and verify
 * (NUL-separated) so callers can diff two snapshots.
 *
 * @param {string} cwd - Workspace root.
 * @returns {Array<{title: *, verify: *, key: string}>} Empty when TODO.md is absent.
 */
function collectExplicitVerifyTasks(cwd) {
  const todoPath = path.join(cwd, 'atris', 'TODO.md');
  if (!fs.existsSync(todoPath)) return [];

  const todo = parseTodo(todoPath);
  const everyTask = [...todo.inProgress, ...(todo.review || []), ...todo.backlog, ...todo.completed];
  const withVerify = [];
  for (const task of everyTask) {
    if (!task || !task.verify) continue;
    withVerify.push({
      title: task.title,
      verify: task.verify,
      key: `${task.title}\0${task.verify}`,
    });
  }
  return withVerify;
}
|
|
946
|
+
|
|
947
|
+
/**
 * Find the single explicit-verify task that appeared since an earlier snapshot.
 *
 * @param {string} cwd - Workspace root.
 * @param {Set<string>|Iterable<string>|null} beforeKeys - Keys captured earlier
 *   from collectExplicitVerifyTasks.
 * @returns {{cmd: *, explicit: true, task: *}|null} The new task's verify
 *   command, or null unless exactly one new key is found.
 */
function findNewExplicitVerifyCommand(cwd, beforeKeys) {
  const seen = beforeKeys instanceof Set ? beforeKeys : new Set(beforeKeys || []);
  const fresh = [];
  for (const task of collectExplicitVerifyTasks(cwd)) {
    if (!seen.has(task.key)) fresh.push(task);
  }
  // Zero or several new tasks is ambiguous, so no command is adopted.
  if (fresh.length !== 1) return null;
  const [only] = fresh;
  return { cmd: only.verify, explicit: true, task: only.title };
}
|
|
953
|
+
|
|
954
|
+
/**
 * Whether a task of the given kind is on the adopt-planned-verify allow-list.
 *
 * @param {*} kind - Task kind label.
 * @returns {boolean} true for staleness, docs, review, inbox, cleanup,
 *   feature, lessons, or imagined.
 */
function shouldAdoptPlannedVerify(kind) {
  const adoptingKinds = new Set([
    'staleness',
    'docs',
    'review',
    'inbox',
    'cleanup',
    'feature',
    'lessons',
    'imagined',
  ]);
  return adoptingKinds.has(kind);
}
|
|
957
|
+
|
|
958
|
+
// Task-plane status vocabulary lint. `atris task list/queue/current --status <s>`
// only matches raw stored statuses (commands/task.js); `ready` is a TRANSITION
// (`atris task ready` moves a task to review), so `--status ready` always
// returns "(no tasks)" — a verify built on it is an unreachable gate; the
// matching listable form is --status review (lessons.md
// verify-status-vocabulary, 3rd occurrence 2026-06-10).
const LISTABLE_TASK_STATUSES = ['open', 'claimed', 'review', 'done', 'failed'];
const STATUS_CORRECTIONS = { ready: 'review' };

/**
 * Lint a verify command for `atris task list|queue|current --status <s>`
 * segments whose status is not in the listable vocabulary.
 *
 * @param {string} text - The verify command (may chain segments with && / || / ;).
 * @returns {{ok: false, reason: string}|null} A failure for the first
 *   offending segment, or null when every `--status` value is listable.
 */
function lintVerifyTaskStatusVocabulary(text) {
  // Each segment runs up to the next shell separator; its --status value, if
  // any, is pulled out separately below.
  const segmentRe = /\batris\s+task\s+(?:list|queue|current)\b([^|&;]*)/g;
  let segment;
  while ((segment = segmentRe.exec(text)) !== null) {
    const statusMatch = /--status[=\s]+["']?([A-Za-z0-9_-]+)["']?/.exec(segment[1]);
    if (!statusMatch) continue;
    const status = statusMatch[1];
    if (LISTABLE_TASK_STATUSES.includes(status)) continue;
    const vocabulary = LISTABLE_TASK_STATUSES.join('|');
    // `status` comes straight from the command text, so only own keys count:
    // a bare lookup would resolve values such as "constructor" or "toString"
    // to inherited Object.prototype members and print them as a "correction".
    const corrected = Object.prototype.hasOwnProperty.call(STATUS_CORRECTIONS, status)
      ? STATUS_CORRECTIONS[status]
      : undefined;
    const suggestion = corrected
      ? `use --status ${corrected} instead (atris task ${status} is a transition that lands tasks in ${corrected}, so --status ${status} never matches)`
      : `use one of --status ${vocabulary}`;
    return {
      ok: false,
      reason: `Verify uses unlistable task status "--status ${status}" — the listable vocabulary is ${vocabulary}; ${suggestion}`,
    };
  }
  return null;
}
|
|
989
|
+
|
|
990
|
+
/**
 * Cheap shape check that a verify value looks like a raw shell command.
 *
 * Rejects, in order: markdown backticks, prose expectation words
 * (return/show/equal/should/must), and unlistable `atris task --status`
 * values. An empty command is accepted.
 *
 * NOTE(review): the prose-word check is a plain word match, so a command that
 * legitimately contains one of those words (e.g. `git show HEAD`) is rejected
 * too — confirm that trade-off is intended.
 *
 * @param {*} cmd - Verify command; stringified and trimmed.
 * @returns {{ok: true}|{ok: false, reason: string}}
 */
function validateVerifyCommandShape(cmd) {
  const command = String(cmd || '').trim();
  if (command.length === 0) {
    return { ok: true };
  }

  const hasBacktick = command.indexOf('`') !== -1;
  if (hasBacktick) {
    return { ok: false, reason: 'Verify contains markdown backticks instead of a raw shell command' };
  }

  const proseWords = /\b(returns?|shows?|equals?|should|must)\b/i;
  if (proseWords.test(command)) {
    return { ok: false, reason: 'Verify contains prose expectations instead of shell operators/assertions' };
  }

  return lintVerifyTaskStatusVocabulary(command) || { ok: true };
}
|
|
1003
|
+
|
|
1004
|
+
/**
 * Record a `verify-not-runnable` lesson and build the halted-tick result.
 *
 * @param {string} cwd - Workspace root passed to writeLesson.
 * @param {object} context - Tick context; `context.task` is quoted in the lesson.
 * @param {*} verifyCmd - The rejected verify command.
 * @param {string} reason - Why the command was rejected.
 * @param {number} startedAt - Tick start time in epoch milliseconds.
 * @param {object} [phaseResults={}] - Phase results gathered so far.
 * @returns {object} Result with outcome 'halted', reason 'verify-not-runnable',
 *   elapsed seconds, and verifyRan/verifyPass both false.
 */
function haltInvalidVerify(cwd, context, verifyCmd, reason, startedAt, phaseResults = {}) {
  const explanation = `Verify \`${verifyCmd}\` for "${context.task}" is not a runnable shell command: ${reason}. Tick halted.`;
  writeLesson(cwd, 'verify-not-runnable', 'fail', explanation);

  const elapsedSeconds = Math.round((Date.now() - startedAt) / 1000);
  return {
    outcome: 'halted',
    reason: 'verify-not-runnable',
    phaseResults,
    elapsedSeconds,
    verifyRan: false,
    verifyPass: false,
    verifyCmd,
  };
}
|
|
1017
|
+
|
|
805
1018
|
/**
|
|
806
1019
|
* Infer a default verify command from the repo shape. Order matters:
|
|
807
1020
|
* package.json with a non-stub test script → `npm test`; then pytest/python;
|
|
@@ -878,7 +1091,7 @@ Read from disk:
|
|
|
878
1091
|
- atris/lessons.md (recent failures — last 20 lines)
|
|
879
1092
|
|
|
880
1093
|
Decide if the plan is safe to execute. Check:
|
|
881
|
-
1. Verify points at a falsifiable
|
|
1094
|
+
1. Verify points at a falsifiable raw shell command or rubric (not \`true\`, \`echo ok\`, Markdown backticks, or English like "returns 1" / "shows today's date").
|
|
882
1095
|
Prefer \`atris verify <slug> --section <name>\`.
|
|
883
1096
|
2. Files are explicitly declared (not empty, not vague).
|
|
884
1097
|
3. Rollback is named (commit, checkpoint, or \`git revert\`).
|
|
@@ -975,7 +1188,7 @@ function defaultPlanReviewExecutor(prompt, { cwd, timeout = 180000 } = {}) {
|
|
|
975
1188
|
const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Grep,Glob"`;
|
|
976
1189
|
const env = { ...process.env };
|
|
977
1190
|
delete env.CLAUDECODE;
|
|
978
|
-
const output =
|
|
1191
|
+
const output = execPhaseCommandSync(cmd, {
|
|
979
1192
|
cwd,
|
|
980
1193
|
encoding: 'utf8',
|
|
981
1194
|
timeout,
|
|
@@ -1004,7 +1217,18 @@ function defaultCodexExecutor(prompt, { cwd, timeout = 180000 } = {}) {
|
|
|
1004
1217
|
timeout,
|
|
1005
1218
|
stdio: 'pipe',
|
|
1006
1219
|
maxBuffer: 10 * 1024 * 1024,
|
|
1220
|
+
detached: true,
|
|
1007
1221
|
});
|
|
1222
|
+
// No sh wrapper here, but codex spawns its own children — sweep the group
|
|
1223
|
+
// on timeout so they cannot outlive the wall (same orphan class as the
|
|
1224
|
+
// claude sites; ESRCH means the tree is already dead).
|
|
1225
|
+
if (proc.pid && ((proc.error && proc.error.code === 'ETIMEDOUT') || proc.signal)) {
|
|
1226
|
+
try {
|
|
1227
|
+
process.kill(-proc.pid, 'SIGKILL');
|
|
1228
|
+
} catch (sweepErr) {
|
|
1229
|
+
if (sweepErr.code !== 'ESRCH') throw sweepErr;
|
|
1230
|
+
}
|
|
1231
|
+
}
|
|
1008
1232
|
if (proc.status !== 0 && !proc.stdout) {
|
|
1009
1233
|
throw new Error(`codex exited with status ${proc.status}: ${proc.stderr || 'no output'}`);
|
|
1010
1234
|
}
|
|
@@ -1150,6 +1374,216 @@ function appendPlanRejection(cwd, context, review) {
|
|
|
1150
1374
|
}
|
|
1151
1375
|
}
|
|
1152
1376
|
|
|
1377
|
+
// ── Timeout reconciliation (T33, endgame loop-self-repair) ─────────────────
|
|
1378
|
+
// A do-phase wall-clock timeout kills the reporter, not the work: 12 of 13
|
|
1379
|
+
// ETIMEDOUT halts in the 2026-06-10 RSI audit had real commits landed with no
|
|
1380
|
+
// receipt, no checked bullet, and a human halt (lessons: executor-timeout-wall,
|
|
1381
|
+
// tick-must-mark-own-bullet). These helpers let the tick reconcile from
|
|
1382
|
+
// pre-tick HEADs instead of halting when work provably landed.
|
|
1383
|
+
|
|
1384
|
+
/**
 * Resolve today's journal file, using the local calendar date.
 *
 * @param {string} cwd - Workspace root.
 * @returns {{logFile: string, dateFormatted: string}} `logFile` is
 *   `<cwd>/atris/logs/<yyyy>/<yyyy-mm-dd>.md`; `dateFormatted` is `yyyy-mm-dd`.
 */
function todayJournalPath(cwd) {
  const today = new Date();
  const year = today.getFullYear();
  const pad2 = (value) => String(value).padStart(2, '0');
  // getMonth() is zero-based.
  const dateFormatted = `${year}-${pad2(today.getMonth() + 1)}-${pad2(today.getDate())}`;
  return {
    logFile: path.join(cwd, 'atris', 'logs', String(year), `${dateFormatted}.md`),
    dateFormatted,
  };
}
|
|
1394
|
+
|
|
1395
|
+
/**
 * Reduce text to lower-case words separated by single spaces, for fuzzy
 * task-title matching. Backtick code spans and `[tag]`-style markers are
 * dropped; every other run of non-alphanumerics becomes one space.
 *
 * @param {*} text - Any value; falsy input yields ''.
 * @returns {string} The normalized text.
 */
function normalizeForMatch(text) {
  const lowered = String(text || '').toLowerCase();
  const withoutCodeSpans = lowered.replace(/`[^`]*`/g, ' ');
  const withoutTags = withoutCodeSpans.replace(/\[[\w-]+\]/g, ' ');
  const wordsOnly = withoutTags.replace(/[^a-z0-9]+/g, ' ').trim();
  return wordsOnly.replace(/\s+/g, ' ');
}
|
|
1408
|
+
|
|
1409
|
+
/**
 * Build the search needle for a task title: its normalized form, cut to at
 * most `maxLen` characters with any trailing partial word removed. Used to
 * locate the task's TODO bullet and journal receipts without relying on an
 * exact string match.
 *
 * @param {*} taskTitle - Raw task title.
 * @param {number} [maxLen=60] - Maximum needle length.
 * @returns {string} The needle, or '' when the title normalizes to nothing.
 */
function taskMatchNeedle(taskTitle, maxLen = 60) {
  const normalized = normalizeForMatch(taskTitle);
  if (!normalized) return '';
  if (normalized.length <= maxLen) return normalized;
  const head = normalized.slice(0, maxLen);
  // Drop the final whitespace-delimited fragment.
  return head.replace(/\s+\S*$/, '');
}
|
|
1419
|
+
|
|
1420
|
+
/**
 * Read the current HEAD commit of the git repo at `dir`.
 *
 * @param {string} dir - Directory to run `git rev-parse HEAD` in.
 * @returns {string|null} The trimmed command output, or null if the command fails.
 */
function gitHeadAt(dir) {
  try {
    return execSync('git rev-parse HEAD', { cwd: dir, stdio: ['ignore', 'pipe', 'pipe'], encoding: 'utf8' }).trim();
  } catch {
    return null;
  }
}

/**
 * Snapshot HEAD of the workspace repo plus any sibling repos named in the
 * task text — both explicit `../atris-cli`-style refs (the journal convention)
 * and bare sibling-directory names like `atris-cli` that resolve to a git
 * repo next to cwd.
 *
 * Names are taken from prose, so a name that ends a sentence arrives with its
 * punctuation attached ("../atris-cli." or "atris-cli."). Each name is tried
 * as written first and then with trailing `.`, `_`, `-` removed; otherwise
 * such a repo would be left out of the snapshot.
 *
 * @param {string} cwd - Workspace root.
 * @param {string} [taskText=''] - Task text to scan for sibling repo names.
 * @returns {Array<{label: string, dir: string, head: string|null}>} The
 *   workspace is always first, labelled '.'.
 */
function snapshotRepoHeads(cwd, taskText = '') {
  const root = path.resolve(cwd);
  const repos = new Map([[root, '.']]);
  const text = String(taskText || '');

  const isGitRepo = (dir) => fs.existsSync(path.join(dir, '.git'));
  // The name as written, followed (when different and non-empty) by the name
  // without trailing sentence punctuation.
  const nameVariants = (name) => {
    const trimmed = name.replace(/[._-]+$/, '');
    return trimmed && trimmed !== name ? [name, trimmed] : [name];
  };

  for (const ref of text.match(/\.\.\/[A-Za-z0-9._-]+/g) || []) {
    for (const name of nameVariants(ref.slice(3))) {
      const dir = path.resolve(cwd, '..', name);
      if (dir !== root && isGitRepo(dir)) {
        repos.set(dir, `../${name}`);
        break;
      }
    }
  }
  for (const tok of text.match(/[A-Za-z][A-Za-z0-9._-]{2,}/g) || []) {
    for (const name of nameVariants(tok)) {
      // Keep the three-character minimum of the token pattern after trimming.
      if (name.length < 3) continue;
      const dir = path.resolve(cwd, '..', name);
      if (dir !== root && !repos.has(dir) && isGitRepo(dir)) {
        repos.set(dir, `../${name}`);
        break;
      }
    }
  }
  return [...repos].map(([dir, label]) => ({ label, dir, head: gitHeadAt(dir) }));
}
|
|
1450
|
+
|
|
1451
|
+
/**
 * Compare a prior HEAD snapshot against the repos' current HEADs.
 *
 * Entries with no recorded head are ignored, as are repos whose HEAD can no
 * longer be read or has not changed.
 *
 * @param {Iterable<{label: string, dir: string, head: string|null}>|null} snapshot
 * @returns {Array<{label: string, dir: string, before: string, after: string}>}
 *   One entry per repo whose HEAD differs from the snapshot.
 */
function diffAdvancedRepoHeads(snapshot) {
  const moved = [];
  for (const entry of snapshot || []) {
    const hasBaseline = Boolean(entry && entry.head);
    if (hasBaseline) {
      const current = gitHeadAt(entry.dir);
      if (current && current !== entry.head) {
        moved.push({ label: entry.label, dir: entry.dir, before: entry.head, after: current });
      }
    }
  }
  return moved;
}
|
|
1466
|
+
|
|
1467
|
+
/**
 * Recognize the do-phase timeout message thrown by executePhaseDetailed
 * (the T31-typed message). Plan/review timeouts stay human halts — only the
 * do phase commits work worth reconciling.
 *
 * @param {*} message - Error message; falsy input is treated as ''.
 * @returns {boolean} true when the text contains `do phase timed out after`
 *   as whole words.
 */
function isDoPhaseTimeoutMessage(message) {
  const text = String(message || '');
  const doPhaseTimeout = /\bdo phase timed out after\b/;
  return doPhaseTimeout.test(text);
}
|
|
1475
|
+
|
|
1476
|
+
/**
 * Mark the task's TODO bullet `[x]`. Matches the first un-checked,
 * un-struck bullet whose normalized text contains the normalized title
 * prefix; `- **T33:** …` becomes `- [x] **T33:** …`, `- [ ]` becomes `- [x]`.
 *
 * Only the first of `atris/TODO.md`, `atris/todo.md` that exists is examined.
 * CRLF files are supported: the trailing carriage return is set aside while
 * matching and restored on the rewritten line. A capital `[X]` counts as
 * already checked.
 *
 * @param {string} cwd - Workspace root.
 * @param {string} taskTitle - Title of the task to mark.
 * @returns {boolean} true if a bullet was marked.
 */
function markTodoBulletDone(cwd, taskTitle) {
  const needle = taskMatchNeedle(taskTitle);
  if (!needle) return false;
  for (const name of ['TODO.md', 'todo.md']) {
    const todoPath = path.join(cwd, 'atris', name);
    if (!fs.existsSync(todoPath)) continue;
    const lines = fs.readFileSync(todoPath, 'utf8').split('\n');
    for (let i = 0; i < lines.length; i++) {
      // `.` never matches '\r', so a CRLF line could not satisfy `(.*)$`;
      // peel the carriage return off before matching.
      const hasCr = lines[i].endsWith('\r');
      const line = hasCr ? lines[i].slice(0, -1) : lines[i];
      const bullet = line.match(/^(\s*)- (?:\[( |x|X)\]\s+)?(.*)$/);
      if (!bullet) continue;
      if (bullet[2] === 'x' || bullet[2] === 'X') continue;
      if (bullet[3].startsWith('~~')) continue;
      if (!normalizeForMatch(line).includes(needle)) continue;
      lines[i] = `${bullet[1]}- [x] ${bullet[3]}${hasCr ? '\r' : ''}`;
      fs.writeFileSync(todoPath, lines.join('\n'));
      return true;
    }
    // The first existing file is authoritative; the other casing is not tried.
    return false;
  }
  return false;
}
|
|
1503
|
+
|
|
1504
|
+
/**
 * Insert a block directly under today's journal `## Notes` heading, creating
 * the journal file if the tick dies before any other writer got to it.
 * Never throws.
 *
 * When the journal has no `## Notes` heading, one is appended at the end with
 * the block under it. When the heading is the very last line of the file
 * (no trailing newline), the block is appended after it.
 *
 * @param {string} cwd - Workspace root.
 * @param {string} block - Markdown to insert.
 * @returns {boolean} true on success, false if anything failed.
 */
function appendUnderNotes(cwd, block) {
  try {
    const { logFile, dateFormatted } = todayJournalPath(cwd);
    if (!fs.existsSync(logFile)) {
      const dir = path.dirname(logFile);
      if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
      createLogFile(logFile, dateFormatted);
    }
    let content = fs.readFileSync(logFile, 'utf8');
    const notesIdx = content.indexOf('## Notes');
    if (notesIdx === -1) {
      content = content.replace(/\s*$/, '') + `\n\n## Notes\n${block}\n`;
    } else {
      const eol = content.indexOf('\n', notesIdx);
      if (eol === -1) {
        // Heading is the last line with no newline after it. Slicing at
        // eol + 1 (= 0) would put the block at the top of the file.
        content = `${content}\n${block}`;
      } else {
        content = content.slice(0, eol + 1) + block + content.slice(eol + 1);
      }
    }
    fs.writeFileSync(logFile, content);
    return true;
  } catch {
    // Deliberately best-effort: failure is reported through the return value.
    return false;
  }
}
|
|
1530
|
+
|
|
1531
|
+
/**
 * Write a "Timeout reconciliation" receipt into today's journal notes,
 * listing each repo whose HEAD moved (short before → after hashes).
 *
 * @param {string} cwd - Workspace root.
 * @param {{task: *, advanced: Array<{label: string, before: string, after: string}>}} details
 * @returns {boolean} Result of appendUnderNotes.
 */
function appendTimeoutReconciliation(cwd, { task, advanced }) {
  // Minute-precision timestamp derived from the ISO (UTC) string.
  const now = new Date().toISOString().slice(0, 16).replace('T', ' ');
  const shortHash = (hash) => String(hash).slice(0, 7);
  const repoLines = (advanced || [])
    .map((repo) => `- ${repo.label}: ${shortHash(repo.before)} → ${shortHash(repo.after)}`)
    .join('\n');
  const block = [
    '',
    `### Timeout reconciliation — ${now} — work-landed-receipt-died`,
    '',
    `**Task:** ${task}`,
    '**What happened:** the do-phase wall killed the reporter, but commits landed:',
    repoLines,
    'Receipt auto-written and the TODO bullet marked; no human halt required.',
    '',
  ].join('\n');
  return appendUnderNotes(cwd, block);
}
|
|
1544
|
+
|
|
1545
|
+
/**
 * Write a "Check-and-advance" entry into today's journal notes, recording
 * that the task was found already done and quoting the receipt that proved it.
 *
 * @param {string} cwd - Workspace root.
 * @param {*} task - Task title.
 * @param {*} receiptLine - The journal line that served as the completion receipt.
 * @returns {boolean} Result of appendUnderNotes.
 */
function appendCheckAndAdvance(cwd, task, receiptLine) {
  // Minute-precision timestamp derived from the ISO (UTC) string.
  const now = new Date().toISOString().slice(0, 16).replace('T', ' ');
  const block = [
    '',
    `### Check-and-advance — ${now} — advanced-already-done`,
    '',
    `**Task:** ${task}`,
    `**What happened:** verify passed before work started AND today's journal already carries a completion receipt — the work shipped on a prior tick whose reporter died before bookkeeping. Bullet marked, picker advanced.`,
    `**Receipt:** ${receiptLine}`,
    '',
  ].join('\n');
  return appendUnderNotes(cwd, block);
}
|
|
1554
|
+
|
|
1555
|
+
/**
 * Look through today's journal for a completion receipt that names the task.
 *
 * A line qualifies when it is receipt-shaped — contains a `**C<number>:**`
 * marker, a `**Task:**` marker, or the word "reconciliation" — and its
 * normalized text contains the task's match needle.
 *
 * @param {string} cwd - Workspace root.
 * @param {string} taskTitle - Title of the task to look for.
 * @returns {string|null} The first matching line, trimmed, or null.
 */
function findCompletionReceipt(cwd, taskTitle) {
  const { logFile } = todayJournalPath(cwd);
  if (!fs.existsSync(logFile)) return null;
  const needle = taskMatchNeedle(taskTitle);
  if (!needle) return null;

  const isReceiptShaped = (line) =>
    /\*\*C\d+:\*\*/.test(line) || /\*\*Task:\*\*/.test(line) || /reconciliation/i.test(line);

  const journalLines = fs.readFileSync(logFile, 'utf8').split('\n');
  const hit = journalLines.find(
    (line) => isReceiptShaped(line) && normalizeForMatch(line).includes(needle)
  );
  return hit === undefined ? null : hit.trim();
}
|
|
1572
|
+
|
|
1573
|
+
/**
 * Reconcile a do-phase timeout against the pre-tick HEAD snapshot.
 *
 * When the snapshot diff shows no repo advanced, nothing is written and the
 * caller is told the tick was not reconciled (so it can halt as it did before).
 * When at least one repo advanced, the journal reconciliation entry is
 * appended first, then the TODO bullet is marked, and the outcome
 * `work-landed-receipt-died` is reported.
 *
 * @param {string} cwd - Working directory for the journal and TODO updates.
 * @param {*} snapshot - Pre-tick snapshot handed to `diffAdvancedRepoHeads`.
 * @param {string} taskTitle - Title of the task that timed out.
 * @returns {{reconciled: boolean, advanced: Array, outcome?: string, bulletMarked?: *}}
 */
function reconcileTimedOutTick(cwd, snapshot, taskTitle) {
  const advanced = diffAdvancedRepoHeads(snapshot);
  const nothingLanded = advanced.length === 0;
  if (nothingLanded) {
    return { reconciled: false, advanced: [] };
  }

  appendTimeoutReconciliation(cwd, { task: taskTitle, advanced });

  return {
    reconciled: true,
    outcome: 'work-landed-receipt-died',
    advanced,
    bulletMarked: markTodoBulletDone(cwd, taskTitle)
  };
}
|
|
1586
|
+
|
|
1153
1587
|
function runTaskOnce(context, options = {}) {
|
|
1154
1588
|
const { verbose = false, cwd = process.cwd() } = options;
|
|
1155
1589
|
|
|
@@ -1170,8 +1604,15 @@ function runTaskOnce(context, options = {}) {
|
|
|
1170
1604
|
|
|
1171
1605
|
const phaseResults = {};
|
|
1172
1606
|
const startedAt = Date.now();
|
|
1173
|
-
|
|
1174
|
-
|
|
1607
|
+
let verifyResult = getVerifyCommand(cwd, context.task);
|
|
1608
|
+
let verifyCmd = verifyResult.cmd;
|
|
1609
|
+
const explicitVerifyBefore = new Set(
|
|
1610
|
+
collectExplicitVerifyTasks(cwd).map((task) => task.key)
|
|
1611
|
+
);
|
|
1612
|
+
const initialVerifyShape = validateVerifyCommandShape(verifyCmd);
|
|
1613
|
+
if (!initialVerifyShape.ok) {
|
|
1614
|
+
return haltInvalidVerify(cwd, context, verifyCmd, initialVerifyShape.reason, startedAt, phaseResults);
|
|
1615
|
+
}
|
|
1175
1616
|
|
|
1176
1617
|
// Guard: endgame tasks must have an explicit Verify field.
|
|
1177
1618
|
// Reactive signals (inbox, staleness, imagined) use npm test as default.
|
|
@@ -1203,6 +1644,25 @@ function runTaskOnce(context, options = {}) {
|
|
|
1203
1644
|
if (!skipFalsifiability && verifyResult.explicit && context.kind === 'endgame' && verifyCmd) {
|
|
1204
1645
|
try {
|
|
1205
1646
|
execSync(verifyCmd, { cwd, stdio: 'pipe', timeout: 300000 });
|
|
1647
|
+
// T33b (lesson: tick-must-mark-own-bullet): a pre-work verify pass WITH
|
|
1648
|
+
// a completion receipt already in today's journal means the work shipped
|
|
1649
|
+
// but the reporter died before bookkeeping. Check the bullet and advance
|
|
1650
|
+
// instead of wedging the picker on verify-not-falsifiable.
|
|
1651
|
+
const receipt = findCompletionReceipt(cwd, context.task);
|
|
1652
|
+
if (receipt) {
|
|
1653
|
+
const bulletMarked = markTodoBulletDone(cwd, context.task);
|
|
1654
|
+
appendCheckAndAdvance(cwd, context.task, receipt);
|
|
1655
|
+
return {
|
|
1656
|
+
outcome: 'advanced-already-done',
|
|
1657
|
+
reason: 'advanced-already-done',
|
|
1658
|
+
receipt,
|
|
1659
|
+
bulletMarked,
|
|
1660
|
+
phaseResults: {},
|
|
1661
|
+
elapsedSeconds: 0,
|
|
1662
|
+
verifyRan: true,
|
|
1663
|
+
verifyPass: true,
|
|
1664
|
+
};
|
|
1665
|
+
}
|
|
1206
1666
|
writeLesson(cwd, 'verify-not-falsifiable', 'fail',
|
|
1207
1667
|
`Verify \`${verifyCmd}\` passed before work started on "${context.task}". Either the rubric is trivial or the task is already done. Tick halted.`);
|
|
1208
1668
|
return {
|
|
@@ -1264,6 +1724,18 @@ function runTaskOnce(context, options = {}) {
|
|
|
1264
1724
|
}
|
|
1265
1725
|
}
|
|
1266
1726
|
|
|
1727
|
+
if (!verifyResult.explicit && shouldAdoptPlannedVerify(context.kind)) {
|
|
1728
|
+
const plannedVerify = findNewExplicitVerifyCommand(cwd, explicitVerifyBefore);
|
|
1729
|
+
if (plannedVerify) {
|
|
1730
|
+
verifyResult = plannedVerify;
|
|
1731
|
+
verifyCmd = plannedVerify.cmd;
|
|
1732
|
+
}
|
|
1733
|
+
}
|
|
1734
|
+
const plannedVerifyShape = validateVerifyCommandShape(verifyCmd);
|
|
1735
|
+
if (!plannedVerifyShape.ok) {
|
|
1736
|
+
return haltInvalidVerify(cwd, context, verifyCmd, plannedVerifyShape.reason, startedAt, phaseResults);
|
|
1737
|
+
}
|
|
1738
|
+
|
|
1267
1739
|
// Phase: do
|
|
1268
1740
|
{
|
|
1269
1741
|
const t0 = Date.now();
|
|
@@ -1309,7 +1781,7 @@ function runTaskOnce(context, options = {}) {
|
|
|
1309
1781
|
elapsedSeconds: verifyTime,
|
|
1310
1782
|
};
|
|
1311
1783
|
try {
|
|
1312
|
-
const slug = (context.task
|
|
1784
|
+
const slug = lessonSlug(context.task);
|
|
1313
1785
|
writeLesson(cwd, `verify-fail-${slug}`, 'fail', `Verify command \`${verifyCmd}\` failed: ${e.message.split('\n')[0]}`);
|
|
1314
1786
|
} catch { /* lesson write must not crash the tick */ }
|
|
1315
1787
|
}
|
|
@@ -1975,12 +2447,13 @@ function findCodeTodos(cwd) {
|
|
|
1975
2447
|
try {
|
|
1976
2448
|
const out = execFileSync('git', [
|
|
1977
2449
|
'grep', '-n', '-I', '-E', '(TODO|FIXME)',
|
|
1978
|
-
'--', ':!test/', ':!node_modules/', ':!atris/', ':!**/*.md'
|
|
2450
|
+
'--', ':!test/', ':!node_modules/', ':!atris/', ':!**/_archive/**', ':!**/*.md'
|
|
1979
2451
|
], { cwd, encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] });
|
|
1980
2452
|
const results = [];
|
|
1981
2453
|
for (const raw of out.split('\n').filter(Boolean)) {
|
|
1982
2454
|
const m = raw.match(/^([^:]+):(\d+):(.*)$/);
|
|
1983
2455
|
if (!m) continue;
|
|
2456
|
+
if (m[1].split(/[\\/]/).includes('_archive')) continue;
|
|
1984
2457
|
const line = m[3];
|
|
1985
2458
|
// A real TODO is a comment marker at the start of the line (allowing
|
|
1986
2459
|
// leading indent) followed by TODO/FIXME and at least one word. This
|
|
@@ -2160,16 +2633,63 @@ function isLessonResolved(lessonLine, cwd, options = {}) {
|
|
|
2160
2633
|
if (!slugMatch) return false;
|
|
2161
2634
|
const slug = slugMatch[1];
|
|
2162
2635
|
|
|
2636
|
+
if (isCleanMapBrokenRefFailLesson(lessonLine, cwd)) return true;
|
|
2637
|
+
|
|
2163
2638
|
// Detector-backed check (typed lesson sidecar)
|
|
2164
2639
|
const meta = options.meta || loadLessonMetadata(cwd)[slug];
|
|
2165
2640
|
if (meta && meta.detector) {
|
|
2166
2641
|
return runLessonDetector(meta.detector, cwd, options.detectorTimeout);
|
|
2167
2642
|
}
|
|
2168
2643
|
|
|
2644
|
+
if (inlinePythonVerifyFailureNowPasses(lessonLine, cwd, options.detectorTimeout)) return true;
|
|
2645
|
+
|
|
2169
2646
|
// Legacy fallback: keyword grep against referenced files.
|
|
2170
2647
|
return isLessonResolvedLegacy(lessonLine, cwd);
|
|
2171
2648
|
}
|
|
2172
2649
|
|
|
2650
|
+
/**
 * Decide whether a "fix N broken reference(s) in MAP.md" lesson can be treated
 * as resolved.
 *
 * The lesson text is matched case-insensitively (it is lower-cased first).
 * Only when it matches is the repo map audit consulted.
 *
 * @param {string} lessonLine - Raw lesson text; null/undefined count as empty.
 * @param {string} cwd - Directory handed to `repoMapAuditReportsClean`.
 * @returns {*} `false` when the lesson is not about broken MAP.md references,
 *   otherwise whatever `repoMapAuditReportsClean(cwd)` returns.
 */
function isCleanMapBrokenRefFailLesson(lessonLine, cwd) {
  const lowered = String(lessonLine || '').toLowerCase();
  const mentionsBrokenMapRefs = /fix \d+ broken references? in map\.md/.test(lowered);
  return mentionsBrokenMapRefs ? repoMapAuditReportsClean(cwd) : false;
}
|
|
2655
|
+
|
|
2656
|
+
/**
 * Pull the inline `python -c "<code>"` snippet out of a verify-failure lesson.
 *
 * The lesson must contain ``Verify command ``<command>`` failed``; the last
 * `python -c` / `python3 -c` invocation inside `<command>` is returned.
 *
 * NOTE(review): the pattern requires DOUBLE backticks around the command —
 * confirm this matches how verify-failure lessons are actually written.
 *
 * @param {string} lessonLine - Raw lesson text; null/undefined count as empty.
 * @returns {{executable: string, code: string}|null} The interpreter name and
 *   the unescaped code, or null when the lesson has no such command.
 */
function extractInlinePythonVerifyFailure(lessonLine) {
  const commandMatch = /Verify command\s+``([\s\S]*?)``\s+failed/i.exec(String(lessonLine || ''));
  if (commandMatch === null) return null;

  // The quoted body consumes backslash-escaped pairs (e.g. \" inside a
  // double-quoted string) as a unit, so an escaped quote no longer ends the
  // snippet early. If no escaped parse reaches a closing quote, the regex
  // backtracks to the first plain closing quote.
  const invocation = /\b(python3?)\s+-c\s+(["'])((?:\\[\s\S]|(?!\2)[\s\S])*?)\2/g;
  const invocations = [...commandMatch[1].matchAll(invocation)];
  if (invocations.length === 0) return null;

  // The last invocation wins: earlier ones are treated as setup steps.
  const [, executable, , rawCode] = invocations[invocations.length - 1];
  return {
    executable,
    code: rawCode.replace(/\\"/g, '"').replace(/\\'/g, "'")
  };
}
|
|
2667
|
+
|
|
2668
|
+
/**
 * Re-run the inline Python snippet recorded in a verify-failure lesson and
 * report whether it now succeeds.
 *
 * @param {string} lessonLine - Lesson text to extract the snippet from.
 * @param {string} cwd - Working directory for the spawned interpreter.
 * @param {number} [timeout=10000] - Timeout passed to `spawnSync`, in ms.
 * @returns {boolean} True only when a snippet was found and the interpreter
 *   exited with status 0; false when no snippet can be extracted or the run
 *   ends with any other status.
 */
function inlinePythonVerifyFailureNowPasses(lessonLine, cwd, timeout = 10000) {
  const snippet = extractInlinePythonVerifyFailure(lessonLine);
  if (!snippet) {
    return false;
  }

  // All stdio is discarded; only the exit status is inspected.
  const spawnOptions = {
    cwd,
    encoding: 'utf8',
    timeout,
    stdio: ['ignore', 'ignore', 'ignore']
  };
  const { status } = spawnSync(snippet.executable, ['-c', snippet.code], spawnOptions);
  return status === 0;
}
|
|
2679
|
+
|
|
2680
|
+
/**
 * Collect the file references a lesson mentions inside backticks.
 *
 * A candidate is a backtick-quoted token that looks like `name.ext`, with an
 * optional `:line` or `:start-end` suffix, which is stripped. A candidate is
 * kept when it contains a `/` or ends in `.js`, `.md` or `.ts`.
 *
 * @param {string} lessonLine - Raw lesson text.
 * @returns {string[]} Kept references, in order of appearance (duplicates
 *   are not removed).
 */
function legacyLessonFileRefs(lessonLine) {
  const backtickedPath = /`([a-zA-Z0-9_/./-]+\.[a-zA-Z]+(?::\d+(?:-\d+)?)?)`/g;
  const knownExtensions = ['.js', '.md', '.ts'];

  return Array.from(String(lessonLine).matchAll(backtickedPath))
    .map((hit) => hit[1].replace(/:\d+(-\d+)?$/, ''))
    .filter(
      (candidate) =>
        candidate.includes('/') || knownExtensions.some((ext) => candidate.endsWith(ext))
    );
}
|
|
2692
|
+
|
|
2173
2693
|
/**
|
|
2174
2694
|
* The pre-v3.8 resolver — kept as an internal fallback for prose-only lessons
|
|
2175
2695
|
* that don't have detector metadata yet. Never auto-promotes a prose lesson to
|
|
@@ -2182,16 +2702,7 @@ function isLessonResolvedLegacy(lessonLine, cwd) {
|
|
|
2182
2702
|
if (!slugMatch) return false;
|
|
2183
2703
|
const slug = slugMatch[1];
|
|
2184
2704
|
|
|
2185
|
-
|
|
2186
|
-
const fileRefs = [];
|
|
2187
|
-
const filePattern = /`([a-zA-Z0-9_/./-]+\.[a-zA-Z]+(?::\d+(?:-\d+)?)?)`/g;
|
|
2188
|
-
let m;
|
|
2189
|
-
while ((m = filePattern.exec(lessonLine)) !== null) {
|
|
2190
|
-
const ref = m[1].replace(/:\d+(-\d+)?$/, ''); // strip line numbers
|
|
2191
|
-
if (ref.includes('/') || ref.endsWith('.js') || ref.endsWith('.md') || ref.endsWith('.ts')) {
|
|
2192
|
-
fileRefs.push(ref);
|
|
2193
|
-
}
|
|
2194
|
-
}
|
|
2705
|
+
const fileRefs = legacyLessonFileRefs(lessonLine);
|
|
2195
2706
|
|
|
2196
2707
|
if (fileRefs.length === 0) return false;
|
|
2197
2708
|
|
|
@@ -2274,6 +2785,9 @@ function pickUnresolvedFailLesson(cwd) {
|
|
|
2274
2785
|
const candidates = [];
|
|
2275
2786
|
for (const lesson of lessons) {
|
|
2276
2787
|
if (lesson.verdict !== 'fail') continue;
|
|
2788
|
+
if (lesson.id === 'verify-not-falsifiable') continue;
|
|
2789
|
+
if (lesson.id === 'no-verify-field') continue;
|
|
2790
|
+
if (lesson.id === 'verify-failed' && lesson.legacy) continue;
|
|
2277
2791
|
if (lesson.resolvedTag) continue;
|
|
2278
2792
|
// Typed lesson with explicit status wins — respect the sidecar.
|
|
2279
2793
|
// `resolved` = done. `observed` = process rule, not a fixable code state.
|
|
@@ -2284,6 +2798,7 @@ function pickUnresolvedFailLesson(cwd) {
|
|
|
2284
2798
|
if (s === 'resolved' || s === 'observed') continue;
|
|
2285
2799
|
if (s === 'attempted' && (lesson.meta.attempts || 0) >= MAX_ATTEMPTS) continue;
|
|
2286
2800
|
}
|
|
2801
|
+
if (lesson.legacy && legacyLessonFileRefs(lesson.line).length === 0) continue;
|
|
2287
2802
|
// Detector-backed or legacy grep check.
|
|
2288
2803
|
if (isLessonResolved(lesson.line, cwd, { meta: lesson.meta })) continue;
|
|
2289
2804
|
|
|
@@ -2370,7 +2885,7 @@ Reply with the JSON array and nothing else.`;
|
|
|
2370
2885
|
const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')"`;
|
|
2371
2886
|
const env = { ...process.env };
|
|
2372
2887
|
delete env.CLAUDECODE;
|
|
2373
|
-
output =
|
|
2888
|
+
output = execPhaseCommandSync(cmd, {
|
|
2374
2889
|
cwd,
|
|
2375
2890
|
encoding: 'utf8',
|
|
2376
2891
|
timeout: PHASE_TIMEOUT,
|
|
@@ -2378,6 +2893,11 @@ Reply with the JSON array and nothing else.`;
|
|
|
2378
2893
|
maxBuffer: 10 * 1024 * 1024,
|
|
2379
2894
|
env
|
|
2380
2895
|
}).toString();
|
|
2896
|
+
} catch (err) {
|
|
2897
|
+
if (isPhaseTimeoutError(err)) {
|
|
2898
|
+
throw new Error(`horizon-proposal phase timed out after ${PHASE_TIMEOUT / 1000}s`);
|
|
2899
|
+
}
|
|
2900
|
+
throw err;
|
|
2381
2901
|
} finally {
|
|
2382
2902
|
try { fs.unlinkSync(tmpFile); } catch {}
|
|
2383
2903
|
}
|
|
@@ -2658,12 +3178,24 @@ async function autopilotAtris(description, options = {}) {
|
|
|
2658
3178
|
const context = {
|
|
2659
3179
|
task: suggestion.task,
|
|
2660
3180
|
kind: suggestion.kind,
|
|
3181
|
+
...(suggestion.files ? { files: suggestion.files } : {}),
|
|
2661
3182
|
...(suggestion.lessonLine ? { lessonLine: suggestion.lessonLine } : {}),
|
|
2662
3183
|
...(suggestion.lessonSlug ? { lessonSlug: suggestion.lessonSlug } : {}),
|
|
2663
3184
|
...(suggestion.lessonDate ? { lessonDate: suggestion.lessonDate } : {})
|
|
2664
3185
|
};
|
|
2665
3186
|
const startingEndgame = readEndgameState(cwd);
|
|
2666
3187
|
|
|
3188
|
+
// T33a: snapshot pre-tick HEADs (cwd + sibling repos named in the task)
|
|
3189
|
+
// so a do-phase timeout can be reconciled against what actually landed.
|
|
3190
|
+
let preTickHeads = null;
|
|
3191
|
+
try {
|
|
3192
|
+
const verifyHint = getVerifyCommand(cwd, suggestion.task).cmd || '';
|
|
3193
|
+
preTickHeads = snapshotRepoHeads(
|
|
3194
|
+
cwd,
|
|
3195
|
+
[suggestion.task, ...(suggestion.files || []), verifyHint].join(' ')
|
|
3196
|
+
);
|
|
3197
|
+
} catch { /* snapshot failure must not block the tick */ }
|
|
3198
|
+
|
|
2667
3199
|
try {
|
|
2668
3200
|
if (verbose) {
|
|
2669
3201
|
console.log('');
|
|
@@ -2697,6 +3229,26 @@ async function autopilotAtris(description, options = {}) {
|
|
|
2697
3229
|
break;
|
|
2698
3230
|
}
|
|
2699
3231
|
|
|
3232
|
+
// T33b: the falsifiability gate found a completion receipt — the work
|
|
3233
|
+
// already shipped, the bullet is checked, move straight to the next pick.
|
|
3234
|
+
if (execution.outcome === 'advanced-already-done') {
|
|
3235
|
+
completed++;
|
|
3236
|
+
tickOutcome = 'built';
|
|
3237
|
+
tickOutcomeText = `"${lastTaskTitle}" was already done — verify passed pre-work and today's journal carries its completion receipt, so I checked the bullet and advanced.`;
|
|
3238
|
+
tickNextStep = 'pick the next endgame task';
|
|
3239
|
+
if (verbose) {
|
|
3240
|
+
console.log(' already done (journal receipt found). bullet checked, advancing.');
|
|
3241
|
+
} else {
|
|
3242
|
+
printPlainBlock([
|
|
3243
|
+
'That task was already done — verify passed before work and a completion receipt exists in today\'s journal.',
|
|
3244
|
+
'I checked the bullet and advanced.',
|
|
3245
|
+
'',
|
|
3246
|
+
'Next I will look for the next task.'
|
|
3247
|
+
].join('\n'));
|
|
3248
|
+
}
|
|
3249
|
+
continue;
|
|
3250
|
+
}
|
|
3251
|
+
|
|
2700
3252
|
const planTime = execution.phaseResults.plan.elapsedSeconds;
|
|
2701
3253
|
if (verbose) console.log(` planned (${planTime}s)`);
|
|
2702
3254
|
|
|
@@ -2758,7 +3310,7 @@ async function autopilotAtris(description, options = {}) {
|
|
|
2758
3310
|
// Record commit hash + verify command for retroactive regression checks
|
|
2759
3311
|
try {
|
|
2760
3312
|
const commitHash = execSync('git rev-parse HEAD', { cwd, encoding: 'utf8' }).trim();
|
|
2761
|
-
const taskSlug = (suggestion.task
|
|
3313
|
+
const taskSlug = lessonSlug(suggestion.task);
|
|
2762
3314
|
recordTickCommit(cwd, commitHash, execution.verifyCmd || '', taskSlug);
|
|
2763
3315
|
|
|
2764
3316
|
// Every 10th tick, run retroactive regression check
|
|
@@ -2805,6 +3357,36 @@ async function autopilotAtris(description, options = {}) {
|
|
|
2805
3357
|
}
|
|
2806
3358
|
|
|
2807
3359
|
} catch (err) {
|
|
3360
|
+
// T33a: a do-phase timeout with commits landed is a dead reporter, not
|
|
3361
|
+
// dead work — write the reconciliation receipt, mark the bullet, and
|
|
3362
|
+
// record work-landed-receipt-died instead of halting for a human.
|
|
3363
|
+
let reconciliation = null;
|
|
3364
|
+
if (isDoPhaseTimeoutMessage(err.message)) {
|
|
3365
|
+
try {
|
|
3366
|
+
reconciliation = reconcileTimedOutTick(cwd, preTickHeads, lastTaskTitle || suggestion.task);
|
|
3367
|
+
} catch { reconciliation = null; }
|
|
3368
|
+
}
|
|
3369
|
+
if (reconciliation && reconciliation.reconciled) {
|
|
3370
|
+
completed++;
|
|
3371
|
+
const landed = reconciliation.advanced
|
|
3372
|
+
.map((r) => `${r.label} ${String(r.before).slice(0, 7)} → ${String(r.after).slice(0, 7)}`)
|
|
3373
|
+
.join(', ');
|
|
3374
|
+
tickOutcome = 'work-landed-receipt-died';
|
|
3375
|
+
tickOutcomeText = `"${lastTaskTitle}" hit the do-phase wall but commits landed (${landed}). I wrote the reconciliation receipt and marked the bullet — work-landed-receipt-died, no human halt.`;
|
|
3376
|
+
tickNextStep = 'pick the next task';
|
|
3377
|
+
if (verbose) {
|
|
3378
|
+
console.log(` do phase timed out, but work landed (${landed}). reconciled — no human halt.`);
|
|
3379
|
+
} else {
|
|
3380
|
+
printPlainBlock([
|
|
3381
|
+
'The do phase timed out, but commits landed before the wall.',
|
|
3382
|
+
`Landed: ${landed}.`,
|
|
3383
|
+
'I wrote the reconciliation receipt and marked the task bullet.',
|
|
3384
|
+
'',
|
|
3385
|
+
'Next tick will pick the next task.'
|
|
3386
|
+
].join('\n'));
|
|
3387
|
+
}
|
|
3388
|
+
break;
|
|
3389
|
+
}
|
|
2808
3390
|
tickOutcome = 'halted';
|
|
2809
3391
|
tickOutcomeText = `I hit an error while running "${lastTaskTitle || 'a task'}": ${err.message}`;
|
|
2810
3392
|
tickNextStep = 'stop until a human looks at the error';
|
|
@@ -2988,7 +3570,7 @@ Search the codebase to verify. Reply: YES <reason> or NO <reason>`;
|
|
|
2988
3570
|
const env = { ...process.env };
|
|
2989
3571
|
delete env.CLAUDECODE;
|
|
2990
3572
|
const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Glob,Grep"`;
|
|
2991
|
-
const output =
|
|
3573
|
+
const output = execPhaseCommandSync(cmd, {
|
|
2992
3574
|
cwd,
|
|
2993
3575
|
encoding: 'utf8',
|
|
2994
3576
|
timeout: 60000,
|
|
@@ -3021,6 +3603,13 @@ async function autopilotFromTodo(options = {}) {
|
|
|
3021
3603
|
|
|
3022
3604
|
module.exports = {
|
|
3023
3605
|
appendTickSummary,
|
|
3606
|
+
snapshotRepoHeads,
|
|
3607
|
+
diffAdvancedRepoHeads,
|
|
3608
|
+
reconcileTimedOutTick,
|
|
3609
|
+
markTodoBulletDone,
|
|
3610
|
+
findCompletionReceipt,
|
|
3611
|
+
isDoPhaseTimeoutMessage,
|
|
3612
|
+
validateVerifyCommandShape,
|
|
3024
3613
|
askHuman,
|
|
3025
3614
|
askModel,
|
|
3026
3615
|
autopilotAtris,
|
|
@@ -3052,11 +3641,19 @@ module.exports = {
|
|
|
3052
3641
|
proposeCandidateHorizons,
|
|
3053
3642
|
recordTickCommit,
|
|
3054
3643
|
regressionCheck,
|
|
3644
|
+
repoMapAuditReportsClean,
|
|
3645
|
+
isCleanMapBrokenRefFailLesson,
|
|
3646
|
+
inlinePythonVerifyFailureNowPasses,
|
|
3055
3647
|
runPlanReview,
|
|
3056
3648
|
runTaskOnce,
|
|
3057
3649
|
buildPlanReviewPrompt,
|
|
3058
3650
|
parseVerdict,
|
|
3059
3651
|
scoreEndgameCandidates,
|
|
3060
3652
|
suggestNextTask,
|
|
3061
|
-
|
|
3653
|
+
shouldSkipAutoHumanGate,
|
|
3654
|
+
writeLesson,
|
|
3655
|
+
isPhaseTimeoutError,
|
|
3656
|
+
execPhaseCommandSync,
|
|
3657
|
+
executePhaseDetailed,
|
|
3658
|
+
lessonSlug
|
|
3062
3659
|
};
|