atris 3.16.0 → 3.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/bin/atris.js +48 -15
- package/commands/autopilot.js +431 -9
- package/commands/compile.js +569 -0
- package/commands/probe.js +366 -0
- package/commands/recap.js +203 -0
- package/commands/skill.js +6 -2
- package/commands/task.js +30 -7
- package/lib/state-detection.js +41 -1
- package/package.json +1 -1
package/commands/autopilot.js
CHANGED
|
@@ -392,6 +392,41 @@ function askHuman(taskTitle) {
|
|
|
392
392
|
});
|
|
393
393
|
}
|
|
394
394
|
|
|
395
|
+
/**
 * Decide whether a child_process failure represents a timeout or a kill.
 *
 * Node's execSync reports a timeout with `code: 'ETIMEDOUT'` plus a `signal`;
 * it does NOT set `killed`, so a guard that only checks `killed` never fires
 * on the very error it was written for (lesson: etimedout-error-shape,
 * 2026-06-10).
 *
 * @param {*} err - Value caught from a child_process call; may be null/undefined.
 * @returns {boolean} True when `err` is truthy and carries a truthy `killed`,
 *   a `code` equal to 'ETIMEDOUT', or a truthy `signal`.
 */
function isPhaseTimeoutError(err) {
  if (!err) return false;
  if (err.killed) return true;
  if (err.code === 'ETIMEDOUT') return true;
  return Boolean(err.signal);
}
|
|
404
|
+
|
|
405
|
+
/**
 * execSync wrapper carrying the phase-timeout orphan fix.
 *
 * Node's sync-exec timeout signals only the direct child pid — the
 * `/bin/sh -c` wrapper — so the `claude` it spawned kept committing 160–296s
 * past the 600s wall (lesson: etimedout-error-shape, 2026-06-10). Forcing
 * `detached: true` makes the wrapper a process-group leader, and on a
 * timeout-shaped error the whole group is swept with
 * `process.kill(-pid, 'SIGKILL')`.
 *
 * @param {string} cmd - Shell command handed to execSync.
 * @param {object} [opts={}] - execSync options; `detached` is always overridden to true.
 * @returns {*} Whatever execSync returns for the given options.
 * @throws The original execSync error, untouched, so each call site keeps its
 *   existing catch contract (err.stdout passthrough included). A sweep failure
 *   other than ESRCH is thrown instead of the original error.
 */
function execPhaseCommandSync(cmd, opts = {}) {
  const detachedOpts = { ...opts, detached: true };
  try {
    return execSync(cmd, detachedOpts);
  } catch (err) {
    const needsSweep = isPhaseTimeoutError(err) && err.pid;
    if (needsSweep) {
      try {
        // A negative pid addresses the whole process group led by the wrapper.
        process.kill(-err.pid, 'SIGKILL');
      } catch (sweepErr) {
        // ESRCH means the group already died — nothing left to sweep.
        if (sweepErr.code !== 'ESRCH') throw sweepErr;
      }
    }
    throw err;
  }
}
|
|
429
|
+
|
|
395
430
|
/**
|
|
396
431
|
* Run a phase via claude -p subprocess.
|
|
397
432
|
*/
|
|
@@ -403,10 +438,11 @@ function executePhaseDetailed(phase, context, options = {}) {
|
|
|
403
438
|
fs.writeFileSync(tmpFile, prompt);
|
|
404
439
|
|
|
405
440
|
try {
|
|
406
|
-
const cmd =
|
|
441
|
+
const cmd = options.cmdOverride
|
|
442
|
+
|| `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Write,Edit,Glob,Grep"`;
|
|
407
443
|
const env = { ...process.env };
|
|
408
444
|
delete env.CLAUDECODE;
|
|
409
|
-
const output =
|
|
445
|
+
const output = execPhaseCommandSync(cmd, {
|
|
410
446
|
cwd: process.cwd(),
|
|
411
447
|
encoding: 'utf8',
|
|
412
448
|
timeout,
|
|
@@ -419,7 +455,9 @@ function executePhaseDetailed(phase, context, options = {}) {
|
|
|
419
455
|
return { prompt, output: output || '' };
|
|
420
456
|
} catch (err) {
|
|
421
457
|
try { fs.unlinkSync(tmpFile); } catch {}
|
|
422
|
-
if (err
|
|
458
|
+
if (isPhaseTimeoutError(err)) {
|
|
459
|
+
throw new Error(`${phase} phase timed out after ${timeout / 1000}s (claude -p hit the wall; any work it committed survives — reconcile from pre-tick HEADs)`);
|
|
460
|
+
}
|
|
423
461
|
if (err.stdout) {
|
|
424
462
|
return { prompt, output: err.stdout };
|
|
425
463
|
}
|
|
@@ -452,6 +490,16 @@ function getContextFiles(phase, options = {}) {
|
|
|
452
490
|
return [...new Set(files.filter(Boolean))].map((f) => `- ${f}`).join('\n');
|
|
453
491
|
}
|
|
454
492
|
|
|
493
|
+
// T35a (endgame loop-self-repair): shared-checkout git-safety contract.
|
|
494
|
+
// Lesson 39: a concurrent tick's `git reset` destroyed a sibling repo's
|
|
495
|
+
// uncommitted work. Sibling-repo edits ride per-tick worktrees (the same
|
|
496
|
+
// ../repo siblings snapshotRepoHeads tracks); destructive git on the shared
|
|
497
|
+
// checkout is forbidden (COORDINATION.md Rule 4). Interpolated into the
|
|
498
|
+
// default and self-heal do prompts — never the benchmark prompt (it never
|
|
499
|
+
// commits).
|
|
500
|
+
const SHARED_CHECKOUT_GIT_CONTRACT = `- Shared-checkout git safety (COORDINATION.md Rule 4): edits to any repo OTHER than this tick's cwd (../atrisos-backend-style sibling repos) go through a per-tick worktree — start with \`atris worktree start --member <member> --task "<task>"\`, land with \`atris worktree ship --message "<msg>" --verify "<cmd>"\`. Never edit a sibling repo's shared checkout directly.
|
|
501
|
+
- On a shared checkout, \`git reset\`, \`git checkout --\`, \`git clean\`, and stashing other agents' work are FORBIDDEN — concurrent ticks' uncommitted work lives there.`;
|
|
502
|
+
|
|
455
503
|
/**
|
|
456
504
|
* Build the right prompt for each phase, adapting to the kind of work.
|
|
457
505
|
*/
|
|
@@ -648,6 +696,7 @@ Rules:
|
|
|
648
696
|
- Execute ONE step at a time. Verify each step before moving on.
|
|
649
697
|
- Check MAP.md for file locations before grepping.
|
|
650
698
|
- Stay in scope. Only fix the bug described in the lesson — no side quests.
|
|
699
|
+
${SHARED_CHECKOUT_GIT_CONTRACT}
|
|
651
700
|
|
|
652
701
|
Read these files first:
|
|
653
702
|
${readFiles}
|
|
@@ -672,6 +721,7 @@ Rules:
|
|
|
672
721
|
- Check MAP.md for file locations before grepping.
|
|
673
722
|
- If you hit two errors on the same step, stop and flag for re-scope.
|
|
674
723
|
- Stay in scope. Don't touch files outside the task boundary.
|
|
724
|
+
${SHARED_CHECKOUT_GIT_CONTRACT}
|
|
675
725
|
|
|
676
726
|
Read these files first:
|
|
677
727
|
${readFiles}
|
|
@@ -742,6 +792,27 @@ If broken beyond quick fix, reply: failed — [reason].`;
|
|
|
742
792
|
return '';
|
|
743
793
|
}
|
|
744
794
|
|
|
795
|
+
/**
 * Turn free text into a clean kebab-case lesson slug.
 *
 * Every run of non-alphanumerics collapses to a single dash (em-dashes used to
 * leak into slugs verbatim), and over-long slugs are cut at a word boundary
 * rather than mid-word — the old `.slice(0, 40)` produced
 * `verify-fail-per-member-model-selection-—-the-member-`.
 *
 * @param {*} text - Source text; falsy values are treated as 'unknown'.
 * @param {number} [maxLen=40] - Maximum slug length.
 * @returns {string} The slug, or 'unknown' when nothing usable remains.
 */
function lessonSlug(text, maxLen = 40) {
  const lowered = String(text || 'unknown').toLowerCase();
  const slug = lowered.replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, '');
  if (slug === '') return 'unknown';
  if (slug.length <= maxLen) return slug;

  let head = slug.slice(0, maxLen);
  // If the character right after the cut is not a dash, the cut landed inside
  // a word: back up to the last complete word (when there is one).
  if (slug[maxLen] !== '-') {
    const dashAt = head.lastIndexOf('-');
    if (dashAt > 0) head = head.slice(0, dashAt);
  }
  return head.replace(/-+$/g, '') || 'unknown';
}
|
|
815
|
+
|
|
745
816
|
/**
|
|
746
817
|
* Write a lesson to atris/lessons.md
|
|
747
818
|
* Appends a line in format: - **[YYYY-MM-DD] slug** — pass/fail — explanation
|
|
@@ -884,6 +955,38 @@ function shouldAdoptPlannedVerify(kind) {
|
|
|
884
955
|
return ['staleness', 'docs', 'review', 'inbox', 'cleanup', 'feature', 'lessons', 'imagined'].includes(kind);
|
|
885
956
|
}
|
|
886
957
|
|
|
958
|
+
// Task-plane status vocabulary lint. `atris task list/queue/current --status <s>`
|
|
959
|
+
// only matches raw stored statuses (commands/task.js); `ready` is a TRANSITION
|
|
960
|
+
// (`atris task ready` moves a task to review), so `--status ready` always
|
|
961
|
+
// returns "(no tasks)" — a verify built on it is an unreachable gate; the
|
|
962
|
+
// matching listable form is --status review (lessons.md
|
|
963
|
+
// verify-status-vocabulary, 3rd occurrence 2026-06-10).
|
|
964
|
+
const LISTABLE_TASK_STATUSES = ['open', 'claimed', 'review', 'done', 'failed'];
const STATUS_CORRECTIONS = { ready: 'review' };

/**
 * Lint a verify command for `--status` values that the task list can never match.
 *
 * Scans every `atris task list|queue|current` segment (compound verifies chain
 * with && / || / ;), pulls its `--status` value if present, and reports the
 * first value that is not in LISTABLE_TASK_STATUSES.
 *
 * @param {string} text - The verify command text.
 * @returns {{ok: false, reason: string}|null} A failure descriptor for the
 *   first unlistable status, or null when every status found is listable.
 */
function lintVerifyTaskStatusVocabulary(text) {
  const segmentRe = /\batris\s+task\s+(?:list|queue|current)\b([^|&;]*)/g;
  const vocabulary = LISTABLE_TASK_STATUSES.join('|');
  let segment;
  while ((segment = segmentRe.exec(text)) !== null) {
    const statusMatch = /--status[=\s]+["']?([A-Za-z0-9_-]+)["']?/.exec(segment[1]);
    if (!statusMatch) continue;
    const status = statusMatch[1];
    if (LISTABLE_TASK_STATUSES.includes(status)) continue;
    // Own-property lookup only: the status comes from free text, and names
    // such as `constructor` or `toString` would otherwise resolve to inherited
    // Object.prototype members and be offered as a "correction".
    const corrected = Object.prototype.hasOwnProperty.call(STATUS_CORRECTIONS, status)
      ? STATUS_CORRECTIONS[status]
      : undefined;
    const suggestion = corrected
      ? `use --status ${corrected} instead (atris task ${status} is a transition that lands tasks in ${corrected}, so --status ${status} never matches)`
      : `use one of --status ${vocabulary}`;
    return {
      ok: false,
      reason: `Verify uses unlistable task status "--status ${status}" — the listable vocabulary is ${vocabulary}; ${suggestion}`,
    };
  }
  return null;
}
|
|
989
|
+
|
|
887
990
|
function validateVerifyCommandShape(cmd) {
|
|
888
991
|
const text = String(cmd || '').trim();
|
|
889
992
|
if (!text) return { ok: true };
|
|
@@ -893,6 +996,8 @@ function validateVerifyCommandShape(cmd) {
|
|
|
893
996
|
if (/\b(returns?|shows?|equals?|should|must)\b/i.test(text)) {
|
|
894
997
|
return { ok: false, reason: 'Verify contains prose expectations instead of shell operators/assertions' };
|
|
895
998
|
}
|
|
999
|
+
const statusLint = lintVerifyTaskStatusVocabulary(text);
|
|
1000
|
+
if (statusLint) return statusLint;
|
|
896
1001
|
return { ok: true };
|
|
897
1002
|
}
|
|
898
1003
|
|
|
@@ -1083,7 +1188,7 @@ function defaultPlanReviewExecutor(prompt, { cwd, timeout = 180000 } = {}) {
|
|
|
1083
1188
|
const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Grep,Glob"`;
|
|
1084
1189
|
const env = { ...process.env };
|
|
1085
1190
|
delete env.CLAUDECODE;
|
|
1086
|
-
const output =
|
|
1191
|
+
const output = execPhaseCommandSync(cmd, {
|
|
1087
1192
|
cwd,
|
|
1088
1193
|
encoding: 'utf8',
|
|
1089
1194
|
timeout,
|
|
@@ -1112,7 +1217,18 @@ function defaultCodexExecutor(prompt, { cwd, timeout = 180000 } = {}) {
|
|
|
1112
1217
|
timeout,
|
|
1113
1218
|
stdio: 'pipe',
|
|
1114
1219
|
maxBuffer: 10 * 1024 * 1024,
|
|
1220
|
+
detached: true,
|
|
1115
1221
|
});
|
|
1222
|
+
// No sh wrapper here, but codex spawns its own children — sweep the group
|
|
1223
|
+
// on timeout so they cannot outlive the wall (same orphan class as the
|
|
1224
|
+
// claude sites; ESRCH means the tree is already dead).
|
|
1225
|
+
if (proc.pid && ((proc.error && proc.error.code === 'ETIMEDOUT') || proc.signal)) {
|
|
1226
|
+
try {
|
|
1227
|
+
process.kill(-proc.pid, 'SIGKILL');
|
|
1228
|
+
} catch (sweepErr) {
|
|
1229
|
+
if (sweepErr.code !== 'ESRCH') throw sweepErr;
|
|
1230
|
+
}
|
|
1231
|
+
}
|
|
1116
1232
|
if (proc.status !== 0 && !proc.stdout) {
|
|
1117
1233
|
throw new Error(`codex exited with status ${proc.status}: ${proc.stderr || 'no output'}`);
|
|
1118
1234
|
}
|
|
@@ -1258,6 +1374,216 @@ function appendPlanRejection(cwd, context, review) {
|
|
|
1258
1374
|
}
|
|
1259
1375
|
}
|
|
1260
1376
|
|
|
1377
|
+
// ── Timeout reconciliation (T33, endgame loop-self-repair) ─────────────────
|
|
1378
|
+
// A do-phase wall-clock timeout kills the reporter, not the work: 12 of 13
|
|
1379
|
+
// ETIMEDOUT halts in the 2026-06-10 RSI audit had real commits landed with no
|
|
1380
|
+
// receipt, no checked bullet, and a human halt (lessons: executor-timeout-wall,
|
|
1381
|
+
// tick-must-mark-own-bullet). These helpers let the tick reconcile from
|
|
1382
|
+
// pre-tick HEADs instead of halting when work provably landed.
|
|
1383
|
+
|
|
1384
|
+
/**
 * Locate today's journal file using the local calendar date.
 *
 * @param {string} cwd - Workspace root containing the `atris/` directory.
 * @returns {{logFile: string, dateFormatted: string}} `logFile` is
 *   `<cwd>/atris/logs/<yyyy>/<yyyy-mm-dd>.md`; `dateFormatted` is `yyyy-mm-dd`.
 */
function todayJournalPath(cwd) {
  const today = new Date();
  const twoDigits = (n) => String(n).padStart(2, '0');
  const year = today.getFullYear();
  // getMonth() is zero-based, hence the +1.
  const dateFormatted = [year, twoDigits(today.getMonth() + 1), twoDigits(today.getDate())].join('-');
  const logFile = path.join(cwd, 'atris', 'logs', String(year), `${dateFormatted}.md`);
  return { logFile, dateFormatted };
}
|
|
1394
|
+
|
|
1395
|
+
/**
 * Reduce text to lowercase, single-spaced alphanumeric words for fuzzy
 * task-title matching. Backtick code spans and bracketed `[tag]` markers are
 * dropped first; all remaining punctuation (markdown included) acts as a
 * word separator.
 *
 * @param {*} text - Text to normalize; falsy values yield ''.
 * @returns {string} Words joined by single spaces, no leading/trailing space.
 */
function normalizeForMatch(text) {
  const withoutSpansAndTags = String(text || '')
    .toLowerCase()
    .replace(/`[^`]*`/g, ' ')
    .replace(/\[[\w-]+\]/g, ' ');
  // Splitting on non-alphanumeric runs and re-joining the non-empty words is
  // the same as collapsing those runs to one space and trimming the ends.
  return withoutSpansAndTags
    .split(/[^a-z0-9]+/)
    .filter((word) => word !== '')
    .join(' ');
}
|
|
1408
|
+
|
|
1409
|
+
/**
 * Build the search needle for a task: the normalized title, shortened to at
 * most `maxLen` characters by dropping the trailing word of the cut. Used to
 * find the task's TODO bullet and journal receipts without exact-string
 * fragility.
 *
 * @param {*} taskTitle - Raw task title.
 * @param {number} [maxLen=60] - Maximum needle length.
 * @returns {string} The needle, or '' when the title normalizes to nothing.
 */
function taskMatchNeedle(taskTitle, maxLen = 60) {
  const normalized = normalizeForMatch(taskTitle);
  if (!normalized) return '';
  const fits = normalized.length <= maxLen;
  if (fits) return normalized;
  const head = normalized.slice(0, maxLen);
  // Drop everything from the last whitespace run onward; a head with no
  // whitespace at all is returned as cut.
  return head.replace(/\s+\S*$/, '');
}
|
|
1419
|
+
|
|
1420
|
+
/**
 * Read the current HEAD commit of the git repo at `dir`.
 *
 * @param {string} dir - Directory to run `git rev-parse HEAD` in.
 * @returns {string|null} The trimmed command output, or null when the command
 *   fails for any reason.
 */
function gitHeadAt(dir) {
  const gitOpts = {
    cwd: dir,
    // stdin is not inherited; stdout and stderr are captured, not printed.
    stdio: ['ignore', 'pipe', 'pipe'],
    encoding: 'utf8',
  };
  try {
    const stdout = execSync('git rev-parse HEAD', gitOpts);
    return stdout.trim();
  } catch {
    return null;
  }
}
|
|
1427
|
+
|
|
1428
|
+
/**
 * Snapshot HEAD for the workspace repo plus any sibling repos the task text
 * names. Two forms are recognised: explicit `../atris-cli`-style refs (the
 * journal convention) and bare tokens such as `atris-cli` that resolve to a
 * directory next to cwd containing `.git`.
 *
 * @param {string} cwd - Workspace directory; always included with label '.'.
 * @param {string} [taskText=''] - Free text to scan for sibling repo names.
 * @returns {Array<{label: string, dir: string, head: (string|null)}>} One
 *   entry per repo, workspace first, then siblings in discovery order.
 */
function snapshotRepoHeads(cwd, taskText = '') {
  const root = path.resolve(cwd);
  const text = String(taskText || '');
  const labelByDir = new Map();
  labelByDir.set(root, '.');
  const hasGitDir = (dir) => fs.existsSync(path.join(dir, '.git'));

  // Pass 1: explicit ../name references. A later ref to the same directory
  // replaces the stored label.
  const explicitRefs = text.match(/\.\.\/[A-Za-z0-9._-]+/g) || [];
  explicitRefs.forEach((ref) => {
    const dir = path.resolve(cwd, ref);
    if (dir === root) return;
    if (hasGitDir(dir)) labelByDir.set(dir, ref);
  });

  // Pass 2: bare tokens resolved against the parent directory. Directories
  // already recorded keep their existing label.
  const bareTokens = text.match(/[A-Za-z][A-Za-z0-9._-]{2,}/g) || [];
  bareTokens.forEach((tok) => {
    const dir = path.resolve(cwd, '..', tok);
    if (dir === root || labelByDir.has(dir)) return;
    if (hasGitDir(dir)) labelByDir.set(dir, `../${tok}`);
  });

  return Array.from(labelByDir, ([dir, label]) => ({ label, dir, head: gitHeadAt(dir) }));
}
|
|
1450
|
+
|
|
1451
|
+
/**
 * Re-read HEAD for every repo in a prior snapshot and report the ones whose
 * HEAD changed.
 *
 * @param {Array<{label: string, dir: string, head: (string|null)}>} snapshot -
 *   Earlier snapshot; null/undefined is treated as empty. Entries that are
 *   falsy or have no recorded head are ignored.
 * @returns {Array<{label: string, dir: string, before: string, after: string}>}
 *   Repos whose current HEAD is readable and differs from the recorded one.
 */
function diffAdvancedRepoHeads(snapshot) {
  return Array.from(snapshot || []).reduce((moved, entry) => {
    if (!entry || !entry.head) return moved;
    const current = gitHeadAt(entry.dir);
    // An unreadable HEAD (null) is not counted as an advance.
    if (current && current !== entry.head) {
      moved.push({ label: entry.label, dir: entry.dir, before: entry.head, after: current });
    }
    return moved;
  }, []);
}
|
|
1466
|
+
|
|
1467
|
+
/**
 * Recognise the T31-typed do-phase timeout message thrown by
 * executePhaseDetailed. Plan/review timeouts stay human halts — only the do
 * phase commits work worth reconciling.
 *
 * @param {*} message - Error message to inspect; falsy values never match.
 * @returns {boolean} True when the text contains the whole-word phrase
 *   "do phase timed out after".
 */
function isDoPhaseTimeoutMessage(message) {
  const text = String(message || '');
  return text.search(/\bdo phase timed out after\b/) !== -1;
}
|
|
1475
|
+
|
|
1476
|
+
/**
 * Mark the task's TODO bullet `[x]`.
 *
 * Looks in `atris/TODO.md`, then `atris/todo.md`, and only searches the first
 * of those that exists. Within it, the first bullet that is not already
 * checked (`[x]` or `[X]`), not struck through (`~~`), and whose normalized
 * text contains the normalized title prefix is rewritten:
 * `- **T33:** …` becomes `- [x] **T33:** …`, `- [ ]` becomes `- [x]`.
 * CRLF line endings are preserved.
 *
 * @param {string} cwd - Workspace root containing the `atris/` directory.
 * @param {string} taskTitle - Task title used to locate the bullet.
 * @returns {boolean} True if a bullet was marked and the file rewritten.
 */
function markTodoBulletDone(cwd, taskTitle) {
  const needle = taskMatchNeedle(taskTitle);
  if (!needle) return false;
  for (const name of ['TODO.md', 'todo.md']) {
    const todoPath = path.join(cwd, 'atris', name);
    if (!fs.existsSync(todoPath)) continue;
    const lines = fs.readFileSync(todoPath, 'utf8').split('\n');
    for (let i = 0; i < lines.length; i++) {
      // In a CRLF file every split line ends in `\r`. `.` never matches `\r`,
      // so leaving it attached makes `(.*)$` fail and no bullet would ever
      // match. Peel it off here and put it back when rewriting.
      const hasCarriageReturn = lines[i].endsWith('\r');
      const line = hasCarriageReturn ? lines[i].slice(0, -1) : lines[i];
      const bullet = line.match(/^(\s*)- (?:\[( |x|X)\]\s+)?(.*)$/);
      if (!bullet) continue;
      // Already checked, in either letter case.
      if (bullet[2] === 'x' || bullet[2] === 'X') continue;
      if (bullet[3].startsWith('~~')) continue;
      if (!normalizeForMatch(line).includes(needle)) continue;
      lines[i] = `${bullet[1]}- [x] ${bullet[3]}${hasCarriageReturn ? '\r' : ''}`;
      fs.writeFileSync(todoPath, lines.join('\n'));
      return true;
    }
    // Only the first TODO file that exists is searched.
    return false;
  }
  return false;
}
|
|
1503
|
+
|
|
1504
|
+
/**
 * Insert a block directly under today's journal `## Notes` heading.
 *
 * If the journal file does not exist yet (the tick died before any other
 * writer got to it), its directory is created and `createLogFile` is asked to
 * create it. If the file has no `## Notes` heading, one is appended at the end
 * followed by the block. Never throws: any failure is reported as `false`.
 *
 * @param {string} cwd - Workspace root containing the `atris/` directory.
 * @param {string} block - Text to insert.
 * @returns {boolean} True when the journal was written, false on any error.
 */
function appendUnderNotes(cwd, block) {
  try {
    const { logFile, dateFormatted } = todayJournalPath(cwd);
    if (!fs.existsSync(logFile)) {
      const dir = path.dirname(logFile);
      if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
      createLogFile(logFile, dateFormatted);
    }
    let content = fs.readFileSync(logFile, 'utf8');
    const notesIdx = content.indexOf('## Notes');
    if (notesIdx === -1) {
      content = content.replace(/\s*$/, '') + `\n\n## Notes\n${block}\n`;
    } else {
      const eol = content.indexOf('\n', notesIdx);
      if (eol === -1) {
        // The heading is the last line and has no trailing newline. Slicing at
        // eol + 1 === 0 would put the block at the very top of the file, so
        // terminate the heading line and append instead.
        content = `${content}\n${block}`;
      } else {
        content = content.slice(0, eol + 1) + block + content.slice(eol + 1);
      }
    }
    fs.writeFileSync(logFile, content);
    return true;
  } catch {
    // Deliberate best-effort: journal bookkeeping must not crash the tick.
    return false;
  }
}
|
|
1530
|
+
|
|
1531
|
+
/**
 * Write the "work-landed-receipt-died" reconciliation entry to today's
 * journal, listing each repo whose HEAD advanced as `label: before → after`
 * (hashes shortened to 7 characters).
 *
 * @param {string} cwd - Workspace root.
 * @param {{task: string, advanced: Array<{label: string, before: string, after: string}>}} details -
 *   Task title and the repos that advanced; a missing `advanced` is treated as empty.
 * @returns {boolean} The result of appendUnderNotes.
 */
function appendTimeoutReconciliation(cwd, { task, advanced }) {
  // Timestamp is the ISO (UTC) form trimmed to minutes: `YYYY-MM-DD HH:MM`.
  const stamp = new Date().toISOString().slice(0, 16).replace('T', ' ');
  const shortHash = (hash) => String(hash).slice(0, 7);
  const repoLines = (advanced || [])
    .map((repo) => `- ${repo.label}: ${shortHash(repo.before)} → ${shortHash(repo.after)}`)
    .join('\n');
  const block = [
    '',
    `### Timeout reconciliation — ${stamp} — work-landed-receipt-died`,
    '',
    `**Task:** ${task}`,
    '**What happened:** the do-phase wall killed the reporter, but commits landed:',
    repoLines,
    'Receipt auto-written and the TODO bullet marked; no human halt required.',
    '',
  ].join('\n');
  return appendUnderNotes(cwd, block);
}
|
|
1544
|
+
|
|
1545
|
+
/**
 * Write the "advanced-already-done" check-and-advance entry to today's
 * journal, quoting the receipt line that justified advancing.
 *
 * @param {string} cwd - Workspace root.
 * @param {string} task - Task title.
 * @param {string} receiptLine - The journal line that served as the receipt.
 * @returns {boolean} The result of appendUnderNotes.
 */
function appendCheckAndAdvance(cwd, task, receiptLine) {
  // Timestamp is the ISO (UTC) form trimmed to minutes: `YYYY-MM-DD HH:MM`.
  const stamp = new Date().toISOString().slice(0, 16).replace('T', ' ');
  const block = [
    '',
    `### Check-and-advance — ${stamp} — advanced-already-done`,
    '',
    `**Task:** ${task}`,
    `**What happened:** verify passed before work started AND today's journal already carries a completion receipt — the work shipped on a prior tick whose reporter died before bookkeeping. Bullet marked, picker advanced.`,
    `**Receipt:** ${receiptLine}`,
    '',
  ].join('\n');
  return appendUnderNotes(cwd, block);
}
|
|
1554
|
+
|
|
1555
|
+
/**
 * Look through today's journal for a completion receipt naming the task.
 * A line counts as receipt-shaped when it has a `**C<n>:**` completed marker,
 * a `**Task:**` marker, or the word "reconciliation" (any case); it names the
 * task when its normalized text contains the task's match needle.
 *
 * @param {string} cwd - Workspace root.
 * @param {string} taskTitle - Task title to look for.
 * @returns {string|null} The first matching line, trimmed, or null when the
 *   journal is missing, the title normalizes to nothing, or no line matches.
 */
function findCompletionReceipt(cwd, taskTitle) {
  const { logFile } = todayJournalPath(cwd);
  if (!fs.existsSync(logFile)) return null;
  const needle = taskMatchNeedle(taskTitle);
  if (!needle) return null;

  const receiptShapes = [/\*\*C\d+:\*\*/, /\*\*Task:\*\*/, /reconciliation/i];
  const journalLines = fs.readFileSync(logFile, 'utf8').split('\n');
  const hit = journalLines.find(
    (line) => receiptShapes.some((shape) => shape.test(line)) && normalizeForMatch(line).includes(needle)
  );
  return hit === undefined ? null : hit.trim();
}
|
|
1572
|
+
|
|
1573
|
+
/**
 * Reconcile a do-phase timeout against the pre-tick HEAD snapshot.
 *
 * When at least one repo's HEAD advanced, the journal reconciliation receipt
 * is written first, then the TODO bullet is marked, and the outcome
 * `work-landed-receipt-died` is reported. When nothing advanced, nothing is
 * written and the caller halts exactly as before.
 *
 * @param {string} cwd - Workspace root.
 * @param {Array} snapshot - Pre-tick snapshot from snapshotRepoHeads.
 * @param {string} taskTitle - Title of the task that timed out.
 * @returns {{reconciled: boolean, advanced: Array, outcome?: string, bulletMarked?: boolean}}
 */
function reconcileTimedOutTick(cwd, snapshot, taskTitle) {
  const advanced = diffAdvancedRepoHeads(snapshot);
  if (advanced.length > 0) {
    appendTimeoutReconciliation(cwd, { task: taskTitle, advanced });
    return {
      reconciled: true,
      outcome: 'work-landed-receipt-died',
      advanced,
      bulletMarked: markTodoBulletDone(cwd, taskTitle),
    };
  }
  return { reconciled: false, advanced: [] };
}
|
|
1586
|
+
|
|
1261
1587
|
function runTaskOnce(context, options = {}) {
|
|
1262
1588
|
const { verbose = false, cwd = process.cwd() } = options;
|
|
1263
1589
|
|
|
@@ -1318,6 +1644,25 @@ function runTaskOnce(context, options = {}) {
|
|
|
1318
1644
|
if (!skipFalsifiability && verifyResult.explicit && context.kind === 'endgame' && verifyCmd) {
|
|
1319
1645
|
try {
|
|
1320
1646
|
execSync(verifyCmd, { cwd, stdio: 'pipe', timeout: 300000 });
|
|
1647
|
+
// T33b (lesson: tick-must-mark-own-bullet): a pre-work verify pass WITH
|
|
1648
|
+
// a completion receipt already in today's journal means the work shipped
|
|
1649
|
+
// but the reporter died before bookkeeping. Check the bullet and advance
|
|
1650
|
+
// instead of wedging the picker on verify-not-falsifiable.
|
|
1651
|
+
const receipt = findCompletionReceipt(cwd, context.task);
|
|
1652
|
+
if (receipt) {
|
|
1653
|
+
const bulletMarked = markTodoBulletDone(cwd, context.task);
|
|
1654
|
+
appendCheckAndAdvance(cwd, context.task, receipt);
|
|
1655
|
+
return {
|
|
1656
|
+
outcome: 'advanced-already-done',
|
|
1657
|
+
reason: 'advanced-already-done',
|
|
1658
|
+
receipt,
|
|
1659
|
+
bulletMarked,
|
|
1660
|
+
phaseResults: {},
|
|
1661
|
+
elapsedSeconds: 0,
|
|
1662
|
+
verifyRan: true,
|
|
1663
|
+
verifyPass: true,
|
|
1664
|
+
};
|
|
1665
|
+
}
|
|
1321
1666
|
writeLesson(cwd, 'verify-not-falsifiable', 'fail',
|
|
1322
1667
|
`Verify \`${verifyCmd}\` passed before work started on "${context.task}". Either the rubric is trivial or the task is already done. Tick halted.`);
|
|
1323
1668
|
return {
|
|
@@ -1436,7 +1781,7 @@ function runTaskOnce(context, options = {}) {
|
|
|
1436
1781
|
elapsedSeconds: verifyTime,
|
|
1437
1782
|
};
|
|
1438
1783
|
try {
|
|
1439
|
-
const slug = (context.task
|
|
1784
|
+
const slug = lessonSlug(context.task);
|
|
1440
1785
|
writeLesson(cwd, `verify-fail-${slug}`, 'fail', `Verify command \`${verifyCmd}\` failed: ${e.message.split('\n')[0]}`);
|
|
1441
1786
|
} catch { /* lesson write must not crash the tick */ }
|
|
1442
1787
|
}
|
|
@@ -2540,7 +2885,7 @@ Reply with the JSON array and nothing else.`;
|
|
|
2540
2885
|
const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')"`;
|
|
2541
2886
|
const env = { ...process.env };
|
|
2542
2887
|
delete env.CLAUDECODE;
|
|
2543
|
-
output =
|
|
2888
|
+
output = execPhaseCommandSync(cmd, {
|
|
2544
2889
|
cwd,
|
|
2545
2890
|
encoding: 'utf8',
|
|
2546
2891
|
timeout: PHASE_TIMEOUT,
|
|
@@ -2548,6 +2893,11 @@ Reply with the JSON array and nothing else.`;
|
|
|
2548
2893
|
maxBuffer: 10 * 1024 * 1024,
|
|
2549
2894
|
env
|
|
2550
2895
|
}).toString();
|
|
2896
|
+
} catch (err) {
|
|
2897
|
+
if (isPhaseTimeoutError(err)) {
|
|
2898
|
+
throw new Error(`horizon-proposal phase timed out after ${PHASE_TIMEOUT / 1000}s`);
|
|
2899
|
+
}
|
|
2900
|
+
throw err;
|
|
2551
2901
|
} finally {
|
|
2552
2902
|
try { fs.unlinkSync(tmpFile); } catch {}
|
|
2553
2903
|
}
|
|
@@ -2835,6 +3185,17 @@ async function autopilotAtris(description, options = {}) {
|
|
|
2835
3185
|
};
|
|
2836
3186
|
const startingEndgame = readEndgameState(cwd);
|
|
2837
3187
|
|
|
3188
|
+
// T33a: snapshot pre-tick HEADs (cwd + sibling repos named in the task)
|
|
3189
|
+
// so a do-phase timeout can be reconciled against what actually landed.
|
|
3190
|
+
let preTickHeads = null;
|
|
3191
|
+
try {
|
|
3192
|
+
const verifyHint = getVerifyCommand(cwd, suggestion.task).cmd || '';
|
|
3193
|
+
preTickHeads = snapshotRepoHeads(
|
|
3194
|
+
cwd,
|
|
3195
|
+
[suggestion.task, ...(suggestion.files || []), verifyHint].join(' ')
|
|
3196
|
+
);
|
|
3197
|
+
} catch { /* snapshot failure must not block the tick */ }
|
|
3198
|
+
|
|
2838
3199
|
try {
|
|
2839
3200
|
if (verbose) {
|
|
2840
3201
|
console.log('');
|
|
@@ -2868,6 +3229,26 @@ async function autopilotAtris(description, options = {}) {
|
|
|
2868
3229
|
break;
|
|
2869
3230
|
}
|
|
2870
3231
|
|
|
3232
|
+
// T33b: the falsifiability gate found a completion receipt — the work
|
|
3233
|
+
// already shipped, the bullet is checked, move straight to the next pick.
|
|
3234
|
+
if (execution.outcome === 'advanced-already-done') {
|
|
3235
|
+
completed++;
|
|
3236
|
+
tickOutcome = 'built';
|
|
3237
|
+
tickOutcomeText = `"${lastTaskTitle}" was already done — verify passed pre-work and today's journal carries its completion receipt, so I checked the bullet and advanced.`;
|
|
3238
|
+
tickNextStep = 'pick the next endgame task';
|
|
3239
|
+
if (verbose) {
|
|
3240
|
+
console.log(' already done (journal receipt found). bullet checked, advancing.');
|
|
3241
|
+
} else {
|
|
3242
|
+
printPlainBlock([
|
|
3243
|
+
'That task was already done — verify passed before work and a completion receipt exists in today\'s journal.',
|
|
3244
|
+
'I checked the bullet and advanced.',
|
|
3245
|
+
'',
|
|
3246
|
+
'Next I will look for the next task.'
|
|
3247
|
+
].join('\n'));
|
|
3248
|
+
}
|
|
3249
|
+
continue;
|
|
3250
|
+
}
|
|
3251
|
+
|
|
2871
3252
|
const planTime = execution.phaseResults.plan.elapsedSeconds;
|
|
2872
3253
|
if (verbose) console.log(` planned (${planTime}s)`);
|
|
2873
3254
|
|
|
@@ -2929,7 +3310,7 @@ async function autopilotAtris(description, options = {}) {
|
|
|
2929
3310
|
// Record commit hash + verify command for retroactive regression checks
|
|
2930
3311
|
try {
|
|
2931
3312
|
const commitHash = execSync('git rev-parse HEAD', { cwd, encoding: 'utf8' }).trim();
|
|
2932
|
-
const taskSlug = (suggestion.task
|
|
3313
|
+
const taskSlug = lessonSlug(suggestion.task);
|
|
2933
3314
|
recordTickCommit(cwd, commitHash, execution.verifyCmd || '', taskSlug);
|
|
2934
3315
|
|
|
2935
3316
|
// Every 10th tick, run retroactive regression check
|
|
@@ -2976,6 +3357,36 @@ async function autopilotAtris(description, options = {}) {
|
|
|
2976
3357
|
}
|
|
2977
3358
|
|
|
2978
3359
|
} catch (err) {
|
|
3360
|
+
// T33a: a do-phase timeout with commits landed is a dead reporter, not
|
|
3361
|
+
// dead work — write the reconciliation receipt, mark the bullet, and
|
|
3362
|
+
// record work-landed-receipt-died instead of halting for a human.
|
|
3363
|
+
let reconciliation = null;
|
|
3364
|
+
if (isDoPhaseTimeoutMessage(err.message)) {
|
|
3365
|
+
try {
|
|
3366
|
+
reconciliation = reconcileTimedOutTick(cwd, preTickHeads, lastTaskTitle || suggestion.task);
|
|
3367
|
+
} catch { reconciliation = null; }
|
|
3368
|
+
}
|
|
3369
|
+
if (reconciliation && reconciliation.reconciled) {
|
|
3370
|
+
completed++;
|
|
3371
|
+
const landed = reconciliation.advanced
|
|
3372
|
+
.map((r) => `${r.label} ${String(r.before).slice(0, 7)} → ${String(r.after).slice(0, 7)}`)
|
|
3373
|
+
.join(', ');
|
|
3374
|
+
tickOutcome = 'work-landed-receipt-died';
|
|
3375
|
+
tickOutcomeText = `"${lastTaskTitle}" hit the do-phase wall but commits landed (${landed}). I wrote the reconciliation receipt and marked the bullet — work-landed-receipt-died, no human halt.`;
|
|
3376
|
+
tickNextStep = 'pick the next task';
|
|
3377
|
+
if (verbose) {
|
|
3378
|
+
console.log(` do phase timed out, but work landed (${landed}). reconciled — no human halt.`);
|
|
3379
|
+
} else {
|
|
3380
|
+
printPlainBlock([
|
|
3381
|
+
'The do phase timed out, but commits landed before the wall.',
|
|
3382
|
+
`Landed: ${landed}.`,
|
|
3383
|
+
'I wrote the reconciliation receipt and marked the task bullet.',
|
|
3384
|
+
'',
|
|
3385
|
+
'Next tick will pick the next task.'
|
|
3386
|
+
].join('\n'));
|
|
3387
|
+
}
|
|
3388
|
+
break;
|
|
3389
|
+
}
|
|
2979
3390
|
tickOutcome = 'halted';
|
|
2980
3391
|
tickOutcomeText = `I hit an error while running "${lastTaskTitle || 'a task'}": ${err.message}`;
|
|
2981
3392
|
tickNextStep = 'stop until a human looks at the error';
|
|
@@ -3159,7 +3570,7 @@ Search the codebase to verify. Reply: YES <reason> or NO <reason>`;
|
|
|
3159
3570
|
const env = { ...process.env };
|
|
3160
3571
|
delete env.CLAUDECODE;
|
|
3161
3572
|
const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Glob,Grep"`;
|
|
3162
|
-
const output =
|
|
3573
|
+
const output = execPhaseCommandSync(cmd, {
|
|
3163
3574
|
cwd,
|
|
3164
3575
|
encoding: 'utf8',
|
|
3165
3576
|
timeout: 60000,
|
|
@@ -3192,6 +3603,13 @@ async function autopilotFromTodo(options = {}) {
|
|
|
3192
3603
|
|
|
3193
3604
|
module.exports = {
|
|
3194
3605
|
appendTickSummary,
|
|
3606
|
+
snapshotRepoHeads,
|
|
3607
|
+
diffAdvancedRepoHeads,
|
|
3608
|
+
reconcileTimedOutTick,
|
|
3609
|
+
markTodoBulletDone,
|
|
3610
|
+
findCompletionReceipt,
|
|
3611
|
+
isDoPhaseTimeoutMessage,
|
|
3612
|
+
validateVerifyCommandShape,
|
|
3195
3613
|
askHuman,
|
|
3196
3614
|
askModel,
|
|
3197
3615
|
autopilotAtris,
|
|
@@ -3233,5 +3651,9 @@ module.exports = {
|
|
|
3233
3651
|
scoreEndgameCandidates,
|
|
3234
3652
|
suggestNextTask,
|
|
3235
3653
|
shouldSkipAutoHumanGate,
|
|
3236
|
-
writeLesson
|
|
3654
|
+
writeLesson,
|
|
3655
|
+
isPhaseTimeoutError,
|
|
3656
|
+
execPhaseCommandSync,
|
|
3657
|
+
executePhaseDetailed,
|
|
3658
|
+
lessonSlug
|
|
3237
3659
|
};
|