atris 3.0.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,11 +8,18 @@
8
8
 
9
9
  const fs = require('fs');
10
10
  const path = require('path');
11
- const { execSync } = require('child_process');
11
+ const { execSync, execFileSync } = require('child_process');
12
12
  const readline = require('readline');
13
13
  const { getLogPath, ensureLogDirectory, createLogFile } = require('../lib/journal');
14
14
  const { parseTodo } = require('../lib/todo');
15
15
  const { findStalePages, findStaleTasks, healBrokenMapRefs } = require('./clean');
16
+ const {
17
+ buildScorecardData,
18
+ readScorecards,
19
+ writeScorecard,
20
+ detectEndgameCompletion
21
+ } = require('../lib/scorecard');
22
+ const { REWARD_CONFIG, REWARD_CHECKSUM } = require('../lib/reward-config');
16
23
 
17
24
  const pkg = require('../package.json');
18
25
 
@@ -22,7 +29,7 @@ const PHASE_TIMEOUT = 600000; // 10 min per phase
22
29
  * Scan workspace for the next thing worth doing.
23
30
  * Returns { task, why, kind } or null.
24
31
  */
25
- async function suggestNextTask(cwd, skipped = new Set()) {
32
+ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {}) {
26
33
  const atrisDir = path.join(cwd, 'atris');
27
34
  const suggestions = [];
28
35
 
@@ -31,6 +38,7 @@ async function suggestNextTask(cwd, skipped = new Set()) {
31
38
  const todo = parseTodo(todoPath);
32
39
 
33
40
  for (const t of todo.backlog) {
41
+ if (t.tags && t.tags.includes('unverified')) continue;
34
42
  if (t.tag === 'endgame' && !skipped.has(t.title)) {
35
43
  suggestions.push({
36
44
  task: t.title,
@@ -45,7 +53,7 @@ async function suggestNextTask(cwd, skipped = new Set()) {
45
53
  // --- Resume interrupted work ---
46
54
  if (todo.inProgress.length > 0) {
47
55
  const t = todo.inProgress[0];
48
- if (!skipped.has(t.title)) {
56
+ if (!(t.tags && t.tags.includes('unverified')) && !skipped.has(t.title)) {
49
57
  suggestions.push({
50
58
  task: t.title,
51
59
  why: `This was already started${t.claimed ? ` by ${t.claimed}` : ''} but never finished.`,
@@ -96,15 +104,17 @@ async function suggestNextTask(cwd, skipped = new Set()) {
96
104
  }
97
105
 
98
106
  // --- Backlog tasks ---
99
- for (const t of todo.backlog.slice(0, 1)) {
107
+ for (const t of todo.backlog) {
108
+ if (t.tags && t.tags.includes('unverified')) continue;
100
109
  if (skipped.has(t.title)) continue;
101
- const remaining = todo.backlog.length;
110
+ const remaining = todo.backlog.filter(b => !(b.tags && b.tags.includes('unverified'))).length;
102
111
  suggestions.push({
103
112
  task: t.title,
104
113
  why: `Next in the backlog${t.tag ? ` (${t.tag})` : ''}. ${remaining} task${remaining > 1 ? 's' : ''} waiting.`,
105
114
  kind: 'backlog',
106
115
  priority: 5
107
116
  });
117
+ break;
108
118
  }
109
119
 
110
120
  // --- Unprocessed inbox items ---
@@ -204,7 +214,7 @@ async function suggestNextTask(cwd, skipped = new Set()) {
204
214
  if (suggestions.length === 0) {
205
215
  try {
206
216
  const candidates = await proposeCandidateHorizons(cwd);
207
- const top = candidates.reduce((best, c) => (c.confidence > best.confidence ? c : best), candidates[0]);
217
+ const top = scoreEndgameCandidates(cwd, candidates);
208
218
  return {
209
219
  task: top.title,
210
220
  why: top.rationale,
@@ -217,7 +227,64 @@ async function suggestNextTask(cwd, skipped = new Set()) {
217
227
  }
218
228
 
219
229
  suggestions.sort((a, b) => a.priority - b.priority);
220
- return suggestions[0];
230
+
231
+ // Staleness gate: filter out unverified/stale suggestions
232
+ const staleSkipped = [];
233
+ const fresh = [];
234
+ for (const s of suggestions) {
235
+ const fakeTask = { title: s.task, tag: s.kind === 'endgame' ? 'endgame' : null, claimed: null };
236
+ if (s.kind === 'resume' && todo.inProgress.length > 0) {
237
+ fakeTask.claimed = todo.inProgress[0].claimed;
238
+ }
239
+ const age = getTaskAgeDays(fakeTask, todoPath);
240
+ const status = isStillTrue({ title: s.task, age, source: null }, cwd);
241
+ if (status === 'stale') {
242
+ staleSkipped.push({ task: s.task, status, reasoning: null });
243
+ continue;
244
+ }
245
+ if (status === 'unverified') {
246
+ if (auto) {
247
+ // Auto mode: use model check
248
+ const result = askModel({ title: s.task, age, source: null }, cwd);
249
+ if (!result.fresh) {
250
+ staleSkipped.push({ task: s.task, status: 'unverified (model: not fresh)', reasoning: result.reasoning });
251
+ continue;
252
+ }
253
+ } else {
254
+ // Interactive mode: ask the human
255
+ const result = await askHuman(s.task);
256
+ if (!result.fresh) {
257
+ staleSkipped.push({ task: s.task, status: 'unverified (human: not relevant)', reasoning: null });
258
+ continue;
259
+ }
260
+ }
261
+ }
262
+ fresh.push(s);
263
+ }
264
+
265
+ // Log skipped items to journal
266
+ if (staleSkipped.length > 0) {
267
+ try {
268
+ const { logFile } = getLogPath();
269
+ const now = new Date();
270
+ const hhmm = `${String(now.getHours()).padStart(2, '0')}:${String(now.getMinutes()).padStart(2, '0')}`;
271
+ const lines = staleSkipped.map(s => `- ${s.task} (${s.status})${s.reasoning ? ` — ${s.reasoning}` : ''}`);
272
+ const note = `\n### Staleness skip — ${hhmm}\n${lines.join('\n')}\n`;
273
+ if (fs.existsSync(logFile)) {
274
+ const content = fs.readFileSync(logFile, 'utf8');
275
+ const notesIdx = content.indexOf('## Notes');
276
+ if (notesIdx !== -1) {
277
+ const insertAt = content.indexOf('\n', notesIdx) + 1;
278
+ const updated = content.slice(0, insertAt) + note + content.slice(insertAt);
279
+ fs.writeFileSync(logFile, updated);
280
+ } else {
281
+ fs.appendFileSync(logFile, `\n## Notes\n${note}`);
282
+ }
283
+ }
284
+ } catch {}
285
+ }
286
+
287
+ return fresh[0] || null;
221
288
  }
222
289
 
223
290
  /**
@@ -236,6 +303,22 @@ function askApproval() {
236
303
  });
237
304
  }
238
305
 
/**
 * Ask the human whether an unverified task is still relevant.
 * Interactive mode only — prompts on stdout and waits for one line on stdin.
 *
 * Only an explicit "y" / "yes" (case-insensitive, surrounding whitespace
 * ignored) counts as fresh; every other answer means "not relevant".
 * If input ends before an answer arrives (closed or piped stdin, Ctrl+D),
 * the promise resolves with { fresh: false } instead of never settling.
 *
 * @param {string} taskTitle - Title shown in the prompt.
 * @returns {Promise<{ fresh: boolean }>}
 */
function askHuman(taskTitle) {
  return new Promise((resolve) => {
    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
    let answered = false;

    // 'close' fires after our own rl.close() and also when stdin hits EOF.
    // In the EOF case the question callback is never invoked, so settle here.
    rl.once('close', () => {
      if (!answered) resolve({ fresh: false });
    });

    rl.question(` is "${taskTitle}" still relevant? y/n → `, (answer) => {
      answered = true;
      rl.close();
      const a = (answer || '').trim().toLowerCase();
      resolve({ fresh: a === 'y' || a === 'yes' });
    });
  });
}
321
+
239
322
  /**
240
323
  * Run a phase via claude -p subprocess.
241
324
  */
@@ -525,10 +608,165 @@ If broken beyond quick fix, reply: failed — [reason].`;
525
608
  return '';
526
609
  }
527
610
 
/**
 * Record a lesson in atris/lessons.md.
 *
 * Line format: - **[YYYY-MM-DD] slug** — pass/fail — explanation
 * (the date is the UTC calendar day taken from Date#toISOString).
 *
 * When the file is missing it is created, together with the atris/ folder,
 * using a standard header. Otherwise the new line is inserted directly after
 * the first `---` separator, so the most recent lesson sits at the top of the
 * list. A file without a separator gets the line appended at the end.
 *
 * @param {string} cwd - Workspace root that contains (or will contain) atris/.
 * @param {string} slug - Short identifier for the lesson.
 * @param {string} status - Outcome label, e.g. 'pass' or 'fail'.
 * @param {string} explanation - Free text; written verbatim.
 */
function writeLesson(cwd, slug, status, explanation) {
  const lessonsPath = path.join(cwd, 'atris', 'lessons.md');
  const today = new Date().toISOString().split('T')[0]; // YYYY-MM-DD
  const lessonLine = `- **[${today}] ${slug}** — ${status} — ${explanation}`;

  if (!fs.existsSync(lessonsPath)) {
    // The atris/ folder itself may not exist yet.
    fs.mkdirSync(path.dirname(lessonsPath), { recursive: true });
    fs.writeFileSync(lessonsPath, `# lessons.md — What We Learned\n\n> Append-only. One line per lesson.\n\n---\n\n${lessonLine}\n`);
    return;
  }

  const content = fs.readFileSync(lessonsPath, 'utf8');
  const separator = '---\n';
  const at = content.indexOf(separator);

  let updated;
  if (at === -1) {
    updated = `${content}\n${lessonLine}\n`;
  } else {
    // Splice by index rather than String.replace with a string replacement:
    // replace() would expand `$&`, `$1`, etc. that appear in the explanation
    // (error messages often contain them) and corrupt the file.
    const cut = at + separator.length;
    updated = `${content.slice(0, cut)}\n${lessonLine}\n${content.slice(cut)}`;
  }
  fs.writeFileSync(lessonsPath, updated);
}
634
+
/**
 * Record a tick's commit hash and verify command in atris/tick-registry.json.
 * Each entry: { hash, verifyCmd, slug, timestamp } (timestamp is ISO-8601).
 *
 * An unreadable registry, or one whose JSON is not an array, is treated as
 * empty and overwritten, so a damaged file cannot make a tick crash here.
 * The atris/ folder is created when missing.
 *
 * @param {string} cwd - Workspace root.
 * @param {string} hash - Commit hash the tick produced.
 * @param {string} verifyCmd - Shell command that verified the tick.
 * @param {string} slug - Short identifier of the task.
 */
function recordTickCommit(cwd, hash, verifyCmd, slug) {
  const registryPath = path.join(cwd, 'atris', 'tick-registry.json');
  let registry = [];
  if (fs.existsSync(registryPath)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(registryPath, 'utf8'));
      // Valid JSON that is not an array (e.g. `{}`) has no usable push().
      if (Array.isArray(parsed)) registry = parsed;
    } catch {
      registry = [];
    }
  }
  registry.push({ hash, verifyCmd, slug, timestamp: new Date().toISOString() });
  fs.mkdirSync(path.dirname(registryPath), { recursive: true });
  fs.writeFileSync(registryPath, JSON.stringify(registry, null, 2) + '\n');
}
648
+
/**
 * Retroactive regression check. Reads the last 10 entries from
 * atris/tick-registry.json and re-runs each entry's verify command inside a
 * temporary detached git worktree checked out at the entry's commit.
 *
 * Returns one result per entry: { hash, slug, pass } for entries that ran,
 * or { hash, slug, pass: true, skipped: true } when the entry is unusable
 * (missing/non-string hash or verify command, hash that looks like a CLI
 * option) or the worktree could not be created. A failing verify command
 * also writes a lesson via writeLesson.
 *
 * git is invoked with an argument array (no shell), so a malformed hash in
 * the registry cannot inject shell commands. The verify command itself is a
 * shell command line by design and still runs through the shell.
 *
 * @param {string} cwd - Workspace root (a git repository).
 * @returns {Array<{hash: *, slug: *, pass: boolean, skipped?: boolean}>}
 */
function regressionCheck(cwd) {
  const registryPath = path.join(cwd, 'atris', 'tick-registry.json');
  if (!fs.existsSync(registryPath)) return [];

  let registry;
  try { registry = JSON.parse(fs.readFileSync(registryPath, 'utf8')); } catch { return []; }
  if (!Array.isArray(registry) || registry.length === 0) return [];

  const results = [];

  for (const entry of registry.slice(-10)) {
    // Tolerate null / primitive entries instead of crashing on property access.
    const { hash, verifyCmd, slug } = (entry && typeof entry === 'object') ? entry : {};

    const usable = typeof hash === 'string' && hash !== '' && !hash.startsWith('-')
      && typeof verifyCmd === 'string' && verifyCmd.trim() !== '';
    if (!usable) {
      results.push({ hash, slug, pass: true, skipped: true });
      continue;
    }

    // Keep the directory name to plain characters so an odd hash cannot
    // escape cwd; for real hex hashes this is the first 8 characters unchanged.
    const suffix = hash.slice(0, 8).replace(/[^0-9A-Za-z]/g, '_');
    const worktreePath = path.join(cwd, '.regression-worktree-' + suffix);
    let pass = false;
    try {
      // Create a worktree at the commit
      execFileSync('git', ['worktree', 'add', '--detach', worktreePath, hash], { cwd, stdio: 'pipe' });
      try {
        execSync(verifyCmd, { cwd: worktreePath, stdio: 'pipe', timeout: 60000 });
        pass = true;
      } catch {
        pass = false;
      }
    } catch {
      // If worktree creation fails (e.g., commit doesn't exist), skip
      results.push({ hash, slug, pass: true, skipped: true });
      continue;
    } finally {
      // Clean up worktree; best effort, it may never have been created
      try { execFileSync('git', ['worktree', 'remove', '--force', worktreePath], { cwd, stdio: 'pipe' }); } catch {}
    }

    if (!pass) {
      writeLesson(cwd, `regression-${slug || 'unknown'}`, 'fail',
        `Retroactive regression: verify command for tick ${hash.slice(0, 7)} (${slug}) now fails. -5 retroactive penalty applied.`);
    }

    results.push({ hash, slug, pass });
  }

  return results;
}
702
+
/**
 * Get the verify command for a task from atris/TODO.md.
 *
 * Looks the task up by exact title across the in-progress, backlog and
 * completed lists returned by parseTodo (first match wins, in that order)
 * and reads its `verify` field.
 *
 * A verify value that is missing, not a string, or only whitespace counts as
 * absent: a blank shell command exits 0, so accepting it would let a task
 * "pass" verification without checking anything. The returned command is
 * trimmed.
 *
 * @param {string} cwd - Workspace root.
 * @param {string} taskTitle - Exact task title to look up.
 * @returns {{ cmd: (string|null), explicit: boolean }} explicit is true only
 *   when the task declares a non-blank verify command.
 */
function getVerifyCommand(cwd, taskTitle) {
  const todoPath = path.join(cwd, 'atris', 'TODO.md');
  if (!fs.existsSync(todoPath)) return { cmd: null, explicit: false };

  const todo = parseTodo(todoPath) || {};
  // A section may be absent from the parsed result; treat it as empty.
  const tasks = [todo.inProgress, todo.backlog, todo.completed]
    .flatMap(list => (Array.isArray(list) ? list : []));
  const task = tasks.find(t => t && t.title === taskTitle);

  const cmd = task && typeof task.verify === 'string' ? task.verify.trim() : '';
  if (!cmd) return { cmd: null, explicit: false };
  return { cmd, explicit: true };
}
720
+
/**
 * Check the reward judge against the checksum recorded at ship time.
 *
 * The fingerprint is the SHA-256 hex digest of the JSON-serialised
 * REWARD_CONFIG followed by the source text of computeTickReward, so a change
 * to either one produces a different digest.
 *
 * @returns {{ ok: boolean, expected: string, actual: string }} ok is true
 *   when the freshly computed digest equals REWARD_CHECKSUM.
 */
function verifyJudgeIntegrity() {
  const { createHash } = require('crypto');
  const actual = createHash('sha256')
    .update(JSON.stringify(REWARD_CONFIG))
    .update(computeTickReward.toString())
    .digest('hex');
  const expected = REWARD_CHECKSUM;
  return { ok: actual === expected, expected, actual };
}
733
+
528
734
  function runTaskOnce(context, options = {}) {
529
- const { verbose = false } = options;
735
+ const { verbose = false, cwd = process.cwd() } = options;
736
+
737
+ // Judge integrity check — halt if computeTickReward was tampered with
738
+ const integrity = verifyJudgeIntegrity();
739
+ if (!integrity.ok) {
740
+ writeLesson(cwd, 'judge-corruption', 'fail',
741
+ `computeTickReward checksum mismatch. Expected ${integrity.expected}, got ${integrity.actual}. Tick halted.`);
742
+ return {
743
+ outcome: 'halted',
744
+ reason: 'judge-corruption',
745
+ phaseResults: {},
746
+ elapsedSeconds: 0,
747
+ verifyRan: false,
748
+ verifyPass: false,
749
+ };
750
+ }
751
+
530
752
  const phaseResults = {};
531
753
  const startedAt = Date.now();
754
+ const verifyResult = getVerifyCommand(cwd, context.task);
755
+ const verifyCmd = verifyResult.cmd;
756
+
757
+ // Guard: refuse to run ticks without an explicit Verify field
758
+ if (!verifyResult.explicit) {
759
+ writeLesson(cwd, 'no-verify-field', 'fail',
760
+ `Task "${context.task}" has no explicit **Verify:** field in TODO.md. Tick halted — every task must declare how to verify it.`);
761
+ return {
762
+ outcome: 'halted',
763
+ reason: 'no-verify-field',
764
+ phaseResults: {},
765
+ elapsedSeconds: 0,
766
+ verifyRan: false,
767
+ verifyPass: false,
768
+ };
769
+ }
532
770
 
533
771
  for (const phase of ['plan', 'do', 'review']) {
534
772
  const t0 = Date.now();
@@ -542,11 +780,41 @@ function runTaskOnce(context, options = {}) {
542
780
 
543
781
  const reviewOutput = phaseResults.review.output || '';
544
782
 
783
+ // After review succeeds, run verify command if present
784
+ let verifyPass = false;
785
+ let verifyRan = false;
786
+ if (verifyCmd) {
787
+ verifyRan = true;
788
+ let t0 = Date.now();
789
+ try {
790
+ execSync(verifyCmd, { cwd, stdio: 'pipe' });
791
+ verifyPass = true;
792
+ const verifyTime = Math.round((Date.now() - t0) / 1000);
793
+ phaseResults.verify = {
794
+ output: `Verify passed (${verifyTime}s)`,
795
+ elapsedSeconds: verifyTime,
796
+ };
797
+ } catch (e) {
798
+ const verifyTime = Math.round((Date.now() - t0) / 1000);
799
+ phaseResults.verify = {
800
+ output: `Verify failed: ${e.message}`,
801
+ elapsedSeconds: verifyTime,
802
+ };
803
+ try {
804
+ const slug = (context.task || 'unknown').replace(/\s+/g, '-').toLowerCase().slice(0, 40);
805
+ writeLesson(cwd, `verify-fail-${slug}`, 'fail', `Verify command \`${verifyCmd}\` failed: ${e.message.split('\n')[0]}`);
806
+ } catch { /* lesson write must not crash the tick */ }
807
+ }
808
+ }
809
+
545
810
  return {
546
- success: !reviewOutput.includes('failed'),
811
+ success: verifyRan && verifyPass,
547
812
  elapsedSeconds: Math.round((Date.now() - startedAt) / 1000),
548
813
  phaseResults,
549
814
  reviewOutput,
815
+ verifyCmd,
816
+ verifyPass,
817
+ verifyRan,
550
818
  };
551
819
  }
552
820
 
@@ -579,6 +847,47 @@ function logCompletion(description) {
579
847
  fs.writeFileSync(logFile, content);
580
848
  }
581
849
 
850
+ /**
851
+ * Sum the REWARD_CONFIG weights of every signal that fired during one tick and return the total.
852
+ * Signals (weights are read from REWARD_CONFIG; the figures below are the ones this comment originally listed — confirm against lib/reward-config):
853
+ * - commit landed (do-phase output matches a git commit summary, or contains 'git commit' or 'committed'): COMMIT_LANDED, +1
854
+ * - verify passed (execution.verifyRan && execution.verifyPass): VERIFY_PASS, +3
855
+ * - verify passed and verifyCmd is exactly 'npm test': NPM_TEST_BONUS, +2
856
+ * - validator clean (reviewOutput empty or without the substring 'failed'): REVIEW_CLEAN, +1
857
+ * - tickOutcome === 'halted': HALT_PENALTY, -3. NOTE(review): execution.phaseResults.do is read unguarded, and verifyJudgeIntegrity hashes this function's source text, so editing the body requires a matching REWARD_CHECKSUM.
858
+ */
859
+ function computeTickReward(execution, tickOutcome, verifyCmd) {
860
+ let reward = 0;
861
+
862
+ // Validator clean: review passed without 'failed'
863
+ if (!execution.reviewOutput || !execution.reviewOutput.includes('failed')) {
864
+ reward += REWARD_CONFIG.REVIEW_CLEAN;
865
+ }
866
+
867
+ // Verify passed
868
+ if (execution.verifyRan && execution.verifyPass) {
869
+ reward += REWARD_CONFIG.VERIFY_PASS;
870
+ }
871
+
872
+ // npm test passed
873
+ if (execution.verifyRan && execution.verifyPass && verifyCmd === 'npm test') {
874
+ reward += REWARD_CONFIG.NPM_TEST_BONUS;
875
+ }
876
+
877
+ // Commit landed: check do phase output for git commit patterns
878
+ const doOutput = execution.phaseResults.do.output || '';
879
+ if (doOutput.match(/\[.*\s\d+\sfile.*changed/i) || doOutput.includes('git commit') || doOutput.includes('committed')) {
880
+ reward += REWARD_CONFIG.COMMIT_LANDED;
881
+ }
882
+
883
+ // Halt caught hallucination
884
+ if (tickOutcome === 'halted') {
885
+ reward += REWARD_CONFIG.HALT_PENALTY;
886
+ }
887
+
888
+ return reward;
889
+ }
890
+
582
891
  /**
583
892
  * Append a plain-language tick summary block to today's journal `## Notes`.
584
893
  * Fields:
@@ -588,9 +897,10 @@ function logCompletion(description) {
588
897
  * - nextStep: what the next tick will do
589
898
  * - idle: when true, block must contain literal "0 tasks in 0s"
590
899
  * so getIdleTickCount still works.
900
+ * - reward: optional tick reward score (from computeTickReward)
591
901
  * Safe to call inside a try/catch — a write failure must never crash a tick.
592
902
  */
593
- function appendTickSummary(cwd, { time, outcome, horizon, nextStep, idle } = {}) {
903
+ function appendTickSummary(cwd, { time, outcome, horizon, nextStep, idle, reward } = {}) {
594
904
  const now = new Date();
595
905
  const yyyy = now.getFullYear();
596
906
  const mm = String(now.getMonth() + 1).padStart(2, '0');
@@ -623,6 +933,10 @@ function appendTickSummary(cwd, { time, outcome, horizon, nextStep, idle } = {})
623
933
  ` ${horizonLine}`,
624
934
  ` ${nextLine}`,
625
935
  ];
936
+ // Add reward score if present
937
+ if (reward !== undefined && reward !== null) {
938
+ blockLines.push(` Reward: ${reward}`);
939
+ }
626
940
  // Idle marker must be the last non-empty line so getIdleTickCount, which
627
941
  // scans bottom-up, counts this block when idle=true.
628
942
  if (idleLine) blockLines.push(` ${idleLine}`);
@@ -644,18 +958,60 @@ function appendTickSummary(cwd, { time, outcome, horizon, nextStep, idle } = {})
644
958
  }
645
959
 
/**
 * Read the current endgame state from atris/TODO.md.
 *
 * Parses the `## Endgame` section for its **Slug:**, **Picked:** and
 * **Horizon:** fields and counts endgame-tagged tasks reported by parseTodo:
 * `remaining` = backlog + in-progress, `completed` = completed.
 *
 * A field's value must be on the same line as its label. An empty field
 * yields the default (slug 'unset', pickedAt null, horizon '') rather than
 * capturing the following line.
 *
 * Never throws: a missing file or any read/parse error yields the default
 * state.
 *
 * @param {string} cwd - Workspace root.
 * @returns {{ slug: string, pickedAt: (string|null), horizon: string,
 *   remaining: number, completed: number }}
 */
function readEndgameState(cwd) {
  const emptyState = () => ({ slug: 'unset', pickedAt: null, horizon: '', remaining: 0, completed: 0 });

  try {
    const todoPath = path.join(cwd, 'atris', 'TODO.md');
    if (!fs.existsSync(todoPath)) {
      return emptyState();
    }

    const todo = parseTodo(todoPath);
    const content = fs.readFileSync(todoPath, 'utf8');
    const endgameMatch = content.match(/##\s+Endgame\s*\n([\s\S]*?)(?=\n##|$)/);
    const section = endgameMatch ? endgameMatch[1] : '';

    // `[ \t]*` instead of `\s*`: `\s` also matches newlines, which let an
    // empty field swallow the next line (e.g. pickedAt = "**Horizon:** ...").
    const slugMatch = section.match(/\*\*Slug:\*\*[ \t]*(\S+)/);
    const pickedMatch = section.match(/\*\*Picked:\*\*[ \t]*(\S.*)/);
    const horizonMatch = section.match(/\*\*Horizon:\*\*[ \t]*(\S.*)/);

    const isEndgame = t => t.tag === 'endgame';

    return {
      slug: slugMatch ? slugMatch[1].trim() : 'unset',
      pickedAt: pickedMatch ? pickedMatch[1].trim() : null,
      horizon: horizonMatch ? horizonMatch[1].trim() : '',
      remaining: todo.backlog.filter(isEndgame).length
        + todo.inProgress.filter(isEndgame).length,
      completed: todo.completed.filter(isEndgame).length,
    };
  } catch {
    return emptyState();
  }
}
990
+
/**
 * Slug of the current endgame, exactly as reported by readEndgameState.
 */
function readHorizonSlug(cwd) {
  const { slug } = readEndgameState(cwd);
  return slug;
}
994
+
/**
 * Write a scorecard for the endgame that was active when the tick started,
 * but only once that same endgame is detected as complete and no scorecard
 * with its slug exists yet.
 *
 * @param {string} cwd - Workspace root.
 * @param {{ slug: string, remaining: number, pickedAt: * }} startingEndgame -
 *   Endgame state captured before the tick ran.
 * @returns {boolean} true when a scorecard was written, false otherwise.
 */
function maybeWriteCompletedEndgameScorecard(cwd, startingEndgame) {
  // Nothing to score unless an endgame with open work existed at the start.
  const hadOpenEndgame = Boolean(startingEndgame)
    && startingEndgame.slug !== 'unset'
    && startingEndgame.remaining !== 0;
  if (!hadOpenEndgame) return false;

  const atrisDir = path.join(cwd, 'atris');
  if (!fs.existsSync(atrisDir)) return false;

  const completion = detectEndgameCompletion(atrisDir);
  const isComplete = completion.complete;
  const finishedSlug = completion.endgameSlug;
  if (!isComplete) return false;
  if (finishedSlug !== startingEndgame.slug) return false;

  // One scorecard per endgame slug.
  for (const card of readScorecards(atrisDir)) {
    if (card.slug === finishedSlug) return false;
  }

  const scorecard = buildScorecardData(atrisDir, {
    slug: finishedSlug,
    pickedAt: startingEndgame.pickedAt,
  });
  writeScorecard(atrisDir, scorecard);
  return true;
}
660
1016
 
661
1017
  /**
@@ -737,24 +1093,11 @@ function getTickStatus(cwd) {
737
1093
  }
738
1094
  }
739
1095
 
740
- let slug = '(no endgame active — feed inbox or /endgame)';
741
- let horizon = '';
742
- const todoPath = path.join(atrisDir, 'TODO.md');
743
- let remaining = 0;
744
- let completedEndgame = 0;
745
- if (fs.existsSync(todoPath)) {
746
- const todoContent = fs.readFileSync(todoPath, 'utf8');
747
- const endgameMatch = todoContent.match(/##\s+Endgame\s*\n([\s\S]*?)(?=\n##|$)/);
748
- if (endgameMatch) {
749
- const slugMatch = endgameMatch[1].match(/\*\*Slug:\*\*\s*(.+)/);
750
- const horizonMatch = endgameMatch[1].match(/\*\*Horizon:\*\*\s*(.+)/);
751
- if (slugMatch) slug = slugMatch[1].trim();
752
- if (horizonMatch) horizon = horizonMatch[1].trim();
753
- }
754
- const todo = parseTodo(todoPath);
755
- remaining = todo.backlog.filter(t => t.tag === 'endgame').length;
756
- completedEndgame = todo.completed.filter(t => /^[A-Z]\d+[a-z]?[:\s]/.test((t.title || '').trim())).length;
757
- }
1096
+ const endgame = readEndgameState(cwd);
1097
+ const slug = endgame.slug === 'unset' ? '(no endgame active — feed inbox or /endgame)' : endgame.slug;
1098
+ const horizon = endgame.horizon;
1099
+ const remaining = endgame.remaining;
1100
+ const completedEndgame = endgame.completed;
758
1101
 
759
1102
  const total = remaining + completedEndgame;
760
1103
  const done = completedEndgame;
@@ -912,14 +1255,195 @@ function getRecentSignals(cwd) {
912
1255
  return { recentCommits, wikiHealth, recentLessons };
913
1256
  }
914
1257
 
/**
 * Score endgame candidates by historical reward of similar horizon types.
 * Reads the last 10 scorecards, infers each one's type from its slug prefix
 * (text before the first dash), computes the mean reward and success rate
 * per type, and scores every candidate as mean reward × confidence.
 *
 * Scorecards without a finite numeric totalReward are left out of the mean
 * (a type with none has mean 0), and missing shipped/attempted counts are
 * treated as 0, so one incomplete scorecard cannot turn the ranking into NaN.
 *
 * Adaptive explore rate: with a single scorecard the rate is 20%. Otherwise
 * it is interpolated over the types of the last 5 scorecards — all the same
 * type → 50%, all different → 20%.
 *
 * Difficulty floor: candidates whose inferred type has >80% success rate
 * AND mean reward >5 are filtered out of the exploit pool when harder
 * candidates exist, so easy wins don't starve hard work.
 *
 * Exploit picks the highest expected value from that pool. Explore picks
 * uniformly from all candidates in the order they were passed in.
 *
 * Falls back to the highest-confidence candidate (scored: false) when there
 * is no atris folder, no scorecards, or scoring throws.
 *
 * @param {string} cwd - Current working directory
 * @param {array} candidates - Array of { title, confidence, rationale }
 * @param {function(): number} [rng=Math.random] - Source of numbers in [0, 1);
 *   injectable so selection can be made deterministic.
 * @returns {object} - Single candidate. When scored:
 *   { title, confidence, rationale, scored: true, reason, exploreRate };
 *   otherwise the candidate's own fields plus { scored: false, reason }.
 */
function scoreEndgameCandidates(cwd, candidates, rng = Math.random) {
  // Shared fallback: highest confidence wins (the later candidate on ties).
  const bestByConfidence = (reason) => {
    const best = candidates.reduce((a, b) => (a.confidence > b.confidence ? a : b), candidates[0]);
    return { ...best, scored: false, reason };
  };

  const atrisDir = path.join(cwd, 'atris');
  if (!fs.existsSync(atrisDir)) {
    // No atris folder yet - can't score
    return bestByConfidence('no atris folder');
  }

  try {
    const scorecards = readScorecards(atrisDir).slice(-10); // Last 10
    if (scorecards.length === 0) {
      return bestByConfidence('no scorecards');
    }

    const typeOf = sc => sc.slug.split('-')[0];
    const countOf = v => (Number.isFinite(v) ? v : 0);

    // Map rather than a plain object: a slug prefix such as "constructor"
    // must not collide with Object.prototype members.
    const history = new Map(); // type → { rewards, shipped, attempted }
    for (const sc of scorecards) {
      const type = typeOf(sc);
      if (!history.has(type)) history.set(type, { rewards: [], shipped: 0, attempted: 0 });
      const h = history.get(type);
      if (Number.isFinite(sc.totalReward)) h.rewards.push(sc.totalReward);
      h.shipped += countOf(sc.tasksShipped);
      h.attempted += countOf(sc.tasksAttempted);
    }

    // Mean reward and success rate per type
    const typeMeans = new Map();
    const typeSuccessRate = new Map();
    for (const [type, h] of history) {
      const total = h.rewards.reduce((a, b) => a + b, 0);
      typeMeans.set(type, h.rewards.length > 0 ? total / h.rewards.length : 0);
      typeSuccessRate.set(type, h.attempted > 0 ? h.shipped / h.attempted : 0);
    }

    // Adaptive explore rate based on diversity of last 5 scorecards:
    // all same type → 0.5; all different → 0.2; linear in between.
    const last5Types = scorecards.slice(-5).map(typeOf);
    const uniqueTypes = new Set(last5Types).size;
    const maxTypes = last5Types.length;
    const exploreRate = maxTypes <= 1
      ? 0.2
      : 0.5 - (uniqueTypes - 1) * 0.3 / (maxTypes - 1);

    // Score each candidate by expected value based on historical type mean
    const knownTypes = [...typeMeans.keys()];
    const scored = candidates.map(c => {
      // Infer type from title keywords that match scorecard slug prefixes,
      // falling back to the title's first word.
      const titleLower = (c.title || '').toLowerCase();
      const cType = knownTypes.find(t => titleLower.includes(t)) || titleLower.split(/[\s\-]+/)[0];
      const historicalMean = typeMeans.has(cType) ? typeMeans.get(cType) : 0;
      const successRate = typeSuccessRate.has(cType) ? typeSuccessRate.get(cType) : 0;
      return {
        ...c,
        expectedValue: historicalMean * c.confidence,
        type: cType,
        historicalMean,
        successRate
      };
    });

    // Difficulty floor: drop easy-win candidates when harder ones exist
    const hardCandidates = scored.filter(c => !(c.successRate > 0.8 && c.historicalMean > 5));
    // Sort a copy so `scored` keeps the caller's order for the explore branch.
    const pool = (hardCandidates.length > 0 ? hardCandidates : scored)
      .slice()
      .sort((a, b) => b.expectedValue - a.expectedValue);

    // Adaptive exploit/explore split
    const exploit = rng() < (1 - exploreRate);
    let selected;
    if (exploit) {
      selected = pool[0];
    } else {
      // Random candidate from the full scored list (not filtered); clamp in
      // case an injected rng returns exactly 1.
      const idx = Math.min(scored.length - 1, Math.floor(rng() * scored.length));
      selected = scored[idx];
    }

    const reason = exploit
      ? `exploit: type=${selected.type} mean-reward=${selected.historicalMean.toFixed(1)} expected-value=${selected.expectedValue.toFixed(1)} explore-rate=${exploreRate.toFixed(2)}`
      : `explore: random-candidate type=${selected.type} explore-rate=${exploreRate.toFixed(2)}`;

    return {
      title: selected.title,
      confidence: selected.confidence,
      rationale: selected.rationale,
      scored: true,
      reason,
      exploreRate
    };
  } catch (err) {
    // If scoring fails, fall back to best by confidence
    return bestByConfidence(`scoring error: ${err.message}`);
  }
}
1381
+
/**
 * Check whether a lesson's bug pattern is still present in the named files.
 * Parses the lesson line for backtick-quoted file paths (e.g.
 * `commands/autopilot.js:116`) and the slug (e.g.
 * `inbox-parser-eats-hr-separator`), then searches each named file,
 * case-insensitively, for any slug keyword longer than two characters.
 *
 * The search runs in-process, so the result does not depend on an external
 * `grep` binary being installed (a failed spawn used to be indistinguishable
 * from "keyword not found" and reported the lesson as resolved).
 *
 * @param {string} lessonLine - A single line from lessons.md
 * @param {string} cwd - Current working directory; relative refs resolve here
 * @returns {boolean} true if no keyword is found in any named file (missing
 *   or unreadable files count as "not found"); false if a keyword is found,
 *   or if the line has no slug, no file reference, or no usable keyword.
 */
function isLessonResolved(lessonLine, cwd) {
  // Extract slug: bold text after date, e.g. **[2026-04-08] inbox-parser-eats-hr-separator**
  const slugMatch = lessonLine.match(/\*\*\[\d{4}-\d{2}-\d{2}\]\s+([\w-]+)\*\*/);
  if (!slugMatch) return false;
  const slug = slugMatch[1];

  // Extract file paths: patterns like `commands/autopilot.js:116` or `commands/run.js:157`
  const fileRefs = [];
  const filePattern = /`([a-zA-Z0-9_/./-]+\.[a-zA-Z]+(?::\d+(?:-\d+)?)?)`/g;
  let m;
  while ((m = filePattern.exec(lessonLine)) !== null) {
    const ref = m[1].replace(/:\d+(-\d+)?$/, ''); // strip line numbers
    if (ref.includes('/') || ref.endsWith('.js') || ref.endsWith('.md') || ref.endsWith('.ts')) {
      fileRefs.push(ref);
    }
  }
  if (fileRefs.length === 0) return false;

  // Derive keywords from slug (split on dashes, drop short words). Slug
  // characters are limited to [\w-], so plain substring search is exact.
  const keywords = slug.split('-').filter(w => w.length > 2).map(w => w.toLowerCase());
  if (keywords.length === 0) return false;

  // If at least one named file still contains a keyword → not resolved.
  for (const ref of fileRefs) {
    const absPath = path.isAbsolute(ref) ? ref : path.join(cwd, ref);
    let haystack;
    try {
      haystack = fs.readFileSync(absPath, 'utf8').toLowerCase();
    } catch {
      // File deleted, unreadable, or a directory → pattern counts as gone here
      continue;
    }
    if (keywords.some(kw => haystack.includes(kw))) return false;
  }

  // No keyword matched in any named file → lesson is resolved
  return true;
}
1437
+
915
1438
  /**
916
1439
  * Propose 3 candidate next horizons for the autopilot loop. Combines
917
1440
  * `getIdleTickCount` + `getRecentSignals` into a prompt asking the LLM
918
1441
  * to imagine what to work on next, spawns `claude -p`, and parses the
919
1442
  * JSON response into `[{ title, confidence, rationale }]`.
920
1443
  *
921
- * Throws on subprocess failure or when fewer than 3 valid candidates
922
- * come back. Callers are responsible for catching and falling back.
1444
+ * Filters out candidates derived from resolved lessons (bug pattern no
1445
+ * longer present in named files). Resolved lessons get tagged `[resolved]`
1446
+ * in lessons.md. Requires at least 1 valid candidate after filtering.
923
1447
  */
924
1448
  async function proposeCandidateHorizons(cwd) {
925
1449
  const idleTicks = getIdleTickCount(cwd);
@@ -1015,11 +1539,49 @@ Reply with the JSON array and nothing else.`;
1015
1539
  c.rationale.length > 0
1016
1540
  );
1017
1541
 
1018
- if (candidates.length < 3) {
1019
- throw new Error(`proposeCandidateHorizons: expected 3 valid candidates, got ${candidates.length}`);
1542
+ if (candidates.length < 1) {
1543
+ throw new Error(`proposeCandidateHorizons: expected at least 1 valid candidate, got ${candidates.length}`);
1544
+ }
1545
+
1546
+ // Filter out candidates derived from resolved lessons
1547
+ const lessonsPath = path.join(cwd, 'atris', 'lessons.md');
1548
+ const filtered = [];
1549
+ for (const c of candidates) {
1550
+ const combinedText = `${c.title} ${c.rationale}`.toLowerCase();
1551
+ let droppedByLesson = false;
1552
+ for (const lessonLine of signals.recentLessons) {
1553
+ const slugMatch = lessonLine.match(/\*\*\[\d{4}-\d{2}-\d{2}\]\s+([\w-]+)\*\*/);
1554
+ if (!slugMatch) continue;
1555
+ if (lessonLine.includes('[resolved]')) continue;
1556
+ const slug = slugMatch[1];
1557
+ // Fuzzy match: check if slug keywords appear in the candidate text
1558
+ const slugWords = slug.split('-').filter(w => w.length > 2);
1559
+ const matchCount = slugWords.filter(w => combinedText.includes(w)).length;
1560
+ if (matchCount < Math.ceil(slugWords.length * 0.5)) continue;
1561
+ // Candidate matches this lesson — check if the lesson is resolved
1562
+ if (isLessonResolved(lessonLine, cwd)) {
1563
+ // Tag lesson [resolved] in lessons.md
1564
+ try {
1565
+ let content = fs.readFileSync(lessonsPath, 'utf8');
1566
+ const taggedLine = lessonLine.replace(
1567
+ /\*\*\[(\d{4}-\d{2}-\d{2})\]\s+([\w-]+)\*\*/,
1568
+ '**[$1] $2** [resolved]'
1569
+ );
1570
+ content = content.replace(lessonLine.trim(), taggedLine.trim());
1571
+ fs.writeFileSync(lessonsPath, content);
1572
+ } catch {}
1573
+ droppedByLesson = true;
1574
+ break;
1575
+ }
1576
+ }
1577
+ if (!droppedByLesson) filtered.push(c);
1578
+ }
1579
+
1580
+ if (filtered.length < 1) {
1581
+ throw new Error('proposeCandidateHorizons: all candidates were from resolved lessons');
1020
1582
  }
1021
1583
 
1022
- return candidates.slice(0, 3);
1584
+ return filtered.slice(0, 3);
1023
1585
  }
1024
1586
 
1025
1587
  async function autopilotAtris(description, options = {}) {
@@ -1100,6 +1662,8 @@ async function autopilotAtris(description, options = {}) {
1100
1662
  let tickOutcomeText = 'I stopped for a manual check.';
1101
1663
  let tickNextStep = 'look for new work';
1102
1664
  let lastTaskTitle = null;
1665
+ let lastExecution = null;
1666
+ let lastVerifyCmd = null;
1103
1667
 
1104
1668
  for (let i = 0; i < maxIterations; i++) {
1105
1669
  // Check time budget
@@ -1117,7 +1681,7 @@ async function autopilotAtris(description, options = {}) {
1117
1681
  break;
1118
1682
  }
1119
1683
 
1120
- const suggestion = await suggestNextTask(cwd, skipped);
1684
+ const suggestion = await suggestNextTask(cwd, skipped, { auto });
1121
1685
 
1122
1686
  if (!suggestion) {
1123
1687
  tickOutcome = 'idle';
@@ -1212,6 +1776,7 @@ async function autopilotAtris(description, options = {}) {
1212
1776
  // Execute: plan → do → review
1213
1777
  lastTaskTitle = suggestion.task;
1214
1778
  const context = { task: suggestion.task, kind: suggestion.kind };
1779
+ const startingEndgame = readEndgameState(cwd);
1215
1780
 
1216
1781
  try {
1217
1782
  if (verbose) {
@@ -1224,7 +1789,25 @@ async function autopilotAtris(description, options = {}) {
1224
1789
  'Next I will report what happened and whether review passed.'
1225
1790
  ].join('\n'));
1226
1791
  }
1227
- const execution = runTaskOnce(context, { verbose });
1792
+ const execution = runTaskOnce(context, { verbose, cwd });
1793
+ lastExecution = execution;
1794
+ lastVerifyCmd = execution.verifyCmd;
1795
+
1796
+ // Early halt — judge corruption or no verify field
1797
+ if (execution.outcome === 'halted') {
1798
+ tickOutcome = 'halted';
1799
+ tickOutcomeText = `I halted before running "${lastTaskTitle}": ${execution.reason}.`;
1800
+ tickNextStep = 'stop until a human looks at the error';
1801
+ if (!verbose) {
1802
+ printPlainBlock([
1803
+ `I halted: ${execution.reason}.`,
1804
+ '',
1805
+ 'Next I stopped the loop.'
1806
+ ].join('\n'));
1807
+ }
1808
+ break;
1809
+ }
1810
+
1228
1811
  const planTime = execution.phaseResults.plan.elapsedSeconds;
1229
1812
  if (verbose) console.log(` planned (${planTime}s)`);
1230
1813
 
@@ -1253,11 +1836,65 @@ async function autopilotAtris(description, options = {}) {
1253
1836
  }
1254
1837
  if (verbose) console.log(` reviewed (${reviewTime}s)`);
1255
1838
 
1839
+ // Handle verify failure
1840
+ if (!execution.verifyPass) {
1841
+ tickOutcome = 'halted';
1842
+ tickOutcomeText = `I planned, built, and reviewed "${lastTaskTitle}" but verify failed.`;
1843
+ tickNextStep = 'verify failed, halting';
1844
+ writeLesson(cwd, 'verify-failed', 'fail', `Task "${lastTaskTitle}" passed review but failed verify command.`);
1845
+ if (verbose) {
1846
+ console.log(` verify failed. stopping for manual check.`);
1847
+ } else {
1848
+ printPlainBlock([
1849
+ `I planned, built, and reviewed the task, but the verify check failed.`,
1850
+ '',
1851
+ 'Next I stopped for a manual check.'
1852
+ ].join('\n'));
1853
+ }
1854
+ break;
1855
+ }
1856
+
1256
1857
  completed++;
1257
1858
  tickOutcome = 'built';
1258
1859
  tickOutcomeText = `I planned, built, and reviewed "${suggestion.task}".`;
1259
1860
  tickNextStep = 'pick the next endgame task';
1260
1861
  logCompletion(suggestion.task);
1862
+
1863
+ // Record commit hash + verify command for retroactive regression checks
1864
+ try {
1865
+ const commitHash = execSync('git rev-parse HEAD', { cwd, encoding: 'utf8' }).trim();
1866
+ const taskSlug = (suggestion.task || 'unknown').replace(/\s+/g, '-').toLowerCase().slice(0, 40);
1867
+ recordTickCommit(cwd, commitHash, execution.verifyCmd || '', taskSlug);
1868
+
1869
+ // Every 10th tick, run retroactive regression check
1870
+ const registryPath = path.join(cwd, 'atris', 'tick-registry.json');
1871
+ if (fs.existsSync(registryPath)) {
1872
+ try {
1873
+ const registry = JSON.parse(fs.readFileSync(registryPath, 'utf8'));
1874
+ if (Array.isArray(registry) && registry.length % 10 === 0) {
1875
+ const regressionResults = regressionCheck(cwd);
1876
+ const failures = regressionResults.filter(r => !r.pass && !r.skipped);
1877
+ if (failures.length > 0) {
1878
+ // Apply -5 retroactive penalty per failure via journal note
1879
+ for (const f of failures) {
1880
+ appendTickSummary(cwd, {
1881
+ outcome: `Retroactive regression failure: tick ${f.hash.slice(0, 7)} (${f.slug}) verify now fails. -5 penalty.`,
1882
+ horizon: readHorizonSlug(cwd),
1883
+ nextStep: 'investigate regression',
1884
+ reward: -5,
1885
+ });
1886
+ }
1887
+ if (verbose) console.log(` regression check: ${failures.length} failure(s) found`);
1888
+ } else if (verbose) {
1889
+ console.log(` regression check: all ${regressionResults.length} entries pass`);
1890
+ }
1891
+ }
1892
+ } catch { /* registry read failure must not crash */ }
1893
+ }
1894
+ } catch { /* commit recording failure must not crash the tick */ }
1895
+ if (maybeWriteCompletedEndgameScorecard(cwd, startingEndgame)) {
1896
+ tickNextStep = 'pick the next horizon';
1897
+ }
1261
1898
  if (verbose) {
1262
1899
  console.log(` done. ${completed} task${completed > 1 ? 's' : ''} completed.`);
1263
1900
  console.log('');
@@ -1301,12 +1938,20 @@ async function autopilotAtris(description, options = {}) {
1301
1938
  minute: '2-digit'
1302
1939
  }).toLowerCase();
1303
1940
  const idle = tickOutcome === 'idle' || (completed === 0 && tickOutcome !== 'halted');
1941
+
1942
+ // Compute reward score if we had an execution
1943
+ let tickReward = undefined;
1944
+ if (lastExecution && lastVerifyCmd) {
1945
+ tickReward = computeTickReward(lastExecution, tickOutcome, lastVerifyCmd);
1946
+ }
1947
+
1304
1948
  appendTickSummary(cwd, {
1305
1949
  time,
1306
1950
  outcome: tickOutcomeText,
1307
1951
  horizon: horizonSlug === 'unset' ? null : horizonSlug,
1308
1952
  nextStep: tickNextStep,
1309
- idle
1953
+ idle,
1954
+ reward: tickReward
1310
1955
  });
1311
1956
  } catch {
1312
1957
  /* journal write failure must not crash the tick */
@@ -1326,6 +1971,152 @@ async function autopilotAtris(description, options = {}) {
1326
1971
  return { success: completed > 0, completed };
1327
1972
  }
1328
1973
 
1974
/**
 * Compute a task's age in whole days.
 *
 * Lookup order:
 *  1. The first `YYYY-MM-DD` date found in `task.claimed`.
 *  2. For tasks whose `tag` is `'endgame'`, the first
 *     `**Picked:** YYYY-MM-DD` date found in the file at `todoPath`.
 *  3. Otherwise 0 (the task is treated as fresh).
 *
 * A date that does not parse is ignored and the next source is tried.
 * A date in the future is clamped to an age of 0, so the result is never
 * negative. A `todoPath` that cannot be read falls through to 0 instead of
 * throwing.
 *
 * @param {{ claimed?: string, tag?: string }} task - parsed TODO entry
 * @param {string} [todoPath] - path to the TODO file; only read for endgame tasks
 * @returns {number} non-negative age in whole days
 */
function getTaskAgeDays(task, todoPath) {
  const MS_PER_DAY = 24 * 60 * 60 * 1000;

  // Whole-day age of a date string, or null when the string is not a valid date.
  const ageOf = (dateText) => {
    const stampMs = new Date(dateText).getTime();
    if (Number.isNaN(stampMs)) return null;
    // Clamp: a future-dated stamp (typo, clock skew) must not yield a negative age.
    return Math.max(0, Math.floor((Date.now() - stampMs) / MS_PER_DAY));
  };

  // `claimed` is free text; only a string can carry a date stamp.
  if (typeof task.claimed === 'string') {
    const claimedDate = task.claimed.match(/\d{4}-\d{2}-\d{2}/);
    const claimedAge = claimedDate ? ageOf(claimedDate[0]) : null;
    if (claimedAge !== null) return claimedAge;
  }

  if (task.tag === 'endgame' && todoPath) {
    let content = null;
    try {
      content = fs.readFileSync(todoPath, 'utf8');
    } catch {
      // Missing or unreadable TODO file: fall through to the "fresh" default.
    }
    if (content !== null) {
      const picked = content.match(/\*\*Picked:\*\*\s*(\d{4}-\d{2}-\d{2})/);
      const pickedAge = picked ? ageOf(picked[1]) : null;
      if (pickedAge !== null) return pickedAge;
    }
  }

  return 0;
}
1998
+
1999
/**
 * Check whether a task/fact is still actionable.
 *
 * Decision order:
 *  1. `age <= 7` -> `'actionable'` without touching the disk.
 *  2. No keyword longer than 3 characters in the title -> `'unverified'`.
 *  3. `source` given but missing on disk -> `'stale'`.
 *  4. Each keyword (at most 5) is searched as a literal string in `*.js`
 *     and `*.md` files under `cwd`. No hit at all -> `'stale'`, unless a
 *     search could not be carried out (grep missing, timed out, errored),
 *     in which case the answer is `'unverified'`.
 *  5. If `git log` over the last 30 days mentions any of the first 3
 *     keywords in a commit message -> `'actionable'`, else `'unverified'`.
 *
 * @param {{ title: string, age: number, source?: string }} fact
 *   - title: the task or fact description
 *   - age: age in days since the task was created/last verified
 *   - source: optional file path where the fact originated
 * @param {string} cwd - workspace root
 * @returns {'actionable'|'unverified'|'stale'}
 */
function isStillTrue(fact, cwd) {
  const { title, age, source } = fact;

  // Fresh tasks are always actionable
  if (age <= 7) return 'actionable';

  // Extract searchable keywords from the title (skip short/common words).
  // A non-string title yields no keywords and is reported as unverified.
  const keywords = (typeof title === 'string' ? title : '')
    .replace(/[`\[\](){}]/g, '')
    .split(/[\s/\\.:,;]+/)
    .filter((word) => word.length > 3)
    .slice(0, 5);

  if (keywords.length === 0) return 'unverified';

  // Strategy 1: If source file is given, check it still exists
  if (source) {
    const sourcePath = path.isAbsolute(source) ? source : path.join(cwd, source);
    if (!fs.existsSync(sourcePath)) return 'stale';
  }

  // Strategy 2: grep the codebase for key terms from the title
  let grepHits = 0;
  let searchFailed = false;
  for (const kw of keywords) {
    try {
      // -F: the keyword is literal text, not a regular expression.
      // -e: keeps a keyword that starts with '-' from being parsed as an option.
      execFileSync(
        'grep',
        ['-r', '-l', '-F', '--include=*.js', '--include=*.md', '-m', '1', '-e', kw, '.'],
        {
          cwd,
          stdio: ['ignore', 'pipe', 'ignore'],
          timeout: 10000
        }
      );
      grepHits++;
    } catch (err) {
      // Exit status 1 is grep's clean "no match" result.
      if (err.status === 1) continue;
      // Any other failure: file names already printed still prove a match
      // (e.g. grep also hit an unreadable path, or timed out after a hit).
      const listed = err.stdout ? String(err.stdout).trim() : '';
      if (listed.length > 0) {
        grepHits++;
      } else {
        // The search itself did not run to completion; absence is unproven.
        searchFailed = true;
      }
    }
  }

  // No keyword found: stale only when every search actually completed.
  if (grepHits === 0) return searchFailed ? 'unverified' : 'stale';

  // Strategy 3: check git log for recent activity related to the keywords
  let gitHits = 0;
  for (const kw of keywords.slice(0, 3)) {
    try {
      const out = execFileSync(
        'git',
        ['log', '--oneline', '--since=30 days ago', '--all', '--fixed-strings', `--grep=${kw}`, '-1'],
        { cwd, stdio: ['ignore', 'pipe', 'ignore'], timeout: 10000 }
      ).toString().trim();
      if (out.length > 0) gitHits++;
    } catch {
      // git-log failure is non-fatal
    }
  }

  // Strong mechanical evidence: grep found terms AND recent git activity
  if (gitHits > 0) return 'actionable';

  // Grep found terms but no recent git activity — can't fully verify
  return 'unverified';
}
2068
+
2069
/**
 * Ask a model whether a task/fact is still relevant.
 * Intended for the case where isStillTrue returns 'unverified': the
 * mechanical check could not confirm or deny, so `claude -p` is run in the
 * workspace with the Bash, Read, Glob and Grep tools allowed.
 *
 * The prompt is handed to the process as a single argv element, so no shell
 * quoting is involved and nothing is written into the workspace the model
 * is asked to search.
 *
 * The verdict is taken from the first output line that starts with YES or
 * NO (case-insensitive); if no such line exists the first output line is
 * used and the result is not fresh.
 *
 * Never throws: on spawn failure, timeout, or non-zero exit the result is
 * `{ fresh: false, reasoning: 'Model check failed: …' }`.
 *
 * @param {{ title: string, age: number, source?: string }} fact
 * @param {string} cwd - workspace root
 * @returns {{ fresh: boolean, reasoning: string }}
 */
function askModel(fact, cwd) {
  const { title, source } = fact;
  const sourceHint = source ? `\nOriginal source file: ${source}` : '';
  const prompt = `You are a staleness checker. Answer with exactly one line: YES or NO, followed by a short reason (under 30 words).

Is this task still relevant to the codebase? Check for the mentioned files, functions, or patterns.

Task: "${title}"${sourceHint}

Search the codebase to verify. Reply: YES <reason> or NO <reason>`;

  try {
    // Run the child without CLAUDECODE set; the rest of the environment is inherited.
    const env = { ...process.env };
    delete env.CLAUDECODE;

    const output = execFileSync(
      'claude',
      ['-p', prompt, '--allowedTools', 'Bash,Read,Glob,Grep'],
      {
        cwd,
        encoding: 'utf8',
        timeout: 60000,
        stdio: 'pipe',
        maxBuffer: 2 * 1024 * 1024,
        env
      }
    ).trim();

    // Parse YES/NO from the first line of output that carries a verdict
    const lines = output.split('\n');
    const firstLine = lines.find((line) => /^\s*(YES|NO)\b/i.test(line)) || lines[0] || '';
    const fresh = /^\s*YES\b/i.test(firstLine);
    const reasoning = firstLine.replace(/^\s*(YES|NO)\s*/i, '').trim() || output.slice(0, 200);

    return { fresh, reasoning };
  } catch (err) {
    // On timeout or crash, treat as unverifiable — conservative default
    const detail = err && err.message ? err.message : '';
    return { fresh: false, reasoning: `Model check failed: ${detail.slice(0, 100)}` };
  }
}
2119
+
1329
2120
  /**
1330
2121
  * Entry point when called without a description.
1331
2122
  */
@@ -1335,15 +2126,28 @@ async function autopilotFromTodo(options = {}) {
1335
2126
 
1336
2127
  module.exports = {
1337
2128
  appendTickSummary,
2129
+ askHuman,
2130
+ askModel,
1338
2131
  autopilotAtris,
1339
2132
  autopilotFromTodo,
1340
2133
  buildPrompt,
2134
+ isLessonResolved,
2135
+ isStillTrue,
2136
+ getTaskAgeDays,
1341
2137
  getIdleTickCount,
1342
2138
  getRecentSignals,
1343
2139
  getTickStatus,
2140
+ getVerifyCommand,
2141
+ computeTickReward,
2142
+ verifyJudgeIntegrity,
2143
+ maybeWriteCompletedEndgameScorecard,
1344
2144
  renderHumanSuggestion,
1345
2145
  renderHumanTickIntro,
1346
2146
  proposeCandidateHorizons,
2147
+ recordTickCommit,
2148
+ regressionCheck,
1347
2149
  runTaskOnce,
1348
- suggestNextTask
2150
+ scoreEndgameCandidates,
2151
+ suggestNextTask,
2152
+ writeLesson
1349
2153
  };