atris 3.1.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -2
- package/atris/skills/improve/SKILL.md +2 -2
- package/bin/atris.js +7 -1
- package/commands/autopilot.js +562 -39
- package/commands/business.js +14 -9
- package/commands/experiments.js +1 -1
- package/commands/release.js +183 -0
- package/commands/research.js +52 -0
- package/commands/sync.js +102 -13
- package/commands/verify.js +3 -3
- package/commands/wiki.js +45 -25
- package/lib/reward-config.js +24 -0
- package/lib/scorecard.js +16 -2
- package/lib/wiki.js +87 -56
- package/package.json +3 -2
package/commands/autopilot.js
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
const fs = require('fs');
|
|
10
10
|
const path = require('path');
|
|
11
|
-
const { execSync } = require('child_process');
|
|
11
|
+
const { execSync, execFileSync } = require('child_process');
|
|
12
12
|
const readline = require('readline');
|
|
13
13
|
const { getLogPath, ensureLogDirectory, createLogFile } = require('../lib/journal');
|
|
14
14
|
const { parseTodo } = require('../lib/todo');
|
|
@@ -19,6 +19,7 @@ const {
|
|
|
19
19
|
writeScorecard,
|
|
20
20
|
detectEndgameCompletion
|
|
21
21
|
} = require('../lib/scorecard');
|
|
22
|
+
const { REWARD_CONFIG, REWARD_CHECKSUM } = require('../lib/reward-config');
|
|
22
23
|
|
|
23
24
|
const pkg = require('../package.json');
|
|
24
25
|
|
|
@@ -28,7 +29,7 @@ const PHASE_TIMEOUT = 600000; // 10 min per phase
|
|
|
28
29
|
* Scan workspace for the next thing worth doing.
|
|
29
30
|
* Returns { task, why, kind } or null.
|
|
30
31
|
*/
|
|
31
|
-
async function suggestNextTask(cwd, skipped = new Set()) {
|
|
32
|
+
async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {}) {
|
|
32
33
|
const atrisDir = path.join(cwd, 'atris');
|
|
33
34
|
const suggestions = [];
|
|
34
35
|
|
|
@@ -37,6 +38,7 @@ async function suggestNextTask(cwd, skipped = new Set()) {
|
|
|
37
38
|
const todo = parseTodo(todoPath);
|
|
38
39
|
|
|
39
40
|
for (const t of todo.backlog) {
|
|
41
|
+
if (t.tags && t.tags.includes('unverified')) continue;
|
|
40
42
|
if (t.tag === 'endgame' && !skipped.has(t.title)) {
|
|
41
43
|
suggestions.push({
|
|
42
44
|
task: t.title,
|
|
@@ -51,7 +53,7 @@ async function suggestNextTask(cwd, skipped = new Set()) {
|
|
|
51
53
|
// --- Resume interrupted work ---
|
|
52
54
|
if (todo.inProgress.length > 0) {
|
|
53
55
|
const t = todo.inProgress[0];
|
|
54
|
-
if (!skipped.has(t.title)) {
|
|
56
|
+
if (!(t.tags && t.tags.includes('unverified')) && !skipped.has(t.title)) {
|
|
55
57
|
suggestions.push({
|
|
56
58
|
task: t.title,
|
|
57
59
|
why: `This was already started${t.claimed ? ` by ${t.claimed}` : ''} but never finished.`,
|
|
@@ -102,15 +104,17 @@ async function suggestNextTask(cwd, skipped = new Set()) {
|
|
|
102
104
|
}
|
|
103
105
|
|
|
104
106
|
// --- Backlog tasks ---
|
|
105
|
-
for (const t of todo.backlog
|
|
107
|
+
for (const t of todo.backlog) {
|
|
108
|
+
if (t.tags && t.tags.includes('unverified')) continue;
|
|
106
109
|
if (skipped.has(t.title)) continue;
|
|
107
|
-
const remaining = todo.backlog.length;
|
|
110
|
+
const remaining = todo.backlog.filter(b => !(b.tags && b.tags.includes('unverified'))).length;
|
|
108
111
|
suggestions.push({
|
|
109
112
|
task: t.title,
|
|
110
113
|
why: `Next in the backlog${t.tag ? ` (${t.tag})` : ''}. ${remaining} task${remaining > 1 ? 's' : ''} waiting.`,
|
|
111
114
|
kind: 'backlog',
|
|
112
115
|
priority: 5
|
|
113
116
|
});
|
|
117
|
+
break;
|
|
114
118
|
}
|
|
115
119
|
|
|
116
120
|
// --- Unprocessed inbox items ---
|
|
@@ -223,7 +227,64 @@ async function suggestNextTask(cwd, skipped = new Set()) {
|
|
|
223
227
|
}
|
|
224
228
|
|
|
225
229
|
suggestions.sort((a, b) => a.priority - b.priority);
|
|
226
|
-
|
|
230
|
+
|
|
231
|
+
// Staleness gate: filter out unverified/stale suggestions
|
|
232
|
+
const staleSkipped = [];
|
|
233
|
+
const fresh = [];
|
|
234
|
+
for (const s of suggestions) {
|
|
235
|
+
const fakeTask = { title: s.task, tag: s.kind === 'endgame' ? 'endgame' : null, claimed: null };
|
|
236
|
+
if (s.kind === 'resume' && todo.inProgress.length > 0) {
|
|
237
|
+
fakeTask.claimed = todo.inProgress[0].claimed;
|
|
238
|
+
}
|
|
239
|
+
const age = getTaskAgeDays(fakeTask, todoPath);
|
|
240
|
+
const status = isStillTrue({ title: s.task, age, source: null }, cwd);
|
|
241
|
+
if (status === 'stale') {
|
|
242
|
+
staleSkipped.push({ task: s.task, status, reasoning: null });
|
|
243
|
+
continue;
|
|
244
|
+
}
|
|
245
|
+
if (status === 'unverified') {
|
|
246
|
+
if (auto) {
|
|
247
|
+
// Auto mode: use model check
|
|
248
|
+
const result = askModel({ title: s.task, age, source: null }, cwd);
|
|
249
|
+
if (!result.fresh) {
|
|
250
|
+
staleSkipped.push({ task: s.task, status: 'unverified (model: not fresh)', reasoning: result.reasoning });
|
|
251
|
+
continue;
|
|
252
|
+
}
|
|
253
|
+
} else {
|
|
254
|
+
// Interactive mode: ask the human
|
|
255
|
+
const result = await askHuman(s.task);
|
|
256
|
+
if (!result.fresh) {
|
|
257
|
+
staleSkipped.push({ task: s.task, status: 'unverified (human: not relevant)', reasoning: null });
|
|
258
|
+
continue;
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
fresh.push(s);
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
// Log skipped items to journal
|
|
266
|
+
if (staleSkipped.length > 0) {
|
|
267
|
+
try {
|
|
268
|
+
const { logFile } = getLogPath();
|
|
269
|
+
const now = new Date();
|
|
270
|
+
const hhmm = `${String(now.getHours()).padStart(2, '0')}:${String(now.getMinutes()).padStart(2, '0')}`;
|
|
271
|
+
const lines = staleSkipped.map(s => `- ${s.task} (${s.status})${s.reasoning ? ` — ${s.reasoning}` : ''}`);
|
|
272
|
+
const note = `\n### Staleness skip — ${hhmm}\n${lines.join('\n')}\n`;
|
|
273
|
+
if (fs.existsSync(logFile)) {
|
|
274
|
+
const content = fs.readFileSync(logFile, 'utf8');
|
|
275
|
+
const notesIdx = content.indexOf('## Notes');
|
|
276
|
+
if (notesIdx !== -1) {
|
|
277
|
+
const insertAt = content.indexOf('\n', notesIdx) + 1;
|
|
278
|
+
const updated = content.slice(0, insertAt) + note + content.slice(insertAt);
|
|
279
|
+
fs.writeFileSync(logFile, updated);
|
|
280
|
+
} else {
|
|
281
|
+
fs.appendFileSync(logFile, `\n## Notes\n${note}`);
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
} catch {}
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
return fresh[0] || null;
|
|
227
288
|
}
|
|
228
289
|
|
|
229
290
|
/**
|
|
@@ -242,6 +303,22 @@ function askApproval() {
|
|
|
242
303
|
});
|
|
243
304
|
}
|
|
244
305
|
|
|
306
|
+
/**
 * Ask the human whether an unverified task is still relevant.
 * Used on the interactive path; in auto mode the caller consults a model
 * check instead of prompting.
 *
 * @param {string} taskTitle - Title shown inside the prompt.
 * @param {{ input?: NodeJS.ReadableStream, output?: NodeJS.WritableStream }} [streams]
 *   Streams to prompt on. Defaults to process.stdin / process.stdout, which
 *   is what existing single-argument callers get.
 * @returns {Promise<{ fresh: boolean }>} fresh is true only for "y" / "yes"
 *   (case-insensitive, surrounding whitespace ignored). Any other answer,
 *   or the input ending before an answer is given, yields fresh: false.
 */
function askHuman(taskTitle, { input = process.stdin, output = process.stdout } = {}) {
  return new Promise((resolve) => {
    const rl = readline.createInterface({ input, output });

    // When the input ends (EOF, closed pipe) readline closes without ever
    // invoking the question callback. Without this listener the promise
    // would stay pending forever. A promise settles only once, so the
    // extra 'close' fired by rl.close() below is harmless.
    rl.once('close', () => resolve({ fresh: false }));

    rl.question(` is "${taskTitle}" still relevant? y/n → `, (answer) => {
      const normalized = (answer || '').trim().toLowerCase();
      // Resolve before closing so the 'close' listener cannot win the race.
      resolve({ fresh: normalized === 'y' || normalized === 'yes' });
      rl.close();
    });
  });
}
|
|
321
|
+
|
|
245
322
|
/**
|
|
246
323
|
* Run a phase via claude -p subprocess.
|
|
247
324
|
*/
|
|
@@ -555,30 +632,141 @@ function writeLesson(cwd, slug, status, explanation) {
|
|
|
555
632
|
fs.writeFileSync(lessonsPath, content);
|
|
556
633
|
}
|
|
557
634
|
|
|
635
|
+
/**
 * Record a tick's commit hash and verify command in atris/tick-registry.json.
 * Each entry: { hash, verifyCmd, slug, timestamp }.
 *
 * The registry is read, appended to, and rewritten as pretty-printed JSON
 * with a trailing newline. A missing, unreadable, unparseable, or non-array
 * registry is treated as empty, so a damaged file is replaced rather than
 * crashing the tick.
 *
 * @param {string} cwd - Workspace root containing the `atris` directory.
 * @param {string} hash - Commit hash to record.
 * @param {string} verifyCmd - Verify command associated with the tick.
 * @param {string} slug - Short identifier for the task.
 */
function recordTickCommit(cwd, hash, verifyCmd, slug) {
  const registryPath = path.join(cwd, 'atris', 'tick-registry.json');

  let registry = [];
  try {
    const parsed = JSON.parse(fs.readFileSync(registryPath, 'utf8'));
    // Valid JSON that is not an array (e.g. `{}` or `null`) has no push();
    // start a fresh list instead of throwing.
    if (Array.isArray(parsed)) registry = parsed;
  } catch {
    // Missing or corrupt registry: fall through with the empty list.
  }

  registry.push({ hash, verifyCmd, slug, timestamp: new Date().toISOString() });
  fs.writeFileSync(registryPath, JSON.stringify(registry, null, 2) + '\n');
}
|
|
648
|
+
|
|
649
|
+
/**
 * Retroactive regression check. Reads the last 10 entries from
 * atris/tick-registry.json, re-runs each entry's verify command inside a
 * temporary detached git worktree checked out at the entry's commit, and
 * reports the outcome per entry. A failing verify command also writes a
 * lesson (via writeLesson) describing the retroactive failure.
 *
 * Entries that cannot be checked are reported as { pass: true, skipped: true }
 * so callers filtering on `!pass && !skipped` ignore them. That covers
 * entries that are malformed, have a hash that is not a plain hex string,
 * have an empty verify command, or whose worktree cannot be created.
 *
 * @param {string} cwd - Workspace root (a git checkout containing `atris/`).
 * @returns {Array<{ hash: *, slug: *, pass: boolean, skipped?: boolean }>}
 *   One result per inspected entry; empty when the registry is missing,
 *   unparseable, not an array, or empty.
 */
function regressionCheck(cwd) {
  const registryPath = path.join(cwd, 'atris', 'tick-registry.json');
  if (!fs.existsSync(registryPath)) return [];

  let registry;
  try {
    registry = JSON.parse(fs.readFileSync(registryPath, 'utf8'));
  } catch {
    return [];
  }
  if (!Array.isArray(registry) || registry.length === 0) return [];

  const results = [];

  for (const entry of registry.slice(-10)) {
    const hash = entry ? entry.hash : undefined;
    const slug = entry ? entry.slug : undefined;
    const verifyCmd = entry ? entry.verifyCmd : undefined;

    // The registry is a plain JSON file on disk, so treat its contents as
    // untrusted: only hex object names are ever handed to git, and anything
    // else is skipped instead of throwing on .slice() or reaching a shell.
    const checkable =
      typeof hash === 'string' &&
      /^[0-9a-f]{4,64}$/i.test(hash) &&
      typeof verifyCmd === 'string' &&
      verifyCmd.trim() !== '';
    if (!checkable) {
      results.push({ hash, slug, pass: true, skipped: true });
      continue;
    }

    const worktreePath = path.join(cwd, '.regression-worktree-' + hash.slice(0, 8));
    let pass = false;
    try {
      // Argument array (no shell) keeps the path and hash from being
      // re-parsed; stdio 'pipe' already keeps git's stderr off the console.
      execFileSync('git', ['worktree', 'add', '--detach', worktreePath, hash], { cwd, stdio: 'pipe' });
      try {
        // verifyCmd is a shell command line by design, so it runs via a shell.
        execSync(verifyCmd, { cwd: worktreePath, stdio: 'pipe', timeout: 60000 });
        pass = true;
      } catch {
        pass = false;
      }
    } catch {
      // If worktree creation fails (e.g., commit doesn't exist), skip
      results.push({ hash, slug, pass: true, skipped: true });
      continue;
    } finally {
      // Always attempt cleanup, including after a failed or partial add.
      try {
        execFileSync('git', ['worktree', 'remove', '--force', worktreePath], { cwd, stdio: 'pipe' });
      } catch {
        // Nothing to remove, or git unavailable: best-effort cleanup only.
      }
    }

    if (!pass) {
      writeLesson(cwd, `regression-${slug || 'unknown'}`, 'fail',
        `Retroactive regression: verify command for tick ${hash.slice(0, 7)} (${slug}) now fails. -5 retroactive penalty applied.`);
    }

    results.push({ hash, slug, pass });
  }

  return results;
}
|
|
702
|
+
|
|
558
703
|
/**
 * Get the verify command for a task from TODO.md
 * Reads atris/TODO.md via parseTodo, finds the task by exact title across
 * the in-progress, backlog and completed lists, and extracts its verify field.
 *
 * @param {string} cwd - Workspace root containing `atris/TODO.md`.
 * @param {string} taskTitle - Exact task title to look up.
 * @returns {{ cmd: string|null, explicit: boolean }} explicit is true only
 *   if the task has a non-blank verify field; cmd is that field with
 *   surrounding whitespace removed. When TODO.md is missing, the task is not
 *   found, or the field is absent/blank, returns { cmd: null, explicit: false }.
 */
function getVerifyCommand(cwd, taskTitle) {
  const none = { cmd: null, explicit: false };

  const todoPath = path.join(cwd, 'atris', 'TODO.md');
  if (!fs.existsSync(todoPath)) return none;

  const todo = parseTodo(todoPath);
  const task = [...todo.inProgress, ...todo.backlog, ...todo.completed]
    .find(t => t.title === taskTitle);

  // A whitespace-only field is truthy, yet running it through a shell is a
  // no-op that exits 0, which would let a task "pass" verification without
  // declaring anything. Trim first so blank fields count as missing.
  // NOTE(review): assumes parseTodo yields `verify` as a string — confirm.
  const cmd = task && typeof task.verify === 'string' ? task.verify.trim() : '';
  if (!cmd) return none;

  return { cmd, explicit: true };
}
|
|
720
|
+
|
|
721
|
+
/**
 * Integrity check for the reward judge.
 *
 * Computes a SHA-256 digest over the JSON serialization of REWARD_CONFIG
 * followed by the source text of computeTickReward, and compares the hex
 * digest with REWARD_CHECKSUM.
 *
 * @returns {{ ok: boolean, expected: string, actual: string }} ok is true
 *   when the freshly computed digest equals REWARD_CHECKSUM.
 */
function verifyJudgeIntegrity() {
  const { createHash } = require('crypto');

  // Order matters: config first, then the function source.
  const actual = createHash('sha256')
    .update(JSON.stringify(REWARD_CONFIG))
    .update(computeTickReward.toString())
    .digest('hex');

  const expected = REWARD_CHECKSUM;
  return { ok: actual === expected, expected, actual };
}
|
|
576
733
|
|
|
577
734
|
function runTaskOnce(context, options = {}) {
|
|
578
735
|
const { verbose = false, cwd = process.cwd() } = options;
|
|
736
|
+
|
|
737
|
+
// Judge integrity check — halt if computeTickReward was tampered with
|
|
738
|
+
const integrity = verifyJudgeIntegrity();
|
|
739
|
+
if (!integrity.ok) {
|
|
740
|
+
writeLesson(cwd, 'judge-corruption', 'fail',
|
|
741
|
+
`computeTickReward checksum mismatch. Expected ${integrity.expected}, got ${integrity.actual}. Tick halted.`);
|
|
742
|
+
return {
|
|
743
|
+
outcome: 'halted',
|
|
744
|
+
reason: 'judge-corruption',
|
|
745
|
+
phaseResults: {},
|
|
746
|
+
elapsedSeconds: 0,
|
|
747
|
+
verifyRan: false,
|
|
748
|
+
verifyPass: false,
|
|
749
|
+
};
|
|
750
|
+
}
|
|
751
|
+
|
|
579
752
|
const phaseResults = {};
|
|
580
753
|
const startedAt = Date.now();
|
|
581
|
-
const
|
|
754
|
+
const verifyResult = getVerifyCommand(cwd, context.task);
|
|
755
|
+
const verifyCmd = verifyResult.cmd;
|
|
756
|
+
|
|
757
|
+
// Guard: refuse to run ticks without an explicit Verify field
|
|
758
|
+
if (!verifyResult.explicit) {
|
|
759
|
+
writeLesson(cwd, 'no-verify-field', 'fail',
|
|
760
|
+
`Task "${context.task}" has no explicit **Verify:** field in TODO.md. Tick halted — every task must declare how to verify it.`);
|
|
761
|
+
return {
|
|
762
|
+
outcome: 'halted',
|
|
763
|
+
reason: 'no-verify-field',
|
|
764
|
+
phaseResults: {},
|
|
765
|
+
elapsedSeconds: 0,
|
|
766
|
+
verifyRan: false,
|
|
767
|
+
verifyPass: false,
|
|
768
|
+
};
|
|
769
|
+
}
|
|
582
770
|
|
|
583
771
|
for (const phase of ['plan', 'do', 'review']) {
|
|
584
772
|
const t0 = Date.now();
|
|
@@ -595,7 +783,7 @@ function runTaskOnce(context, options = {}) {
|
|
|
595
783
|
// After review succeeds, run verify command if present
|
|
596
784
|
let verifyPass = false;
|
|
597
785
|
let verifyRan = false;
|
|
598
|
-
if (
|
|
786
|
+
if (verifyCmd) {
|
|
599
787
|
verifyRan = true;
|
|
600
788
|
let t0 = Date.now();
|
|
601
789
|
try {
|
|
@@ -620,7 +808,7 @@ function runTaskOnce(context, options = {}) {
|
|
|
620
808
|
}
|
|
621
809
|
|
|
622
810
|
return {
|
|
623
|
-
success:
|
|
811
|
+
success: verifyRan && verifyPass,
|
|
624
812
|
elapsedSeconds: Math.round((Date.now() - startedAt) / 1000),
|
|
625
813
|
phaseResults,
|
|
626
814
|
reviewOutput,
|
|
@@ -673,28 +861,28 @@ function computeTickReward(execution, tickOutcome, verifyCmd) {
|
|
|
673
861
|
|
|
674
862
|
// Validator clean: review passed without 'failed'
|
|
675
863
|
if (!execution.reviewOutput || !execution.reviewOutput.includes('failed')) {
|
|
676
|
-
reward +=
|
|
864
|
+
reward += REWARD_CONFIG.REVIEW_CLEAN;
|
|
677
865
|
}
|
|
678
866
|
|
|
679
|
-
// Verify passed
|
|
867
|
+
// Verify passed
|
|
680
868
|
if (execution.verifyRan && execution.verifyPass) {
|
|
681
|
-
reward +=
|
|
869
|
+
reward += REWARD_CONFIG.VERIFY_PASS;
|
|
682
870
|
}
|
|
683
871
|
|
|
684
|
-
// npm test passed
|
|
872
|
+
// npm test passed
|
|
685
873
|
if (execution.verifyRan && execution.verifyPass && verifyCmd === 'npm test') {
|
|
686
|
-
reward +=
|
|
874
|
+
reward += REWARD_CONFIG.NPM_TEST_BONUS;
|
|
687
875
|
}
|
|
688
876
|
|
|
689
877
|
// Commit landed: check do phase output for git commit patterns
|
|
690
878
|
const doOutput = execution.phaseResults.do.output || '';
|
|
691
879
|
if (doOutput.match(/\[.*\s\d+\sfile.*changed/i) || doOutput.includes('git commit') || doOutput.includes('committed')) {
|
|
692
|
-
reward +=
|
|
880
|
+
reward += REWARD_CONFIG.COMMIT_LANDED;
|
|
693
881
|
}
|
|
694
882
|
|
|
695
|
-
// Halt caught hallucination
|
|
883
|
+
// Halt caught hallucination
|
|
696
884
|
if (tickOutcome === 'halted') {
|
|
697
|
-
reward
|
|
885
|
+
reward += REWARD_CONFIG.HALT_PENALTY;
|
|
698
886
|
}
|
|
699
887
|
|
|
700
888
|
return reward;
|
|
@@ -1070,7 +1258,15 @@ function getRecentSignals(cwd) {
|
|
|
1070
1258
|
/**
|
|
1071
1259
|
* Score endgame candidates by historical reward of similar horizon types.
|
|
1072
1260
|
* Reads last 10 scorecards, infers type from slug prefix, calculates mean
|
|
1073
|
-
* reward per type, scores candidates by expected value
|
|
1261
|
+
* reward per type, scores candidates by expected value.
|
|
1262
|
+
*
|
|
1263
|
+
* Adaptive explore rate: if the last 5 endgames are all the same type,
|
|
1264
|
+
* explore rate boosts to 50%. Otherwise scales between 20%-50% based on
|
|
1265
|
+
* type repetition in the last 5.
|
|
1266
|
+
*
|
|
1267
|
+
* Difficulty floor: candidates whose inferred type has >80% success rate
|
|
1268
|
+
* AND mean reward >5 are filtered out when harder candidates exist, so
|
|
1269
|
+
* easy wins don't starve hard work.
|
|
1074
1270
|
*
|
|
1075
1271
|
* @param {string} cwd - Current working directory
|
|
1076
1272
|
* @param {array} candidates - Array of { title, confidence, rationale }
|
|
@@ -1094,10 +1290,14 @@ function scoreEndgameCandidates(cwd, candidates) {
|
|
|
1094
1290
|
|
|
1095
1291
|
// Infer type from slug/title by taking prefix before first dash
|
|
1096
1292
|
const typeToRewards = {};
|
|
1293
|
+
const typeToAttempts = {}; // track shipped/attempted per type
|
|
1097
1294
|
for (const sc of scorecards) {
|
|
1098
1295
|
const type = sc.slug.split('-')[0];
|
|
1099
1296
|
if (!typeToRewards[type]) typeToRewards[type] = [];
|
|
1100
1297
|
typeToRewards[type].push(sc.totalReward);
|
|
1298
|
+
if (!typeToAttempts[type]) typeToAttempts[type] = { shipped: 0, attempted: 0 };
|
|
1299
|
+
typeToAttempts[type].shipped += sc.tasksShipped;
|
|
1300
|
+
typeToAttempts[type].attempted += sc.tasksAttempted;
|
|
1101
1301
|
}
|
|
1102
1302
|
|
|
1103
1303
|
// Calculate mean reward per type
|
|
@@ -1107,45 +1307,70 @@ function scoreEndgameCandidates(cwd, candidates) {
|
|
|
1107
1307
|
typeMeans[type] = mean;
|
|
1108
1308
|
}
|
|
1109
1309
|
|
|
1310
|
+
// Calculate success rate per type
|
|
1311
|
+
const typeSuccessRate = {};
|
|
1312
|
+
for (const [type, counts] of Object.entries(typeToAttempts)) {
|
|
1313
|
+
typeSuccessRate[type] = counts.attempted > 0 ? counts.shipped / counts.attempted : 0;
|
|
1314
|
+
}
|
|
1315
|
+
|
|
1316
|
+
// Adaptive explore rate based on diversity of last 5 scorecards
|
|
1317
|
+
const last5 = scorecards.slice(-5);
|
|
1318
|
+
const last5Types = last5.map(sc => sc.slug.split('-')[0]);
|
|
1319
|
+
const uniqueTypes = new Set(last5Types).size;
|
|
1320
|
+
// All same type → exploreRate=0.5; all different → exploreRate=0.2
|
|
1321
|
+
// Linear interpolation: exploreRate = 0.5 - (uniqueTypes - 1) * 0.3 / (last5Types.length - 1 || 1)
|
|
1322
|
+
const maxTypes = last5Types.length;
|
|
1323
|
+
const exploreRate = maxTypes <= 1
|
|
1324
|
+
? 0.2
|
|
1325
|
+
: 0.5 - (uniqueTypes - 1) * 0.3 / (maxTypes - 1);
|
|
1326
|
+
|
|
1110
1327
|
// Score each candidate by expected value based on historical type mean
|
|
1111
1328
|
const scored = candidates.map(c => {
|
|
1112
1329
|
// Infer type from title keywords that match scorecard slug prefixes
|
|
1113
1330
|
const titleLower = (c.title || '').toLowerCase();
|
|
1114
1331
|
const cType = Object.keys(typeMeans).find(t => titleLower.includes(t)) || titleLower.split(/[\s\-]+/)[0];
|
|
1115
1332
|
const historicalMean = typeMeans[cType] !== undefined ? typeMeans[cType] : 0;
|
|
1333
|
+
const successRate = typeSuccessRate[cType] !== undefined ? typeSuccessRate[cType] : 0;
|
|
1116
1334
|
const expectedValue = historicalMean * c.confidence;
|
|
1117
1335
|
return {
|
|
1118
1336
|
...c,
|
|
1119
1337
|
expectedValue,
|
|
1120
1338
|
type: cType,
|
|
1121
|
-
historicalMean
|
|
1339
|
+
historicalMean,
|
|
1340
|
+
successRate
|
|
1122
1341
|
};
|
|
1123
1342
|
});
|
|
1124
1343
|
|
|
1344
|
+
// Difficulty floor: filter out easy-win candidates (>80% success rate AND
|
|
1345
|
+
// mean reward >5) when harder candidates exist
|
|
1346
|
+
const hardCandidates = scored.filter(c => !(c.successRate > 0.8 && c.historicalMean > 5));
|
|
1347
|
+
const pool = hardCandidates.length > 0 ? hardCandidates : scored;
|
|
1348
|
+
|
|
1125
1349
|
// Sort by expected value (descending)
|
|
1126
|
-
|
|
1350
|
+
pool.sort((a, b) => b.expectedValue - a.expectedValue);
|
|
1127
1351
|
|
|
1128
|
-
//
|
|
1352
|
+
// Adaptive exploit/explore split
|
|
1129
1353
|
const choice = Math.random();
|
|
1130
1354
|
let selected;
|
|
1131
|
-
if (choice <
|
|
1355
|
+
if (choice < (1 - exploreRate)) {
|
|
1132
1356
|
// Exploit: return highest expected value
|
|
1133
|
-
selected =
|
|
1357
|
+
selected = pool[0];
|
|
1134
1358
|
} else {
|
|
1135
|
-
// Explore: return random candidate
|
|
1359
|
+
// Explore: return random candidate from full scored list (not filtered)
|
|
1136
1360
|
selected = scored[Math.floor(Math.random() * scored.length)];
|
|
1137
1361
|
}
|
|
1138
1362
|
|
|
1139
|
-
const reason = choice <
|
|
1140
|
-
? `exploit: type=${selected.type} mean-reward=${selected.historicalMean.toFixed(1)} expected-value=${selected.expectedValue.toFixed(1)}`
|
|
1141
|
-
: `explore: random-candidate type=${selected.type}`;
|
|
1363
|
+
const reason = choice < (1 - exploreRate)
|
|
1364
|
+
? `exploit: type=${selected.type} mean-reward=${selected.historicalMean.toFixed(1)} expected-value=${selected.expectedValue.toFixed(1)} explore-rate=${exploreRate.toFixed(2)}`
|
|
1365
|
+
: `explore: random-candidate type=${selected.type} explore-rate=${exploreRate.toFixed(2)}`;
|
|
1142
1366
|
|
|
1143
1367
|
return {
|
|
1144
1368
|
title: selected.title,
|
|
1145
1369
|
confidence: selected.confidence,
|
|
1146
1370
|
rationale: selected.rationale,
|
|
1147
1371
|
scored: true,
|
|
1148
|
-
reason
|
|
1372
|
+
reason,
|
|
1373
|
+
exploreRate
|
|
1149
1374
|
};
|
|
1150
1375
|
} catch (err) {
|
|
1151
1376
|
// If scoring fails, fall back to best by confidence
|
|
@@ -1154,14 +1379,71 @@ function scoreEndgameCandidates(cwd, candidates) {
|
|
|
1154
1379
|
}
|
|
1155
1380
|
}
|
|
1156
1381
|
|
|
1382
|
+
/**
 * Check whether a lesson's bug pattern is still present in the named files.
 * Parses the lesson line for backtick-quoted file paths
 * (e.g. `commands/autopilot.js:116`) and the bold slug
 * (e.g. `inbox-parser-eats-hr-separator`). Each named file that exists is
 * searched, case-insensitively, for the slug's keywords (dash-separated
 * words longer than two characters). If none match → lesson is resolved.
 *
 * The search is done in-process rather than by spawning `grep`, so a missing
 * binary or a spawn error can never be mistaken for "keyword not found".
 * A named path that exists but cannot be read (e.g. it is a directory) is
 * treated conservatively as "cannot confirm" and the lesson stays unresolved.
 *
 * @param {string} lessonLine - A single line from lessons.md
 * @param {string} cwd - Current working directory
 * @returns {boolean} true if the lesson's bug pattern is gone (resolved);
 *   false if any keyword is still found, or if the line has no slug, no
 *   file references, or no usable keywords.
 */
function isLessonResolved(lessonLine, cwd) {
  // Extract slug: bold text after date, e.g. **[2026-04-08] inbox-parser-eats-hr-separator**
  const slugMatch = lessonLine.match(/\*\*\[\d{4}-\d{2}-\d{2}\]\s+([\w-]+)\*\*/);
  if (!slugMatch) return false;

  // Extract file paths: patterns like `commands/autopilot.js:116` or `commands/run.js:157`
  const fileRefs = [];
  const filePattern = /`([a-zA-Z0-9_/./-]+\.[a-zA-Z]+(?::\d+(?:-\d+)?)?)`/g;
  let m;
  while ((m = filePattern.exec(lessonLine)) !== null) {
    const ref = m[1].replace(/:\d+(-\d+)?$/, ''); // strip line numbers
    if (ref.includes('/') || ref.endsWith('.js') || ref.endsWith('.md') || ref.endsWith('.ts')) {
      fileRefs.push(ref);
    }
  }
  if (fileRefs.length === 0) return false;

  // Keywords are word characters only (the slug regex guarantees it), so a
  // lowercase substring test matches exactly what `grep -i` matched.
  const keywords = slugMatch[1]
    .split('-')
    .filter(w => w.length > 2)
    .map(w => w.toLowerCase());
  if (keywords.length === 0) return false;

  for (const ref of fileRefs) {
    const absPath = path.isAbsolute(ref) ? ref : path.join(cwd, ref);
    if (!fs.existsSync(absPath)) continue; // file deleted = pattern gone

    let haystack;
    try {
      haystack = fs.readFileSync(absPath, 'utf8').toLowerCase();
    } catch {
      // Exists but unreadable: absence of the pattern cannot be confirmed.
      return false;
    }

    // Any keyword still present → lesson still applies.
    if (keywords.some(kw => haystack.includes(kw))) return false;
  }

  // No keyword matched in any named file → lesson is resolved
  return true;
}
|
|
1437
|
+
|
|
1157
1438
|
/**
|
|
1158
1439
|
* Propose 3 candidate next horizons for the autopilot loop. Combines
|
|
1159
1440
|
* `getIdleTickCount` + `getRecentSignals` into a prompt asking the LLM
|
|
1160
1441
|
* to imagine what to work on next, spawns `claude -p`, and parses the
|
|
1161
1442
|
* JSON response into `[{ title, confidence, rationale }]`.
|
|
1162
1443
|
*
|
|
1163
|
-
*
|
|
1164
|
-
*
|
|
1444
|
+
* Filters out candidates derived from resolved lessons (bug pattern no
|
|
1445
|
+
* longer present in named files). Resolved lessons get tagged `[resolved]`
|
|
1446
|
+
* in lessons.md. Requires at least 1 valid candidate after filtering.
|
|
1165
1447
|
*/
|
|
1166
1448
|
async function proposeCandidateHorizons(cwd) {
|
|
1167
1449
|
const idleTicks = getIdleTickCount(cwd);
|
|
@@ -1257,11 +1539,49 @@ Reply with the JSON array and nothing else.`;
|
|
|
1257
1539
|
c.rationale.length > 0
|
|
1258
1540
|
);
|
|
1259
1541
|
|
|
1260
|
-
if (candidates.length <
|
|
1261
|
-
throw new Error(`proposeCandidateHorizons: expected
|
|
1542
|
+
if (candidates.length < 1) {
|
|
1543
|
+
throw new Error(`proposeCandidateHorizons: expected at least 1 valid candidate, got ${candidates.length}`);
|
|
1544
|
+
}
|
|
1545
|
+
|
|
1546
|
+
// Filter out candidates derived from resolved lessons
|
|
1547
|
+
const lessonsPath = path.join(cwd, 'atris', 'lessons.md');
|
|
1548
|
+
const filtered = [];
|
|
1549
|
+
for (const c of candidates) {
|
|
1550
|
+
const combinedText = `${c.title} ${c.rationale}`.toLowerCase();
|
|
1551
|
+
let droppedByLesson = false;
|
|
1552
|
+
for (const lessonLine of signals.recentLessons) {
|
|
1553
|
+
const slugMatch = lessonLine.match(/\*\*\[\d{4}-\d{2}-\d{2}\]\s+([\w-]+)\*\*/);
|
|
1554
|
+
if (!slugMatch) continue;
|
|
1555
|
+
if (lessonLine.includes('[resolved]')) continue;
|
|
1556
|
+
const slug = slugMatch[1];
|
|
1557
|
+
// Fuzzy match: check if slug keywords appear in the candidate text
|
|
1558
|
+
const slugWords = slug.split('-').filter(w => w.length > 2);
|
|
1559
|
+
const matchCount = slugWords.filter(w => combinedText.includes(w)).length;
|
|
1560
|
+
if (matchCount < Math.ceil(slugWords.length * 0.5)) continue;
|
|
1561
|
+
// Candidate matches this lesson — check if the lesson is resolved
|
|
1562
|
+
if (isLessonResolved(lessonLine, cwd)) {
|
|
1563
|
+
// Tag lesson [resolved] in lessons.md
|
|
1564
|
+
try {
|
|
1565
|
+
let content = fs.readFileSync(lessonsPath, 'utf8');
|
|
1566
|
+
const taggedLine = lessonLine.replace(
|
|
1567
|
+
/\*\*\[(\d{4}-\d{2}-\d{2})\]\s+([\w-]+)\*\*/,
|
|
1568
|
+
'**[$1] $2** [resolved]'
|
|
1569
|
+
);
|
|
1570
|
+
content = content.replace(lessonLine.trim(), taggedLine.trim());
|
|
1571
|
+
fs.writeFileSync(lessonsPath, content);
|
|
1572
|
+
} catch {}
|
|
1573
|
+
droppedByLesson = true;
|
|
1574
|
+
break;
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
if (!droppedByLesson) filtered.push(c);
|
|
1578
|
+
}
|
|
1579
|
+
|
|
1580
|
+
if (filtered.length < 1) {
|
|
1581
|
+
throw new Error('proposeCandidateHorizons: all candidates were from resolved lessons');
|
|
1262
1582
|
}
|
|
1263
1583
|
|
|
1264
|
-
return
|
|
1584
|
+
return filtered.slice(0, 3);
|
|
1265
1585
|
}
|
|
1266
1586
|
|
|
1267
1587
|
async function autopilotAtris(description, options = {}) {
|
|
@@ -1361,7 +1681,7 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1361
1681
|
break;
|
|
1362
1682
|
}
|
|
1363
1683
|
|
|
1364
|
-
const suggestion = await suggestNextTask(cwd, skipped);
|
|
1684
|
+
const suggestion = await suggestNextTask(cwd, skipped, { auto });
|
|
1365
1685
|
|
|
1366
1686
|
if (!suggestion) {
|
|
1367
1687
|
tickOutcome = 'idle';
|
|
@@ -1472,6 +1792,22 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1472
1792
|
const execution = runTaskOnce(context, { verbose, cwd });
|
|
1473
1793
|
lastExecution = execution;
|
|
1474
1794
|
lastVerifyCmd = execution.verifyCmd;
|
|
1795
|
+
|
|
1796
|
+
// Early halt — judge corruption or no verify field
|
|
1797
|
+
if (execution.outcome === 'halted') {
|
|
1798
|
+
tickOutcome = 'halted';
|
|
1799
|
+
tickOutcomeText = `I halted before running "${lastTaskTitle}": ${execution.reason}.`;
|
|
1800
|
+
tickNextStep = 'stop until a human looks at the error';
|
|
1801
|
+
if (!verbose) {
|
|
1802
|
+
printPlainBlock([
|
|
1803
|
+
`I halted: ${execution.reason}.`,
|
|
1804
|
+
'',
|
|
1805
|
+
'Next I stopped the loop.'
|
|
1806
|
+
].join('\n'));
|
|
1807
|
+
}
|
|
1808
|
+
break;
|
|
1809
|
+
}
|
|
1810
|
+
|
|
1475
1811
|
const planTime = execution.phaseResults.plan.elapsedSeconds;
|
|
1476
1812
|
if (verbose) console.log(` planned (${planTime}s)`);
|
|
1477
1813
|
|
|
@@ -1523,6 +1859,39 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1523
1859
|
tickOutcomeText = `I planned, built, and reviewed "${suggestion.task}".`;
|
|
1524
1860
|
tickNextStep = 'pick the next endgame task';
|
|
1525
1861
|
logCompletion(suggestion.task);
|
|
1862
|
+
|
|
1863
|
+
// Record commit hash + verify command for retroactive regression checks
|
|
1864
|
+
try {
|
|
1865
|
+
const commitHash = execSync('git rev-parse HEAD', { cwd, encoding: 'utf8' }).trim();
|
|
1866
|
+
const taskSlug = (suggestion.task || 'unknown').replace(/\s+/g, '-').toLowerCase().slice(0, 40);
|
|
1867
|
+
recordTickCommit(cwd, commitHash, execution.verifyCmd || '', taskSlug);
|
|
1868
|
+
|
|
1869
|
+
// Every 10th tick, run retroactive regression check
|
|
1870
|
+
const registryPath = path.join(cwd, 'atris', 'tick-registry.json');
|
|
1871
|
+
if (fs.existsSync(registryPath)) {
|
|
1872
|
+
try {
|
|
1873
|
+
const registry = JSON.parse(fs.readFileSync(registryPath, 'utf8'));
|
|
1874
|
+
if (Array.isArray(registry) && registry.length % 10 === 0) {
|
|
1875
|
+
const regressionResults = regressionCheck(cwd);
|
|
1876
|
+
const failures = regressionResults.filter(r => !r.pass && !r.skipped);
|
|
1877
|
+
if (failures.length > 0) {
|
|
1878
|
+
// Apply -5 retroactive penalty per failure via journal note
|
|
1879
|
+
for (const f of failures) {
|
|
1880
|
+
appendTickSummary(cwd, {
|
|
1881
|
+
outcome: `Retroactive regression failure: tick ${f.hash.slice(0, 7)} (${f.slug}) verify now fails. -5 penalty.`,
|
|
1882
|
+
horizon: readHorizonSlug(cwd),
|
|
1883
|
+
nextStep: 'investigate regression',
|
|
1884
|
+
reward: -5,
|
|
1885
|
+
});
|
|
1886
|
+
}
|
|
1887
|
+
if (verbose) console.log(` regression check: ${failures.length} failure(s) found`);
|
|
1888
|
+
} else if (verbose) {
|
|
1889
|
+
console.log(` regression check: all ${regressionResults.length} entries pass`);
|
|
1890
|
+
}
|
|
1891
|
+
}
|
|
1892
|
+
} catch { /* registry read failure must not crash */ }
|
|
1893
|
+
}
|
|
1894
|
+
} catch { /* commit recording failure must not crash the tick */ }
|
|
1526
1895
|
if (maybeWriteCompletedEndgameScorecard(cwd, startingEndgame)) {
|
|
1527
1896
|
tickNextStep = 'pick the next horizon';
|
|
1528
1897
|
}
|
|
@@ -1602,6 +1971,152 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1602
1971
|
return { success: completed > 0, completed };
|
|
1603
1972
|
}
|
|
1604
1973
|
|
|
1974
|
+
/**
 * Compute the age of a task in whole days.
 *
 * Resolution order:
 *   1. The first `YYYY-MM-DD` date found in `task.claimed`.
 *   2. For tasks tagged `endgame`, the first `**Picked:** YYYY-MM-DD` date
 *      found in the file at `todoPath`.
 *   3. Otherwise 0 (the task is treated as fresh).
 *
 * Date-only ISO strings are parsed by `Date` as UTC midnight. Dates in the
 * future (clock skew, typos) are clamped to an age of 0 rather than
 * producing a negative age.
 *
 * @param {{ claimed?: string, tag?: string }} task - task record; only
 *   `claimed` and `tag` are read
 * @param {string} [todoPath] - path to the TODO file consulted for endgame tasks
 * @returns {number} non-negative age in whole days
 */
function getTaskAgeDays(task, todoPath) {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;

  // Whole days elapsed since an ISO date string, or null when it cannot be parsed.
  const daysSince = (isoDate) => {
    const then = new Date(isoDate).getTime();
    if (Number.isNaN(then)) return null;
    return Math.max(0, Math.floor((Date.now() - then) / MS_PER_DAY));
  };

  if (!task) return 0;

  // Guard the type: a non-string `claimed` has no .match() and used to throw.
  if (typeof task.claimed === 'string') {
    const claimedDate = task.claimed.match(/\d{4}-\d{2}-\d{2}/);
    if (claimedDate) {
      const age = daysSince(claimedDate[0]);
      if (age !== null) return age;
    }
  }

  if (task.tag === 'endgame' && todoPath) {
    let content = '';
    try {
      content = fs.readFileSync(todoPath, 'utf8');
    } catch {
      // Missing or unreadable TODO file: age lookup is best-effort, so fall
      // through to the "fresh" default instead of crashing the caller.
      content = '';
    }
    const picked = content.match(/\*\*Picked:\*\*\s*(\d{4}-\d{2}-\d{2})/);
    if (picked) {
      const age = daysSince(picked[1]);
      if (age !== null) return age;
    }
  }

  return 0;
}
|
|
1998
|
+
|
|
1999
|
+
/**
 * Check whether a task/fact is still actionable using mechanical evidence only.
 *
 * Decision order:
 *   1. age <= 7 days                              -> 'actionable'
 *   2. no usable keywords in the title            -> 'unverified'
 *   3. `source` given but the file is gone        -> 'stale'
 *   4. no keyword found in *.js / *.md under cwd  -> 'stale'
 *      (or 'unverified' when grep itself failed, so nothing was proven)
 *   5. a keyword appears in a commit message from the last 30 days -> 'actionable'
 *   6. otherwise                                  -> 'unverified'
 *
 * @param {{ title: string, age: number, source?: string }} fact
 *   - title: the task or fact description
 *   - age: age in days since the task was created/last verified
 *   - source: optional file path (absolute, or relative to cwd) where the fact originated
 * @param {string} cwd - workspace root
 * @returns {'actionable'|'unverified'|'stale'}
 */
function isStillTrue(fact, cwd) {
  const { title, age, source } = fact;

  // Fresh tasks are always actionable
  if (age <= 7) return 'actionable';

  // Extract up to five searchable keywords from the title: strip bracket and
  // backtick noise, split on whitespace/path/punctuation, drop short words.
  const keywords = String(title || '')
    .replace(/[`\[\](){}]/g, '')
    .split(/[\s/\\.:,;]+/)
    .filter((word) => word.length > 3)
    .slice(0, 5);

  if (keywords.length === 0) return 'unverified';

  // Strategy 1: if a source file is given, it must still exist
  if (source) {
    const sourcePath = path.isAbsolute(source) ? source : path.join(cwd, source);
    if (!fs.existsSync(sourcePath)) return 'stale';
  }

  // Strategy 2: grep the codebase for key terms from the title
  let grepHits = 0;
  let grepInconclusive = false;
  for (const kw of keywords) {
    try {
      execFileSync(
        'grep',
        [
          '-r',
          '-q', // stop at the first match instead of listing every matching file
          '-F', // keywords are literal text, not regular expressions
          '--include=*.js',
          '--include=*.md',
          '--exclude-dir=node_modules', // vendored code is not evidence about this codebase
          '--exclude-dir=.git',
          '-e', kw, // -e keeps keywords such as "--force" from being parsed as options
          '.',
        ],
        { cwd, stdio: 'ignore', timeout: 10000 }
      );
      grepHits++;
    } catch (err) {
      // Exit status 1 means "no match". Anything else (timeout, grep missing,
      // read errors) means the search did not actually prove absence.
      if (err.status !== 1) grepInconclusive = true;
    }
  }

  // None of the keywords appear in the codebase: stale, unless the search
  // itself failed, in which case we cannot claim staleness.
  if (grepHits === 0) return grepInconclusive ? 'unverified' : 'stale';

  // Strategy 3: check git log for recent activity related to the keywords
  let gitHits = 0;
  for (const kw of keywords.slice(0, 3)) {
    try {
      const out = execFileSync(
        'git',
        ['log', '--oneline', '--since=30 days ago', '--all', '--fixed-strings', `--grep=${kw}`, '-1'],
        { cwd, stdio: ['ignore', 'pipe', 'ignore'], timeout: 10000 }
      ).toString().trim();
      if (out.length > 0) gitHits++;
    } catch {
      // git-log failure (not a repository, git missing, timeout) is non-fatal:
      // it simply contributes no evidence of recent activity.
    }
  }

  // Strong mechanical evidence: grep found terms AND recent git activity
  if (gitHits > 0) return 'actionable';

  // Grep found terms but no recent git activity — can't fully verify
  return 'unverified';
}
|
|
2068
|
+
|
|
2069
|
+
/**
 * Ask a model whether a task/fact is still relevant.
 * Intended for when isStillTrue returns 'unverified' — the mechanical check
 * couldn't confirm or deny, so `claude -p` is asked to inspect the codebase.
 *
 * The prompt is passed directly as an argv entry (no shell, no temp file), so
 * quotes or shell metacharacters in the title cannot affect the command and
 * nothing is written into the workspace.
 *
 * Never throws: any failure (CLI missing, timeout, bad cwd, non-zero exit) is
 * reported as `{ fresh: false, reasoning: 'Model check failed: …' }`.
 *
 * @param {{ title: string, age: number, source?: string }} fact
 * @param {string} cwd - workspace root; the CLI is run from here
 * @returns {{ fresh: boolean, reasoning: string }} `fresh` is true only when
 *   the verdict line starts with YES; `reasoning` is the text after the verdict
 *   word, or the first 200 characters of output when that is empty
 */
function askModel(fact, cwd) {
  const { title, source } = fact;
  const sourceHint = source ? `\nOriginal source file: ${source}` : '';
  const prompt = `You are a staleness checker. Answer with exactly one line: YES or NO, followed by a short reason (under 30 words).

Is this task still relevant to the codebase? Check for the mentioned files, functions, or patterns.

Task: "${title}"${sourceHint}

Search the codebase to verify. Reply: YES <reason> or NO <reason>`;

  try {
    const env = { ...process.env };
    // NOTE(review): presumably removed so the child CLI does not think it is
    // nested inside another Claude Code session — confirm.
    delete env.CLAUDECODE;

    const output = execFileSync(
      'claude',
      ['-p', prompt, '--allowedTools', 'Bash,Read,Glob,Grep'],
      {
        cwd,
        encoding: 'utf8',
        timeout: 60000,
        stdio: 'pipe',
        maxBuffer: 2 * 1024 * 1024,
        env,
      }
    ).trim();

    // Prefer the first line that starts with a YES/NO verdict; fall back to
    // the first line of output when the model ignored the format.
    const lines = output.split('\n');
    const verdictLine = lines.find((line) => /^\s*(?:YES|NO)\b/i.test(line)) || lines[0] || '';
    const fresh = /^\s*YES\b/i.test(verdictLine);
    // \b ensures only a whole YES/NO word is stripped (not the "No" in "Nothing").
    const reasoning = verdictLine.replace(/^\s*(?:YES|NO)\b\s*/i, '').trim() || output.slice(0, 200);

    return { fresh, reasoning };
  } catch (err) {
    // On timeout or crash, treat as unverifiable — conservative default
    return { fresh: false, reasoning: `Model check failed: ${(err.message || '').slice(0, 100)}` };
  }
}
|
|
2119
|
+
|
|
1605
2120
|
/**
|
|
1606
2121
|
* Entry point when called without a description.
|
|
1607
2122
|
*/
|
|
@@ -1611,18 +2126,26 @@ async function autopilotFromTodo(options = {}) {
|
|
|
1611
2126
|
|
|
1612
2127
|
module.exports = {
|
|
1613
2128
|
appendTickSummary,
|
|
2129
|
+
askHuman,
|
|
2130
|
+
askModel,
|
|
1614
2131
|
autopilotAtris,
|
|
1615
2132
|
autopilotFromTodo,
|
|
1616
2133
|
buildPrompt,
|
|
2134
|
+
isLessonResolved,
|
|
2135
|
+
isStillTrue,
|
|
2136
|
+
getTaskAgeDays,
|
|
1617
2137
|
getIdleTickCount,
|
|
1618
2138
|
getRecentSignals,
|
|
1619
2139
|
getTickStatus,
|
|
1620
2140
|
getVerifyCommand,
|
|
1621
2141
|
computeTickReward,
|
|
2142
|
+
verifyJudgeIntegrity,
|
|
1622
2143
|
maybeWriteCompletedEndgameScorecard,
|
|
1623
2144
|
renderHumanSuggestion,
|
|
1624
2145
|
renderHumanTickIntro,
|
|
1625
2146
|
proposeCandidateHorizons,
|
|
2147
|
+
recordTickCommit,
|
|
2148
|
+
regressionCheck,
|
|
1626
2149
|
runTaskOnce,
|
|
1627
2150
|
scoreEndgameCandidates,
|
|
1628
2151
|
suggestNextTask,
|