atris 3.2.0 → 3.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GETTING_STARTED.md +65 -131
- package/README.md +18 -2
- package/atris/GETTING_STARTED.md +65 -131
- package/atris/PERSONA.md +5 -1
- package/atris/atris.md +122 -153
- package/atris/skills/aeo/SKILL.md +117 -0
- package/atris/skills/atris/SKILL.md +49 -25
- package/atris/skills/create-member/SKILL.md +29 -9
- package/atris/skills/endgame/SKILL.md +9 -0
- package/atris/skills/research-search/SKILL.md +167 -0
- package/atris/skills/research-search/arxiv_search.py +157 -0
- package/atris/skills/research-search/program.md +48 -0
- package/atris/skills/research-search/results.tsv +6 -0
- package/atris/skills/research-search/scholar_search.py +154 -0
- package/atris/skills/tidy/SKILL.md +36 -21
- package/atris/team/_template/MEMBER.md +2 -0
- package/atris/team/validator/MEMBER.md +35 -1
- package/atris.md +118 -178
- package/bin/atris.js +46 -12
- package/cli/__pycache__/atris_code.cpython-314.pyc +0 -0
- package/cli/__pycache__/runtime_guard.cpython-312.pyc +0 -0
- package/cli/__pycache__/runtime_guard.cpython-314.pyc +0 -0
- package/cli/atris_code.py +889 -0
- package/cli/runtime_guard.py +693 -0
- package/commands/align.js +16 -0
- package/commands/app.js +316 -0
- package/commands/autopilot.js +863 -23
- package/commands/brainstorm.js +7 -5
- package/commands/business.js +677 -2
- package/commands/clean.js +19 -3
- package/commands/computer.js +2022 -43
- package/commands/context-sync.js +5 -0
- package/commands/integrations.js +14 -9
- package/commands/lifecycle.js +12 -0
- package/commands/plugin.js +24 -0
- package/commands/pull.js +86 -11
- package/commands/push.js +153 -9
- package/commands/serve.js +1 -0
- package/commands/sync.js +272 -76
- package/commands/verify.js +50 -1
- package/commands/wiki.js +27 -2
- package/commands/workflow.js +24 -9
- package/lib/file-ops.js +13 -1
- package/lib/journal.js +23 -0
- package/lib/manifest.js +3 -0
- package/lib/scorecard.js +42 -4
- package/lib/sync-telemetry.js +59 -0
- package/lib/todo.js +6 -0
- package/lib/wiki.js +150 -6
- package/lib/workspace-safety.js +87 -0
- package/package.json +2 -1
- package/utils/api.js +19 -0
- package/utils/auth.js +25 -1
- package/utils/config.js +24 -0
- package/utils/update-check.js +16 -0
package/commands/autopilot.js
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
const fs = require('fs');
|
|
10
10
|
const path = require('path');
|
|
11
|
-
const { execSync, execFileSync } = require('child_process');
|
|
11
|
+
const { execSync, execFileSync, spawnSync } = require('child_process');
|
|
12
12
|
const readline = require('readline');
|
|
13
13
|
const { getLogPath, ensureLogDirectory, createLogFile } = require('../lib/journal');
|
|
14
14
|
const { parseTodo } = require('../lib/todo');
|
|
@@ -73,7 +73,8 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
|
|
|
73
73
|
task: `Re-read sources and update ${pageName}`,
|
|
74
74
|
why: `"${sp.staleSource}" changed on ${sp.sourceDate} but the page was last compiled ${sp.compiledDate}. The content may be wrong.`,
|
|
75
75
|
kind: 'staleness',
|
|
76
|
-
priority: 2
|
|
76
|
+
priority: 2,
|
|
77
|
+
skipKey: key
|
|
77
78
|
});
|
|
78
79
|
break;
|
|
79
80
|
}
|
|
@@ -87,7 +88,8 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
|
|
|
87
88
|
task: `Finish or remove stale task: ${st.title}`,
|
|
88
89
|
why: `Claimed ${st.daysSinceClaim} days ago and never completed. Either finish it or delete it — stale tasks add noise.`,
|
|
89
90
|
kind: 'cleanup',
|
|
90
|
-
priority: 3
|
|
91
|
+
priority: 3,
|
|
92
|
+
skipKey: key
|
|
91
93
|
});
|
|
92
94
|
}
|
|
93
95
|
|
|
@@ -103,6 +105,23 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
|
|
|
103
105
|
});
|
|
104
106
|
}
|
|
105
107
|
|
|
108
|
+
// --- Self-healing: unresolved fail lessons (bug still present per grep) ---
|
|
109
|
+
if (!skipped.has('self-heal')) {
|
|
110
|
+
const failLesson = pickUnresolvedFailLesson(cwd);
|
|
111
|
+
if (failLesson && !skipped.has(`self-heal:${failLesson.slug}`)) {
|
|
112
|
+
suggestions.push({
|
|
113
|
+
task: `Fix unresolved fail lesson: ${failLesson.slug}`,
|
|
114
|
+
why: `Lesson from ${failLesson.date} tagged \`fail\` and grep confirms the bug pattern is still present in-repo. Self-heal before taking new work.`,
|
|
115
|
+
kind: 'self-heal',
|
|
116
|
+
priority: 4.5,
|
|
117
|
+
lessonLine: failLesson.line,
|
|
118
|
+
lessonSlug: failLesson.slug,
|
|
119
|
+
lessonDate: failLesson.date,
|
|
120
|
+
skipKey: `self-heal:${failLesson.slug}`
|
|
121
|
+
});
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
106
125
|
// --- Backlog tasks ---
|
|
107
126
|
for (const t of todo.backlog) {
|
|
108
127
|
if (t.tags && t.tags.includes('unverified')) continue;
|
|
@@ -117,6 +136,14 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
|
|
|
117
136
|
break;
|
|
118
137
|
}
|
|
119
138
|
|
|
139
|
+
// --- Proactive "surprise me" anomalies (didn't-ask-but-noticed signals) ---
|
|
140
|
+
try {
|
|
141
|
+
for (const anomaly of scanAnomalies(cwd)) {
|
|
142
|
+
if (anomaly.skipKey && skipped.has(anomaly.skipKey)) continue;
|
|
143
|
+
suggestions.push(anomaly);
|
|
144
|
+
}
|
|
145
|
+
} catch { /* anomaly scanner must never crash the tick */ }
|
|
146
|
+
|
|
120
147
|
// --- Unprocessed inbox items ---
|
|
121
148
|
const { logFile } = getLogPath();
|
|
122
149
|
if (fs.existsSync(logFile)) {
|
|
@@ -492,6 +519,27 @@ Read today's journal completions and the git log from the past few days.
|
|
|
492
519
|
Extract patterns worth remembering — things that surprised you, approaches that worked,
|
|
493
520
|
mistakes that were caught. Append to atris/lessons.md. One line per lesson. Be specific.
|
|
494
521
|
|
|
522
|
+
When done, reply: done.`;
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
if (kind === 'self-heal') {
|
|
526
|
+
const { lessonLine = '', lessonSlug = '' } = context;
|
|
527
|
+
const lessonBlock = lessonLine ? `\nUnresolved fail lesson:\n${lessonLine}\n` : '';
|
|
528
|
+
return `${baseRules}${lessonBlock}
|
|
529
|
+
Self-heal task: ${task}
|
|
530
|
+
|
|
531
|
+
This is an unresolved \`fail\` lesson from atris/lessons.md. grep confirms the bug pattern
|
|
532
|
+
is still present in-repo — the fix has NOT been shipped yet.
|
|
533
|
+
|
|
534
|
+
Plan the smallest fix:
|
|
535
|
+
1. Parse the lesson for file:line references and the described bug pattern.
|
|
536
|
+
2. Read those files to confirm the bug is exactly as described (or has drifted).
|
|
537
|
+
3. Write ONE task in atris/TODO.md with:
|
|
538
|
+
- **Exit:** the specific behavior that proves the fix
|
|
539
|
+
- **Verify:** a command that fails now and will pass after the fix${lessonSlug ? ` (include "${lessonSlug}" in the task title so the lesson auto-resolves)` : ''}
|
|
540
|
+
- **Rollback:** how to revert if the fix misses
|
|
541
|
+
4. Do NOT fix it in this phase — planner only. The executor will do the work.
|
|
542
|
+
|
|
495
543
|
When done, reply: done.`;
|
|
496
544
|
}
|
|
497
545
|
|
|
@@ -527,6 +575,32 @@ Task: ${task}
|
|
|
527
575
|
4. Update MAP.md only if file locations truly shifted because of your change.
|
|
528
576
|
5. If updating wiki pages, set last_compiled in frontmatter to today's date.
|
|
529
577
|
|
|
578
|
+
When done, reply: done.`;
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
if (kind === 'self-heal') {
|
|
582
|
+
const { lessonLine = '', lessonSlug = '' } = context;
|
|
583
|
+
const lessonBlock = lessonLine ? `\nUnresolved fail lesson:\n${lessonLine}\n` : '';
|
|
584
|
+
return `You are the executor. Read your MEMBER.md spec first if available.
|
|
585
|
+
|
|
586
|
+
Rules:
|
|
587
|
+
- You CAN read and write code. You CANNOT plan or create new tasks.
|
|
588
|
+
- Execute ONE step at a time. Verify each step before moving on.
|
|
589
|
+
- Check MAP.md for file locations before grepping.
|
|
590
|
+
- Stay in scope. Only fix the bug described in the lesson — no side quests.
|
|
591
|
+
|
|
592
|
+
Read these files first:
|
|
593
|
+
${readFiles}
|
|
594
|
+
${lessonBlock}
|
|
595
|
+
Self-heal task: ${task}
|
|
596
|
+
|
|
597
|
+
1. Find the self-heal task in TODO.md and claim it (Claimed by: Executor at ${new Date().toISOString()}).
|
|
598
|
+
2. Parse the lesson above for file:line references. Open those files and locate the bug pattern.
|
|
599
|
+
3. Make the smallest change that removes the bug pattern AND makes the lesson's Verify command pass.
|
|
600
|
+
4. Run the Verify command yourself to confirm it passes.
|
|
601
|
+
5. Update MAP.md only if file:line locations shifted because of your fix.
|
|
602
|
+
6. Commit: git add <specific-files> && git commit -m "fix: ${lessonSlug || 'self-heal'}"
|
|
603
|
+
|
|
530
604
|
When done, reply: done.`;
|
|
531
605
|
}
|
|
532
606
|
|
|
@@ -708,14 +782,46 @@ function regressionCheck(cwd) {
|
|
|
708
782
|
*/
|
|
709
783
|
function getVerifyCommand(cwd, taskTitle) {
  // An explicit Verify on the first TODO.md task whose title matches always wins.
  const todoFile = path.join(cwd, 'atris', 'TODO.md');
  if (fs.existsSync(todoFile)) {
    const { inProgress, backlog, completed } = parseTodo(todoFile);
    for (const entry of [...inProgress, ...backlog, ...completed]) {
      if (entry.title !== taskTitle) continue;
      // Only the first title match is consulted, even if a later duplicate
      // carries a verify field.
      if (entry.verify) return { cmd: entry.verify, explicit: true };
      break;
    }
  }

  // No explicit verify: fall back to a default inferred from the repo shape.
  // `explicit: false` lets callers tell an inferred command from a declared one.
  return { cmd: detectDefaultVerify(cwd), explicit: false };
}
|
|
716
796
|
|
|
717
|
-
|
|
718
|
-
|
|
797
|
+
/**
 * Infer a default verify command from the repo shape. Order matters:
 * package.json with a non-stub test script → `npm test`; then Python markers
 * (pytest.ini / pyproject.toml / setup.py) → `pytest`; then Cargo.toml →
 * `cargo test`; then go.mod → `go test ./...`; otherwise null (no default —
 * skip verify).
 *
 * @param {string} cwd - Repository root to inspect.
 * @returns {string|null} Shell command to run, or null when nothing matched.
 */
function detectDefaultVerify(cwd) {
  const exists = (name) => fs.existsSync(path.join(cwd, name));

  if (exists('package.json')) {
    try {
      const parsed = JSON.parse(fs.readFileSync(path.join(cwd, 'package.json'), 'utf8'));
      const test = parsed && parsed.scripts && parsed.scripts.test;
      // The `npm init` placeholder always exits 1, so using it as a verify
      // command would fail every task. Match it by its message rather than by
      // exact string so quoting/whitespace variants are also treated as stubs.
      const isRealScript =
        typeof test === 'string' &&
        test.trim() !== '' &&
        !/no test specified/i.test(test);
      if (isRealScript) {
        return 'npm test';
      }
    } catch { /* unreadable or malformed package.json — fall through */ }
  }

  if (exists('pytest.ini') || exists('pyproject.toml') || exists('setup.py')) {
    return 'pytest';
  }
  if (exists('Cargo.toml')) {
    return 'cargo test';
  }
  if (exists('go.mod')) {
    return 'go test ./...';
  }
  return null;
}
|
|
720
826
|
|
|
721
827
|
/**
|
|
@@ -731,6 +837,303 @@ function verifyJudgeIntegrity() {
|
|
|
731
837
|
return { ok: actual === REWARD_CHECKSUM, expected: REWARD_CHECKSUM, actual };
|
|
732
838
|
}
|
|
733
839
|
|
|
840
|
+
/**
 * Assemble the prompt handed to the validator for plan review.
 *
 * The prompt embeds the task title, its kind, the declared files and the
 * planner's raw output, then instructs the validator to answer with a
 * SIGNOFF line or a REJECT/FIX (+ optional PROPOSED) block.
 *
 * @param {{task: string, kind?: string, files?: string[]}} context - Task context.
 * @param {string} planOutput - Text produced by the plan phase (may be empty).
 * @returns {string} The full prompt text.
 */
function buildPlanReviewPrompt(context, planOutput) {
  let files = 'none declared in context';
  if (Array.isArray(context.files) && context.files.length) {
    files = context.files.join(', ');
  }
  const kindLabel = context.kind || 'unknown';
  const planText = planOutput || '(no plan output captured)';

  return `You are the validator in plan-review mode. You have NOT seen the planning context — read everything fresh.

Task: "${context.task}"
Kind: ${kindLabel}
Files declared in context: ${files}

Plan output from the navigator:
---
${planText}
---

Read from disk:
- atris/atris.md (the workspace protocol — operating rules and task shape)
- atris/TODO.md (find this task; inspect Files, Exit, Verify, After, Rollback)
- atris/lessons.md (recent failures — last 20 lines)

Decide if the plan is safe to execute. Check:
1. Verify points at a falsifiable rubric or test (not \`true\`, \`echo ok\`, or similar).
Prefer \`atris verify <slug> --section <name>\`.
2. Files are explicitly declared (not empty, not vague).
3. Rollback is named (commit, checkpoint, or \`git revert\`).
4. The plan's claims match the declared Task fields.
5. Nothing in lessons.md contradicts this plan.

Output EXACTLY one of these two formats as the LAST thing in your response. No preamble before the verdict line.

SIGNOFF: <one sentence on why the plan is safe>

or

REJECT: <one sentence on what is wrong>
FIX: <one sentence on what must change>
PROPOSED:
Files: <concrete path list, or omit this line if original is fine>
Exit: <sharp observable done condition, or omit this line if original is fine>
Verify: <falsifiable shell command, or omit this line if original is fine>
Rollback: <git revert <sha> or concrete checkpoint, or omit this line if original is fine>

Be a drafting partner, not just a critic. When you REJECT, write the PROPOSED block as a concrete draft the human can accept as-is, edit, or reject. Include each PROPOSED line only for fields that need changing; skip a line if the original is correct. Omit the entire PROPOSED block only if the rejection is about scope or intent rather than a draftable field.
`;
}
|
|
889
|
+
|
|
890
|
+
/**
 * Parse the validator's verdict from its raw output.
 *
 * The output is scanned from the last line backwards (the verdict is supposed
 * to be LAST); the first SIGNOFF/REJECT line found that way decides. Returns:
 *   { verdict: 'SIGNOFF', reason, fix: '', proposed: null }
 *   { verdict: 'REJECT',  reason, fix, proposed }
 * where `proposed` is whatever parseProposedBlock extracts from the PROPOSED
 * block that follows the REJECT line (null when there is none).
 * If neither marker is present, the result is a REJECT with a parse-fail reason.
 *
 * @param {string} output - Raw validator output (null/undefined treated as '').
 * @returns {{verdict: string, reason: string, fix: string, proposed: (Object|null)}}
 */
function parseVerdict(output) {
  // Split on LF or CRLF so a stray '\r' never reaches the field parsers.
  const rawLines = String(output || '').split(/\r?\n/);

  for (let i = rawLines.length - 1; i >= 0; i--) {
    const line = rawLines[i].trim();
    if (!line) continue;

    if (/^SIGNOFF\s*:/i.test(line)) {
      return { verdict: 'SIGNOFF', reason: line.replace(/^SIGNOFF\s*:\s*/i, ''), fix: '', proposed: null };
    }

    if (/^REJECT\s*:/i.test(line)) {
      const reason = line.replace(/^REJECT\s*:\s*/i, '');
      // FIX and PROPOSED are only meaningful after the REJECT line. Searching
      // the whole output would pick up an earlier mention (e.g. the model
      // describing the format in a preamble) and miss the real block.
      const tail = rawLines.slice(i + 1);
      const fixLine = tail.map((l) => l.trim()).find((l) => /^FIX\s*:/i.test(l));
      const fix = fixLine ? fixLine.replace(/^FIX\s*:\s*/i, '') : '';
      const proposedAt = tail.findIndex((l) => /PROPOSED\s*:/i.test(l));
      const proposed = proposedAt === -1 ? null : parseProposedBlock(tail.slice(proposedAt));
      return { verdict: 'REJECT', reason, fix, proposed };
    }
  }

  // Fail closed: unparseable output must never count as approval.
  return {
    verdict: 'REJECT',
    reason: 'validator output did not contain SIGNOFF or REJECT',
    fix: 'ensure validator emits machine-parseable verdict as the last line',
    proposed: null,
  };
}
|
|
923
|
+
|
|
924
|
+
/**
 * Parse a PROPOSED block: up to four optional fields (Files, Exit, Verify,
 * Rollback), one per line, indented or not.
 *
 * @param {string[]} lines - Raw output lines; lines[0] must contain the
 *   `PROPOSED:` header, otherwise there is no block to parse.
 * @returns {Object|null} Object holding only the fields that were present
 *   (keys: files, exit, verify, rollback), or null if no block / no fields.
 */
function parseProposedBlock(lines) {
  if (!lines || !lines.length || !/PROPOSED\s*:/i.test(lines[0] || '')) return null;

  const topLevelField = /^(Files|Exit|Verify|Rollback)\s*:/i;
  const fieldLine = /^\s*(Files|Exit|Verify|Rollback)\s*:\s*(.+)$/i;
  const proposed = {};

  for (let j = 1; j < lines.length; j++) {
    // Drop a trailing CR: `.` never matches '\r', so on CRLF output the
    // `(.+)$` capture below would otherwise fail for every field.
    const raw = String(lines[j]).replace(/\r$/, '');
    // Stop at the first unindented line that is not one of the four fields.
    // Blank and indented lines do not end the block; they are simply scanned.
    if (/^\S/.test(raw) && !topLevelField.test(raw)) break;
    const m = raw.match(fieldLine);
    // A repeated field overwrites the earlier value.
    if (m) proposed[m[1].toLowerCase()] = m[2].trim();
  }

  return Object.keys(proposed).length ? proposed : null;
}
|
|
950
|
+
|
|
951
|
+
/**
 * Default executor for plan-review: run a fresh `claude -p <prompt>` process
 * restricted to the Bash, Read, Grep and Glob tools, and return its stdout.
 * Kept thin so tests can inject a stub via options.planReviewExec.
 *
 * The prompt is passed as a single argv entry through execFileSync, so no
 * shell quoting is involved and no temp file is written into the workspace.
 *
 * @param {string} prompt - Full plan-review prompt.
 * @param {{cwd?: string, timeout?: number}} [opts] - Working directory for the
 *   child (Node falls back to the current process's cwd when omitted) and
 *   timeout in milliseconds.
 * @returns {string} The child's stdout ('' if it printed nothing).
 * @throws {Error} When the child fails and produced no stdout.
 */
function defaultPlanReviewExecutor(prompt, { cwd, timeout = 180000 } = {}) {
  const env = { ...process.env };
  // NOTE(review): presumably removed so the child CLI does not treat itself as
  // a nested session — confirm against the claude CLI's behavior.
  delete env.CLAUDECODE;

  // The previous `"$(cat file)"` form stripped trailing newlines via shell
  // command substitution; do the same so the argument stays identical.
  const promptArg = String(prompt).replace(/\n+$/, '');

  try {
    const output = execFileSync(
      'claude',
      ['-p', promptArg, '--allowedTools', 'Bash,Read,Grep,Glob'],
      {
        cwd,
        encoding: 'utf8',
        timeout,
        stdio: 'pipe',
        maxBuffer: 10 * 1024 * 1024,
        env,
      }
    );
    return output || '';
  } catch (err) {
    // A non-zero exit may still carry a usable verdict on stdout.
    if (err.stdout) return err.stdout;
    throw err;
  }
}
|
|
978
|
+
|
|
979
|
+
/**
 * Default executor for codex: run `<cmd> -p <prompt>` (prompt passed as an
 * argument, not via stdin) and return its stdout.
 * Users can override the command with the ATRIS_CODEX_CMD env var; tests
 * inject via options.codexExec.
 *
 * @param {string} prompt - Full plan-review prompt.
 * @param {{cwd?: string, timeout?: number}} [opts] - Working directory and
 *   timeout in milliseconds.
 * @returns {string} The child's stdout ('' when it exited 0 without output).
 * @throws {Error} When the process could not be run, or exited non-zero,
 *   and produced no stdout.
 */
function defaultCodexExecutor(prompt, { cwd, timeout = 180000 } = {}) {
  const cmd = process.env.ATRIS_CODEX_CMD || 'codex';
  const proc = spawnSync(cmd, ['-p', prompt], {
    cwd,
    encoding: 'utf8',
    timeout,
    stdio: 'pipe',
    maxBuffer: 10 * 1024 * 1024,
  });

  // Any stdout is returned as-is, even on a non-zero exit.
  if (proc.stdout) return proc.stdout;

  // spawnSync reports spawn failures and timeouts via `error` with a null
  // status; surface the real cause instead of "status null: no output".
  if (proc.error) {
    throw new Error(`codex could not be run (${cmd}): ${proc.error.message}`, { cause: proc.error });
  }
  if (proc.status !== 0) {
    throw new Error(`codex exited with status ${proc.status}: ${proc.stderr || 'no output'}`);
  }
  return '';
}
|
|
997
|
+
|
|
998
|
+
/**
 * Report whether the codex command can be located.
 * Probes with `which` for ATRIS_CODEX_CMD when set, otherwise for `codex`.
 * Tests override via options.hasCodex.
 *
 * @returns {boolean} true only when the `which` probe exits with status 0.
 */
function hasCodex() {
  const binary = process.env.ATRIS_CODEX_CMD || 'codex';
  let probe;
  try {
    probe = spawnSync('which', [binary], { stdio: 'pipe' });
  } catch {
    return false;
  }
  return probe.status === 0;
}
|
|
1011
|
+
|
|
1012
|
+
/**
 * Run plan-review: the validator (and optionally codex) read the plan and
 * decide whether it is safe to execute.
 *
 * Codex is consulted only on explicit opt-in, i.e. when any of these holds:
 *   - env ATRIS_USE_CODEX is '1'
 *   - context.tags includes 'codex', 'gray' or 'high-risk'
 *
 * Outcomes:
 *   - no opt-in → the validator's verdict, signers ['validator']
 *   - opt-in but codex unavailable, or the codex call throws → the validator's
 *     verdict plus an explanatory `notes` string
 *   - both SIGNOFF → SIGNOFF with both signers
 *   - anything else → REJECT with `split: true` and both opinions in `reason`
 *
 * An exception thrown by the validator executor is not caught here.
 *
 * @param {Object} args
 * @param {string} args.cwd - Workspace directory handed to the executors.
 * @param {Object} args.context - Task context (task, kind, files, tags).
 * @param {string} args.planOutput - Output of the plan phase.
 * @param {Object} [args.options] - Injection points: planReviewExec,
 *   codexExec, hasCodex.
 * @returns {{verdict: string, reason: string, fix: string, proposed: (Object|null),
 *   signers: string[], notes?: string, split?: boolean}}
 */
function runPlanReview({ cwd, context, planOutput, options = {} }) {
  const prompt = buildPlanReviewPrompt(context, planOutput);
  const tags = Array.isArray(context.tags) ? context.tags : [];

  // Primary signer: the validator always runs.
  const runValidator = options.planReviewExec || defaultPlanReviewExecutor;
  const primary = parseVerdict(runValidator(prompt, { cwd, role: 'validator' }));

  // Second signer is opt-in only.
  const wantsCodex =
    process.env.ATRIS_USE_CODEX === '1' ||
    ['codex', 'gray', 'high-risk'].some((tag) => tags.includes(tag));
  if (!wantsCodex) {
    return { ...primary, signers: ['validator'], proposed: primary.proposed || null };
  }

  const codexAvailable = options.hasCodex != null ? options.hasCodex : hasCodex();
  if (!codexAvailable) {
    return {
      ...primary,
      signers: ['validator'],
      proposed: primary.proposed || null,
      notes: 'codex was requested but not on PATH; skipped gracefully',
    };
  }

  const runCodex = options.codexExec || defaultCodexExecutor;
  let secondary;
  try {
    secondary = parseVerdict(runCodex(prompt, { cwd, role: 'codex' }));
  } catch (err) {
    // A broken second signer must not block the tick; keep the validator's call.
    return {
      ...primary,
      signers: ['validator'],
      notes: `codex invocation failed: ${err.message}; falling back to single signer`,
    };
  }

  const bothSigned = primary.verdict === 'SIGNOFF' && secondary.verdict === 'SIGNOFF';
  if (bothSigned) {
    return {
      verdict: 'SIGNOFF',
      reason: primary.reason,
      fix: '',
      proposed: null,
      signers: ['validator', 'codex'],
    };
  }

  // Disagreement or joint rejection: halt and surface both opinions. The
  // validator's fix/draft takes precedence; codex's is used only as a fallback.
  const validatorView = `validator=${primary.verdict} (${primary.reason || 'no reason'})`;
  const codexView = `codex=${secondary.verdict} (${secondary.reason || 'no reason'})`;
  return {
    verdict: 'REJECT',
    reason: `Split verdict. ${validatorView}; ${codexView}.`,
    fix: primary.fix || secondary.fix || 'reconcile the two signers before re-planning',
    proposed: primary.proposed || secondary.proposed || null,
    signers: ['validator', 'codex'],
    split: true,
  };
}
|
|
1089
|
+
|
|
1090
|
+
/**
 * Append a plan-review rejection to today's journal under ## Notes.
 * Intentionally does NOT write to lessons.md — rejections only become lessons
 * if a human spots a reusable failure pattern.
 *
 * Best-effort: if today's journal file does not exist nothing is written, and
 * any error is swallowed so journaling can never crash the tick.
 *
 * @param {string} cwd - Workspace root; the journal path is derived from it.
 * @param {{task: string}} context - Task context (only `task` is read).
 * @param {{reason: string, fix?: string, notes?: string, signers?: string[],
 *   proposed?: (Object|null)}} review - Result of runPlanReview.
 * @returns {void}
 */
function appendPlanRejection(cwd, context, review) {
  try {
    // Compute the journal path from the passed cwd so tests and isolated
    // workspaces both work. getLogPath() resolves against process.cwd()
    // which isn't always the task's workspace.
    const date = new Date();
    const year = date.getFullYear();
    const month = String(date.getMonth() + 1).padStart(2, '0');
    const day = String(date.getDate()).padStart(2, '0');
    const logFile = path.join(cwd, 'atris', 'logs', String(year), `${year}-${month}-${day}.md`);
    if (!fs.existsSync(logFile)) return;

    // Heading timestamp is taken from the ISO string (UTC), minute precision.
    const now = date.toISOString().slice(0, 16).replace('T', ' ');
    const signers = (review.signers || []).join(' + ');
    const proposed = review.proposed;
    const proposedBlock = proposed
      ? `**Proposed draft:**\n` +
        (proposed.files ? `- Files: ${proposed.files}\n` : '') +
        (proposed.exit ? `- Exit: ${proposed.exit}\n` : '') +
        (proposed.verify ? `- Verify: ${proposed.verify}\n` : '') +
        (proposed.rollback ? `- Rollback: ${proposed.rollback}\n` : '')
      : '';
    const block =
      `\n### Plan rejected — ${now}\n\n` +
      `**Task:** ${context.task}\n` +
      `**Signers:** ${signers}\n` +
      `**Reason:** ${review.reason}\n` +
      (review.fix ? `**Fix:** ${review.fix}\n` : '') +
      proposedBlock +
      (review.notes ? `**Notes:** ${review.notes}\n` : '');

    const content = fs.readFileSync(logFile, 'utf8');
    // Anchor at line start so a "### Notes" sub-heading is not mistaken for
    // the "## Notes" section.
    const heading = /^## Notes/m.exec(content);
    let updated;
    if (!heading) {
      updated = content.replace(/\s*$/, '') + `\n\n## Notes\n${block}\n`;
    } else {
      const eol = content.indexOf('\n', heading.index);
      if (eol === -1) {
        // "## Notes" is the final line with no trailing newline. Slicing at
        // eol + 1 === 0 would put the block at the very top of the file, so
        // append after the heading instead.
        updated = `${content}\n${block}`;
      } else {
        // Insert right below the heading so the newest rejection comes first.
        updated = content.slice(0, eol + 1) + block + content.slice(eol + 1);
      }
    }
    fs.writeFileSync(logFile, updated);
  } catch {
    // journaling must never crash the tick
  }
}
|
|
1136
|
+
|
|
734
1137
|
function runTaskOnce(context, options = {}) {
|
|
735
1138
|
const { verbose = false, cwd = process.cwd() } = options;
|
|
736
1139
|
|
|
@@ -754,10 +1157,11 @@ function runTaskOnce(context, options = {}) {
|
|
|
754
1157
|
const verifyResult = getVerifyCommand(cwd, context.task);
|
|
755
1158
|
const verifyCmd = verifyResult.cmd;
|
|
756
1159
|
|
|
757
|
-
// Guard:
|
|
758
|
-
|
|
1160
|
+
// Guard: endgame tasks must have an explicit Verify field.
|
|
1161
|
+
// Reactive signals (inbox, staleness, imagined) use npm test as default.
|
|
1162
|
+
if (!verifyResult.explicit && context.kind === 'endgame') {
|
|
759
1163
|
writeLesson(cwd, 'no-verify-field', 'fail',
|
|
760
|
-
`Task "${context.task}" has no explicit **Verify:** field in TODO.md. Tick halted — every task must declare how to verify it.`);
|
|
1164
|
+
`Task "${context.task}" has no explicit **Verify:** field in TODO.md. Tick halted — every endgame task must declare how to verify it.`);
|
|
761
1165
|
return {
|
|
762
1166
|
outcome: 'halted',
|
|
763
1167
|
reason: 'no-verify-field',
|
|
@@ -768,10 +1172,92 @@ function runTaskOnce(context, options = {}) {
|
|
|
768
1172
|
};
|
|
769
1173
|
}
|
|
770
1174
|
|
|
771
|
-
|
|
1175
|
+
// Falsifiability gate (endgame + explicit Verify only).
|
|
1176
|
+
// Run Verify BEFORE the work. If it passes, the rubric is trivial or the
|
|
1177
|
+
// task is already done — either way, halt. This is the keystone that makes
|
|
1178
|
+
// Verify load-bearing. The cmd is captured here and reused post-execute so
|
|
1179
|
+
// an agent cannot swap the rubric mid-tick.
|
|
1180
|
+
const skipFalsifiability = options.skipFalsifiability === true;
|
|
1181
|
+
if (!skipFalsifiability && verifyResult.explicit && context.kind === 'endgame' && verifyCmd) {
|
|
1182
|
+
try {
|
|
1183
|
+
execSync(verifyCmd, { cwd, stdio: 'pipe', timeout: 60000 });
|
|
1184
|
+
writeLesson(cwd, 'verify-not-falsifiable', 'fail',
|
|
1185
|
+
`Verify \`${verifyCmd}\` passed before work started on "${context.task}". Either the rubric is trivial or the task is already done. Tick halted.`);
|
|
1186
|
+
return {
|
|
1187
|
+
outcome: 'halted',
|
|
1188
|
+
reason: 'verify-not-falsifiable',
|
|
1189
|
+
phaseResults: {},
|
|
1190
|
+
elapsedSeconds: 0,
|
|
1191
|
+
verifyRan: true,
|
|
1192
|
+
verifyPass: false,
|
|
1193
|
+
};
|
|
1194
|
+
} catch {
|
|
1195
|
+
// Pre-verify failed — good, the rubric is falsifiable. Proceed.
|
|
1196
|
+
}
|
|
1197
|
+
}
|
|
1198
|
+
|
|
1199
|
+
// Phase: plan
|
|
1200
|
+
{
|
|
1201
|
+
const t0 = Date.now();
|
|
1202
|
+
const result = (options.phaseExec || executePhaseDetailed)('plan', context, options);
|
|
1203
|
+
phaseResults.plan = {
|
|
1204
|
+
prompt: result.prompt,
|
|
1205
|
+
output: result.output || '',
|
|
1206
|
+
elapsedSeconds: Math.round((Date.now() - t0) / 1000),
|
|
1207
|
+
};
|
|
1208
|
+
}
|
|
1209
|
+
|
|
1210
|
+
// Phase: plan-review — validator reads the plan fresh and signs off or rejects.
|
|
1211
|
+
// Can be skipped via options.skipPlanReview (tests only). Codex is optional,
|
|
1212
|
+
// opt-in via env var / tags. On REJECT, the tick halts and the rejection is
|
|
1213
|
+
// journaled; lessons.md is NOT touched (only promoted lessons go there).
|
|
1214
|
+
if (!options.skipPlanReview) {
|
|
1215
|
+
const t0 = Date.now();
|
|
1216
|
+
const review = runPlanReview({
|
|
1217
|
+
cwd,
|
|
1218
|
+
context,
|
|
1219
|
+
planOutput: phaseResults.plan.output,
|
|
1220
|
+
options,
|
|
1221
|
+
});
|
|
1222
|
+
const elapsed = Math.round((Date.now() - t0) / 1000);
|
|
1223
|
+
phaseResults['plan-review'] = {
|
|
1224
|
+
output:
|
|
1225
|
+
`${review.verdict}: ${review.reason || ''}` +
|
|
1226
|
+
(review.fix ? `\nFIX: ${review.fix}` : '') +
|
|
1227
|
+
(review.notes ? `\n(${review.notes})` : ''),
|
|
1228
|
+
signers: review.signers,
|
|
1229
|
+
elapsedSeconds: elapsed,
|
|
1230
|
+
};
|
|
1231
|
+
|
|
1232
|
+
if (review.verdict === 'REJECT') {
|
|
1233
|
+
appendPlanRejection(cwd, context, review);
|
|
1234
|
+
return {
|
|
1235
|
+
outcome: 'halted',
|
|
1236
|
+
reason: 'plan-rejected-at-review',
|
|
1237
|
+
phaseResults,
|
|
1238
|
+
elapsedSeconds: Math.round((Date.now() - startedAt) / 1000),
|
|
1239
|
+
verifyRan: false,
|
|
1240
|
+
verifyPass: false,
|
|
1241
|
+
};
|
|
1242
|
+
}
|
|
1243
|
+
}
|
|
1244
|
+
|
|
1245
|
+
// Phase: do
|
|
1246
|
+
{
|
|
772
1247
|
const t0 = Date.now();
|
|
773
|
-
const result = executePhaseDetailed(
|
|
774
|
-
phaseResults
|
|
1248
|
+
const result = (options.phaseExec || executePhaseDetailed)('do', context, options);
|
|
1249
|
+
phaseResults.do = {
|
|
1250
|
+
prompt: result.prompt,
|
|
1251
|
+
output: result.output || '',
|
|
1252
|
+
elapsedSeconds: Math.round((Date.now() - t0) / 1000),
|
|
1253
|
+
};
|
|
1254
|
+
}
|
|
1255
|
+
|
|
1256
|
+
// Phase: review
|
|
1257
|
+
{
|
|
1258
|
+
const t0 = Date.now();
|
|
1259
|
+
const result = (options.phaseExec || executePhaseDetailed)('review', context, options);
|
|
1260
|
+
phaseResults.review = {
|
|
775
1261
|
prompt: result.prompt,
|
|
776
1262
|
output: result.output || '',
|
|
777
1263
|
elapsedSeconds: Math.round((Date.now() - t0) / 1000),
|
|
@@ -1379,17 +1865,295 @@ function scoreEndgameCandidates(cwd, candidates) {
|
|
|
1379
1865
|
}
|
|
1380
1866
|
}
|
|
1381
1867
|
|
|
1868
|
+
/**
 * Proactive "surprise me" scanner: collects suggestions about things the user
 * did not explicitly ask for. Three independent checks run in a fixed order,
 * each inside its own best-effort try/catch so one failing check never hides
 * the others:
 *   - orphan-todo:         TODO/FIXME comments reported by findCodeTodos that
 *                          isTodoTracked does not find in atris/TODO.md
 *   - unverified-detector: sidecar entries that have a `detector` but no
 *                          `last_detected` value (staleness is not evaluated)
 *   - hotspot:             whatever findHotspot reports for the last 24h
 *
 * @param {string} cwd - Project root that contains the `atris/` directory.
 * @returns {Array<{task: string, why: string, kind: string, priority: number, skipKey: string}>}
 *   Zero to three suggestions; `skipKey` lets callers remember a skipped one.
 */
function scanAnomalies(cwd) {
  const atrisRoot = path.join(cwd, 'atris');
  const suggestions = [];
  const pluralS = (count) => (count > 1 ? 's' : '');

  // --- orphan-todo: source TODOs that never made it into the backlog ---
  try {
    const sourceTodos = findCodeTodos(cwd);
    if (sourceTodos.length > 0) {
      const backlogPath = path.join(atrisRoot, 'TODO.md');
      let backlog = '';
      if (fs.existsSync(backlogPath)) {
        backlog = fs.readFileSync(backlogPath, 'utf8');
      }
      const orphans = [];
      for (const todo of sourceTodos) {
        if (!isTodoTracked(todo.text, backlog)) orphans.push(todo);
      }
      if (orphans.length > 0) {
        const total = orphans.length;
        // Show at most three locations and a 60-character excerpt of the first.
        const locations = orphans.slice(0, 3).map((todo) => `${todo.file}:${todo.line}`).join(', ');
        const headline = orphans[0].text.slice(0, 60);
        suggestions.push({
          task: `Track the ${total} orphan TODO${pluralS(total)} in source — first: "${headline}"`,
          why: `Code has ${total} \`// TODO\`/\`// FIXME\` comment${pluralS(total)} never written to TODO.md. First: "${headline}" (${locations}). Either convert to real tasks or delete if obsolete.`,
          kind: 'orphan-todo',
          priority: 6,
          skipKey: 'orphan-todo'
        });
      }
    }
  } catch { /* best-effort scan */ }

  // --- unverified-detector: detector declared but never stamped as run ---
  try {
    const pending = Object.entries(loadLessonMetadata(cwd))
      .filter(([slug, entry]) => (
        slug !== '_schema' &&
        entry &&
        typeof entry === 'object' &&
        entry.detector &&
        !entry.last_detected
      ))
      .map(([slug]) => slug);
    if (pending.length > 0) {
      const preview = pending.slice(0, 3).join(', ');
      const overflow = pending.length > 3 ? ', …' : '';
      suggestions.push({
        task: `Run the ${pending.length} unverified detector${pluralS(pending.length)} in atris/lessons.json`,
        why: `These lessons claim they're resolved via a detector but the detector has never been run: ${preview}${overflow}. Until it runs and exits 0, the resolved claim is unverified.`,
        kind: 'unverified-detector',
        priority: 5.5,
        skipKey: 'unverified-detector'
      });
    }
  } catch { /* best-effort */ }

  // --- hotspot: the most-committed file of the last 24h, if any qualifies ---
  try {
    const busiest = findHotspot(cwd);
    if (busiest) {
      suggestions.push({
        task: `Pause and review ${busiest.file} — ${busiest.commits} commits in the last 24h`,
        why: `That file has churned more than any other file today. Could be genuine progress or a sign the change isn't sticking. Worth reading the diff before continuing.`,
        kind: 'hotspot',
        priority: 6.5,
        skipKey: `hotspot:${busiest.file}`
      });
    }
  } catch { /* best-effort */ }

  return suggestions;
}
|
|
1942
|
+
|
|
1943
|
+
/**
 * Grep tracked source files for TODO/FIXME comments. Skips test/,
 * node_modules/, atris/, and .md files.
 *
 * Uses a loose `git grep` and refines in JS — git grep's -E flag doesn't
 * support `\s` on macOS, so the pattern passed to git stays simple.
 *
 * Best-effort: any git failure (not a repo, git missing, no matches → exit 1)
 * yields an empty array.
 *
 * @param {string} cwd - Directory to run `git grep` in.
 * @returns {Array<{file: string, line: number, text: string}>} At most 100 hits.
 */
function findCodeTodos(cwd) {
  try {
    const out = execFileSync('git', [
      'grep', '-n', '-I', '-E', '(TODO|FIXME)',
      '--', ':!test/', ':!node_modules/', ':!atris/', ':!**/*.md'
    ], {
      cwd,
      encoding: 'utf8',
      stdio: ['ignore', 'pipe', 'ignore'],
      // The default 1 MiB cap makes execFileSync throw on large repos, which
      // would silently turn "many TODOs" into "no TODOs".
      maxBuffer: 64 * 1024 * 1024
    });
    return parseTodoGrepOutput(out, 100);
  } catch {
    return [];
  }
}

/**
 * Turn raw `git grep -n` output (`path:line:content` per line) into TODO
 * records, keeping only lines that are real comment-style TODO/FIXME markers.
 *
 * @param {string} grepOutput - Raw stdout of `git grep -n`.
 * @param {number} [limit=100] - Maximum number of records to return.
 * @returns {Array<{file: string, line: number, text: string}>}
 */
function parseTodoGrepOutput(grepOutput, limit = 100) {
  const found = [];
  for (const rawLine of String(grepOutput).split('\n')) {
    // Lines from CRLF files keep their '\r'; '.' never matches '\r', so the
    // '$'-anchored pattern below would reject every such line.
    const raw = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
    if (!raw) continue;
    const m = raw.match(/^([^:]+):(\d+):(.*)$/);
    if (!m) continue;
    // A real TODO is a comment marker at the start of the line (allowing
    // leading indent) followed by TODO/FIXME and at least one word. This
    // rejects "TODO.md" string literals in templates.
    const commentMatch = m[3].match(/^\s*(?:\/\/|#|\/\*|\*)\s*(TODO|FIXME):?\s+(\S.*)/);
    if (!commentMatch) continue;
    // Drop a closing block-comment terminator from single-line /* ... */ notes.
    const text = commentMatch[2].replace(/\*\/\s*$/, '').trim();
    if (!text) continue;
    found.push({ file: m[1], line: parseInt(m[2], 10), text });
    if (found.length >= limit) break;
  }
  return found;
}
|
|
1977
|
+
|
|
1978
|
+
/**
 * Heuristic: is a code TODO already mentioned in the backlog text?
 *
 * The TODO is reduced to its distinct significant words (lower-cased, at
 * least 4 characters, not a stop word). It counts as tracked when at least
 * two of those distinct words occur in the backlog as substrings — or the
 * single one, when only one distinct significant word exists.
 *
 * Words are de-duplicated first so a repeated word ("cache cache ...") cannot
 * satisfy the two-word requirement on its own.
 *
 * @param {string} todoText - Text of the TODO/FIXME comment.
 * @param {string} backlogText - Full contents of the backlog file.
 * @returns {boolean} true when the TODO looks tracked; false for empty or
 *   non-string input, or when the TODO has no significant words.
 */
function isTodoTracked(todoText, backlogText) {
  if (!todoText || !backlogText) return false;
  if (typeof todoText !== 'string' || typeof backlogText !== 'string') return false;

  const stopWords = new Set(['todo', 'fixme', 'this', 'that', 'with', 'from', 'when', 'then']);
  const significantWords = new Set(
    todoText
      .toLowerCase()
      .split(/\W+/)
      .filter((word) => word.length >= 4 && !stopWords.has(word))
  );
  if (significantWords.size === 0) return false;

  const lowerBacklog = backlogText.toLowerCase();
  let matches = 0;
  for (const word of significantWords) {
    if (lowerBacklog.includes(word)) matches += 1;
  }
  return matches >= Math.min(2, significantWords.size);
}
|
|
1993
|
+
|
|
1994
|
+
/**
 * Find the file with the most commits in the last 24 hours.
 *
 * Runs `git log --since=24.hours.ago --name-only --pretty=format:` so the
 * output is just the touched file names, one per line per commit.
 * Best-effort: any git failure yields null.
 *
 * @param {string} cwd - Directory to run `git log` in.
 * @returns {{file: string, commits: number} | null} The busiest file, or null
 *   if no file has more than 5 commits (below the "hotspot" threshold).
 */
function findHotspot(cwd) {
  try {
    const out = execFileSync('git', [
      'log', '--since=24.hours.ago', '--name-only', '--pretty=format:'
    ], { cwd, encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] });
    return pickHotspotFromLog(out);
  } catch {
    return null;
  }
}

/**
 * Tally file names from `git log --name-only` output and return the most
 * frequent one that reaches the threshold.
 *
 * Counts live in a Map, not a plain object: with `{}` a file literally named
 * `constructor` or `toString` would pick up the inherited member as its
 * starting "count" and be reported as a hotspot after a single commit.
 *
 * @param {string} logOutput - Raw stdout, one file path per line.
 * @param {number} [minCommits=6] - Minimum occurrences to qualify.
 * @returns {{file: string, commits: number} | null}
 */
function pickHotspotFromLog(logOutput, minCommits = 6) {
  const counts = new Map();
  for (const rawName of String(logOutput).split('\n')) {
    const file = rawName.trim();
    if (!file) continue;
    counts.set(file, (counts.get(file) || 0) + 1);
  }
  let best = null;
  for (const [file, commits] of counts) {
    if (commits < minCommits) continue;
    // Strict '>' keeps the first file seen when several tie.
    if (!best || commits > best.commits) best = { file, commits };
  }
  return best;
}
|
|
2017
|
+
|
|
2018
|
+
/**
 * Write `status: attempted` back to the typed lesson sidecar
 * (atris/lessons.json) for a slug when a self-heal tick tried and failed.
 * Increments `attempts`, stamps `last_attempt` (YYYY-MM-DD, taken from the
 * UTC ISO timestamp) and, when a reason is given, `last_attempt_reason`.
 * Creates the sidecar (and the slug entry) if missing.
 *
 * This closes the survivorship-bias loop: without it the ledger only records
 * fixes that worked, never the ones that didn't.
 *
 * An existing sidecar that is not a JSON object (invalid JSON, an array, a
 * bare primitive or null) is treated as malformed and left untouched.
 * Writing into an array would silently drop the entry on serialisation.
 *
 * @param {string} cwd - Project root containing (or to contain) `atris/`.
 * @param {string} slug - Lesson identifier used as the sidecar key.
 * @param {*} [reason] - Why the attempt failed; stored via String(reason).
 * @returns {boolean} true on success, false on invalid slug, malformed
 *   sidecar, or write error.
 */
function markLessonAttempted(cwd, slug, reason) {
  if (!slug || typeof slug !== 'string') return false;
  // meta['__proto__'] resolves to Object.prototype; mutating it would leak
  // `status`/`attempts` onto every object in the process.
  if (slug === '__proto__') return false;

  const atrisDir = path.join(cwd, 'atris');
  const metaPath = path.join(atrisDir, 'lessons.json');

  let meta = {};
  if (fs.existsSync(metaPath)) {
    let parsed;
    try {
      parsed = JSON.parse(fs.readFileSync(metaPath, 'utf8'));
    } catch {
      return false;
    }
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) return false;
    meta = parsed;
  }

  // Only reuse an entry the sidecar itself owns and that is a plain record.
  const existing = Object.prototype.hasOwnProperty.call(meta, slug) ? meta[slug] : null;
  const entry = existing && typeof existing === 'object' && !Array.isArray(existing) ? existing : {};
  meta[slug] = entry;

  entry.status = 'attempted';
  entry.attempts = (typeof entry.attempts === 'number' ? entry.attempts : 0) + 1;
  entry.last_attempt = new Date().toISOString().slice(0, 10);
  if (reason) entry.last_attempt_reason = String(reason);

  try {
    fs.mkdirSync(atrisDir, { recursive: true });
    fs.writeFileSync(metaPath, JSON.stringify(meta, null, 2) + '\n');
    return true;
  } catch {
    return false;
  }
}
|
|
2051
|
+
|
|
2052
|
+
/**
 * Load the typed lesson metadata sidecar (atris/lessons.json).
 * Keyed by slug. Each entry may carry: scope, applies_to, detector, status.
 *
 * Missing file, read/parse errors, or a top-level value that is not a JSON
 * object (null, a primitive, or an array) → empty object, i.e. the
 * prose-only fallback. Arrays are rejected explicitly because
 * `typeof [] === 'object'` would otherwise let them through as if they were
 * slug-keyed records.
 *
 * @param {string} cwd - Project root containing `atris/`.
 * @returns {object} The parsed slug → metadata map, or `{}`.
 */
function loadLessonMetadata(cwd) {
  const metaPath = path.join(cwd, 'atris', 'lessons.json');
  if (!fs.existsSync(metaPath)) return {};
  try {
    const parsed = JSON.parse(fs.readFileSync(metaPath, 'utf8'));
    const isRecord = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isRecord ? parsed : {};
  } catch {
    return {};
  }
}
|
|
2068
|
+
|
|
2069
|
+
/**
 * Parse atris/lessons.md into structured lesson objects, joined with the
 * optional atris/lessons.json sidecar by slug.
 *
 * Only lines whose trimmed text starts with `- **[` and that match
 * `**[YYYY-MM-DD] slug** — (pass|fail)? — rest` are turned into lessons.
 * Both LF and CRLF line endings are accepted; with CRLF the trailing `\r`
 * would otherwise defeat the `$`-anchored pattern and drop every lesson.
 *
 * Sidecar metadata is looked up as an own property only, so a slug such as
 * `constructor` is not mistaken for a typed lesson via the prototype chain.
 *
 * @param {string} cwd - Project root containing `atris/`.
 * @returns {Array<{id: string, date: string, verdict: ('pass'|'fail'|null),
 *   body: string, line: string, resolvedTag: boolean, meta: (object|null),
 *   legacy: boolean}>} `legacy` is true when no sidecar metadata exists for
 *   the slug. Returns [] when lessons.md is missing.
 */
function parseLessons(cwd) {
  const lessonsPath = path.join(cwd, 'atris', 'lessons.md');
  if (!fs.existsSync(lessonsPath)) return [];
  const content = fs.readFileSync(lessonsPath, 'utf8');
  const metadata = loadLessonMetadata(cwd);
  const ownMeta = (slug) => (
    metadata && Object.prototype.hasOwnProperty.call(metadata, slug) ? metadata[slug] || null : null
  );

  const out = [];
  for (const line of content.split(/\r?\n/)) {
    if (!line.trim().startsWith('- **[')) continue;
    const m = line.match(/\*\*\[(\d{4}-\d{2}-\d{2})\]\s+([\w-]+)\*\*\s*[—-]\s*(pass|fail)?\s*[—-]?\s*(.*)$/);
    if (!m) continue;
    const [, date, id, verdict, rest] = m;
    // The tag may appear anywhere in the remainder; only a leading occurrence
    // is stripped from the body.
    const resolvedTag = /\[resolved\]/.test(rest);
    const body = rest.replace(/^\[resolved\]\s*/, '').trim();
    const meta = ownMeta(id);
    out.push({
      id,
      date,
      verdict: verdict || null,
      body,
      line: line.trim(),
      resolvedTag,
      meta,
      legacy: !meta
    });
  }
  return out;
}
|
|
2104
|
+
|
|
2105
|
+
/**
 * Run a lesson's detector command through the shell.
 *
 * execFileSync is intentionally avoided for detectors because they may
 * legitimately shell out (e.g. `node --test path | grep X`).
 *
 * A missing, non-string, or whitespace-only detector counts as a failure.
 * An empty shell command exits 0 and would otherwise vouch for a fix that
 * nothing actually checked. The child's output is discarded rather than
 * piped, because only the exit status matters and buffered output above
 * execSync's maxBuffer would turn a passing detector into a failure.
 *
 * NOTE(review): the command comes from the lesson sidecar and is executed
 * as-is; it must only ever come from trusted project files.
 *
 * @param {string} detector - Shell command to execute.
 * @param {string} cwd - Working directory for the command.
 * @param {number} [timeoutMs=60000] - Passed to execSync as `timeout`.
 * @returns {boolean} true if the detector exits 0; false otherwise
 *   (non-zero exit, timeout, spawn error, blank command).
 */
function runLessonDetector(detector, cwd, timeoutMs = 60000) {
  if (typeof detector !== 'string' || detector.trim() === '') return false;
  try {
    execSync(detector, { cwd, stdio: 'ignore', timeout: timeoutMs });
    return true;
  } catch {
    return false;
  }
}
|
|
2120
|
+
|
|
1382
2121
|
/**
 * Check whether a lesson's bug pattern is still present.
 *
 * Detector-backed path (preferred): if sidecar metadata has `detector`, run it.
 *   exit 0 → resolved (true). non-zero → not resolved (false).
 *
 * Legacy path (fallback): delegate to isLessonResolvedLegacy, the keyword
 *   grep against the files named in the lesson line.
 *
 * Metadata resolution: when the caller passes `options.meta` — including an
 * explicit `null`, meaning "already looked, nothing there" — that value is
 * used as-is and the sidecar is not read again. Only when `meta` is absent
 * (undefined) is atris/lessons.json loaded and looked up by slug.
 *
 * @param {string} lessonLine - A single line from lessons.md
 * @param {string} cwd - Current working directory
 * @param {object} [options] - Optional pre-loaded metadata ({ meta, detectorTimeout })
 * @returns {boolean} true if the bug pattern is gone (resolved); false when
 *   it is still present or the line carries no `**[date] slug**` header.
 */
function isLessonResolved(lessonLine, cwd, options = {}) {
  if (typeof lessonLine !== 'string') return false;
  const slugMatch = lessonLine.match(/\*\*\[\d{4}-\d{2}-\d{2}\]\s+([\w-]+)\*\*/);
  if (!slugMatch) return false;
  const slug = slugMatch[1];
  const opts = options || {};

  // Detector-backed check (typed lesson sidecar)
  let meta = opts.meta;
  if (meta === undefined) {
    const sidecar = loadLessonMetadata(cwd);
    // Own-property lookup: a slug like `constructor` must not resolve to an
    // inherited Object.prototype member.
    meta = sidecar && Object.prototype.hasOwnProperty.call(sidecar, slug) ? sidecar[slug] : null;
  }
  if (meta && meta.detector) {
    return runLessonDetector(meta.detector, cwd, opts.detectorTimeout);
  }

  // Legacy fallback: keyword grep against referenced files.
  return isLessonResolvedLegacy(lessonLine, cwd);
}
|
|
2149
|
+
|
|
2150
|
+
/**
|
|
2151
|
+
* The pre-v3.8 resolver — kept as an internal fallback for prose-only lessons
|
|
2152
|
+
* that don't have detector metadata yet. Never auto-promotes a prose lesson to
|
|
2153
|
+
* resolved in the typed system (callers can still use the `resolvedTag` field
|
|
2154
|
+
* from parseLessons for hand-tagged entries).
|
|
2155
|
+
*/
|
|
2156
|
+
function isLessonResolvedLegacy(lessonLine, cwd) {
|
|
1393
2157
|
// Extract slug: bold text after date, e.g. **[2026-04-08] inbox-parser-eats-hr-separator**
|
|
1394
2158
|
const slugMatch = lessonLine.match(/\*\*\[\d{4}-\d{2}-\d{2}\]\s+([\w-]+)\*\*/);
|
|
1395
2159
|
if (!slugMatch) return false;
|
|
@@ -1435,6 +2199,52 @@ function isLessonResolved(lessonLine, cwd) {
|
|
|
1435
2199
|
return true;
|
|
1436
2200
|
}
|
|
1437
2201
|
|
|
2202
|
+
/**
 * Pick the oldest unresolved `fail` lesson whose bug pattern is still present.
 *
 * Self-healing seed: instead of imagining new horizons via LLM, use what the
 * system already wrote down about itself. A `fail` lesson for which
 * isLessonResolved returns false is still actionable.
 *
 * Eligibility (checked without running anything):
 *   - verdict is `fail` and the line carries no `[resolved]` tag
 *   - sidecar status is not `resolved` (done) or `observed` (process rule,
 *     not a fixable code state)
 *   - sidecar status `attempted` has fewer than 3 recorded attempts; at the
 *     cap the lesson needs human re-scoping
 *
 * Eligible lessons are ordered oldest-first BEFORE the resolution check, and
 * the scan stops at the first unresolved one. Resolution checks may run a
 * detector command per lesson, so checking lessons that can never be
 * returned would only add latency.
 *
 * @param {string} cwd - Project root containing `atris/`.
 * @returns {{date: string, slug: string, line: string, typed: boolean,
 *   detector: (string|null), attempts: number} | null} The top candidate, or
 *   null if there is none.
 */
function pickUnresolvedFailLesson(cwd) {
  const MAX_ATTEMPTS = 3;

  const eligible = parseLessons(cwd).filter((lesson) => {
    if (lesson.verdict !== 'fail') return false;
    if (lesson.resolvedTag) return false;
    // Typed lesson with explicit status wins — respect the sidecar.
    const status = lesson.meta ? lesson.meta.status : null;
    if (status === 'resolved' || status === 'observed') return false;
    if (status === 'attempted' && (lesson.meta.attempts || 0) >= MAX_ATTEMPTS) return false;
    return true;
  });

  // Oldest first — longest-standing fails get priority. The sort is stable,
  // so lessons sharing a date keep their order from lessons.md.
  eligible.sort((a, b) => a.date.localeCompare(b.date));

  for (const lesson of eligible) {
    // Detector-backed or legacy grep check.
    if (isLessonResolved(lesson.line, cwd, { meta: lesson.meta })) continue;
    return {
      date: lesson.date,
      slug: lesson.id,
      line: lesson.line,
      typed: !lesson.legacy,
      detector: lesson.meta ? lesson.meta.detector || null : null,
      attempts: lesson.meta ? (lesson.meta.attempts || 0) : 0
    };
  }
  return null;
}
|
|
2247
|
+
|
|
1438
2248
|
/**
|
|
1439
2249
|
* Propose 3 candidate next horizons for the autopilot loop. Combines
|
|
1440
2250
|
* `getIdleTickCount` + `getRecentSignals` into a prompt asking the LLM
|
|
@@ -1725,6 +2535,7 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1725
2535
|
}
|
|
1726
2536
|
// Track as skipped so dry-run shows variety
|
|
1727
2537
|
skipped.add(suggestion.task);
|
|
2538
|
+
if (suggestion.skipKey) skipped.add(suggestion.skipKey);
|
|
1728
2539
|
if (suggestion.kind === 'docs') skipped.add('fix-map-refs');
|
|
1729
2540
|
if (suggestion.kind === 'review') skipped.add('review');
|
|
1730
2541
|
if (suggestion.kind === 'lessons') skipped.add('lessons');
|
|
@@ -1755,7 +2566,7 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1755
2566
|
|
|
1756
2567
|
if (decision === 'skip') {
|
|
1757
2568
|
skipped.add(suggestion.task);
|
|
1758
|
-
if (suggestion.
|
|
2569
|
+
if (suggestion.skipKey) skipped.add(suggestion.skipKey);
|
|
1759
2570
|
if (suggestion.kind === 'docs') skipped.add('fix-map-refs');
|
|
1760
2571
|
if (suggestion.kind === 'review') skipped.add('review');
|
|
1761
2572
|
if (suggestion.kind === 'lessons') skipped.add('lessons');
|
|
@@ -1775,7 +2586,13 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1775
2586
|
|
|
1776
2587
|
// Execute: plan → do → review
|
|
1777
2588
|
lastTaskTitle = suggestion.task;
|
|
1778
|
-
const context = {
|
|
2589
|
+
const context = {
|
|
2590
|
+
task: suggestion.task,
|
|
2591
|
+
kind: suggestion.kind,
|
|
2592
|
+
...(suggestion.lessonLine ? { lessonLine: suggestion.lessonLine } : {}),
|
|
2593
|
+
...(suggestion.lessonSlug ? { lessonSlug: suggestion.lessonSlug } : {}),
|
|
2594
|
+
...(suggestion.lessonDate ? { lessonDate: suggestion.lessonDate } : {})
|
|
2595
|
+
};
|
|
1779
2596
|
const startingEndgame = readEndgameState(cwd);
|
|
1780
2597
|
|
|
1781
2598
|
try {
|
|
@@ -1798,6 +2615,9 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1798
2615
|
tickOutcome = 'halted';
|
|
1799
2616
|
tickOutcomeText = `I halted before running "${lastTaskTitle}": ${execution.reason}.`;
|
|
1800
2617
|
tickNextStep = 'stop until a human looks at the error';
|
|
2618
|
+
if (suggestion.kind === 'self-heal' && suggestion.lessonSlug) {
|
|
2619
|
+
markLessonAttempted(cwd, suggestion.lessonSlug, `halted:${execution.reason}`);
|
|
2620
|
+
}
|
|
1801
2621
|
if (!verbose) {
|
|
1802
2622
|
printPlainBlock([
|
|
1803
2623
|
`I halted: ${execution.reason}.`,
|
|
@@ -1823,6 +2643,9 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1823
2643
|
tickOutcome = 'halted';
|
|
1824
2644
|
tickOutcomeText = `I built "${lastTaskTitle}" but review flagged issues.`;
|
|
1825
2645
|
tickNextStep = 'wait for a human to check the review output';
|
|
2646
|
+
if (suggestion.kind === 'self-heal' && suggestion.lessonSlug) {
|
|
2647
|
+
markLessonAttempted(cwd, suggestion.lessonSlug, 'review-rejected');
|
|
2648
|
+
}
|
|
1826
2649
|
if (verbose) {
|
|
1827
2650
|
console.log(` review flagged issues (${reviewTime}s). stopping for manual check.`);
|
|
1828
2651
|
} else {
|
|
@@ -1842,6 +2665,9 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1842
2665
|
tickOutcomeText = `I planned, built, and reviewed "${lastTaskTitle}" but verify failed.`;
|
|
1843
2666
|
tickNextStep = 'verify failed, halting';
|
|
1844
2667
|
writeLesson(cwd, 'verify-failed', 'fail', `Task "${lastTaskTitle}" passed review but failed verify command.`);
|
|
2668
|
+
if (suggestion.kind === 'self-heal' && suggestion.lessonSlug) {
|
|
2669
|
+
markLessonAttempted(cwd, suggestion.lessonSlug, 'verify-failed');
|
|
2670
|
+
}
|
|
1845
2671
|
if (verbose) {
|
|
1846
2672
|
console.log(` verify failed. stopping for manual check.`);
|
|
1847
2673
|
} else {
|
|
@@ -2132,6 +2958,12 @@ module.exports = {
|
|
|
2132
2958
|
autopilotFromTodo,
|
|
2133
2959
|
buildPrompt,
|
|
2134
2960
|
isLessonResolved,
|
|
2961
|
+
isLessonResolvedLegacy,
|
|
2962
|
+
loadLessonMetadata,
|
|
2963
|
+
markLessonAttempted,
|
|
2964
|
+
parseLessons,
|
|
2965
|
+
pickUnresolvedFailLesson,
|
|
2966
|
+
runLessonDetector,
|
|
2135
2967
|
isStillTrue,
|
|
2136
2968
|
getTaskAgeDays,
|
|
2137
2969
|
getIdleTickCount,
|
|
@@ -2139,6 +2971,11 @@ module.exports = {
|
|
|
2139
2971
|
getTickStatus,
|
|
2140
2972
|
getVerifyCommand,
|
|
2141
2973
|
computeTickReward,
|
|
2974
|
+
detectDefaultVerify,
|
|
2975
|
+
findCodeTodos,
|
|
2976
|
+
findHotspot,
|
|
2977
|
+
isTodoTracked,
|
|
2978
|
+
scanAnomalies,
|
|
2142
2979
|
verifyJudgeIntegrity,
|
|
2143
2980
|
maybeWriteCompletedEndgameScorecard,
|
|
2144
2981
|
renderHumanSuggestion,
|
|
@@ -2146,7 +2983,10 @@ module.exports = {
|
|
|
2146
2983
|
proposeCandidateHorizons,
|
|
2147
2984
|
recordTickCommit,
|
|
2148
2985
|
regressionCheck,
|
|
2986
|
+
runPlanReview,
|
|
2149
2987
|
runTaskOnce,
|
|
2988
|
+
buildPlanReviewPrompt,
|
|
2989
|
+
parseVerdict,
|
|
2150
2990
|
scoreEndgameCandidates,
|
|
2151
2991
|
suggestNextTask,
|
|
2152
2992
|
writeLesson
|