atris 3.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -0
- package/atris/skills/endgame/SKILL.md +19 -1
- package/atris/skills/improve/SKILL.md +65 -62
- package/atris/skills/launch/SKILL.md +62 -0
- package/atris/skills/tidy/SKILL.md +84 -0
- package/bin/atris.js +2 -1
- package/commands/autopilot.js +312 -31
- package/commands/business.js +149 -32
- package/commands/sync.js +9 -5
- package/lib/scorecard.js +287 -0
- package/lib/todo.js +12 -2
- package/package.json +2 -2
package/commands/autopilot.js
CHANGED
|
@@ -13,6 +13,12 @@ const readline = require('readline');
|
|
|
13
13
|
const { getLogPath, ensureLogDirectory, createLogFile } = require('../lib/journal');
|
|
14
14
|
const { parseTodo } = require('../lib/todo');
|
|
15
15
|
const { findStalePages, findStaleTasks, healBrokenMapRefs } = require('./clean');
|
|
16
|
+
const {
|
|
17
|
+
buildScorecardData,
|
|
18
|
+
readScorecards,
|
|
19
|
+
writeScorecard,
|
|
20
|
+
detectEndgameCompletion
|
|
21
|
+
} = require('../lib/scorecard');
|
|
16
22
|
|
|
17
23
|
const pkg = require('../package.json');
|
|
18
24
|
|
|
@@ -204,7 +210,7 @@ async function suggestNextTask(cwd, skipped = new Set()) {
|
|
|
204
210
|
if (suggestions.length === 0) {
|
|
205
211
|
try {
|
|
206
212
|
const candidates = await proposeCandidateHorizons(cwd);
|
|
207
|
-
const top =
|
|
213
|
+
const top = scoreEndgameCandidates(cwd, candidates);
|
|
208
214
|
return {
|
|
209
215
|
task: top.title,
|
|
210
216
|
why: top.rationale,
|
|
@@ -525,10 +531,54 @@ If broken beyond quick fix, reply: failed — [reason].`;
|
|
|
525
531
|
return '';
|
|
526
532
|
}
|
|
527
533
|
|
|
534
|
+
/**
 * Record a lesson in `<cwd>/atris/lessons.md`.
 *
 * Each lesson is one markdown bullet:
 *   - **[YYYY-MM-DD] slug** — pass/fail — explanation
 *
 * When the file does not exist it is created with a short header that ends in
 * a `---` separator, followed by the lesson. Otherwise the lesson is inserted
 * directly after the first `---` line (so the newest lesson is listed first);
 * if the file has no separator the lesson is appended at the end.
 *
 * @param {string} cwd - Project root containing the `atris/` folder.
 * @param {string} slug - Short identifier for the lesson.
 * @param {string} status - Outcome label, e.g. 'pass' or 'fail'.
 * @param {string} explanation - What was learned; line breaks are collapsed.
 * @throws {Error} When lessons.md cannot be read or written.
 */
function writeLesson(cwd, slug, status, explanation) {
  const lessonsPath = path.join(cwd, 'atris', 'lessons.md');
  const today = new Date().toISOString().split('T')[0]; // YYYY-MM-DD (UTC)

  // The file is "one line per lesson": fold any line breaks in the
  // explanation (e.g. a multi-line error message) into single spaces.
  const flatExplanation = String(explanation).replace(/\s*[\r\n]+\s*/g, ' ').trim();
  const lessonLine = `- **[${today}] ${slug}** — ${status} — ${flatExplanation}`;

  if (!fs.existsSync(lessonsPath)) {
    fs.writeFileSync(lessonsPath, `# lessons.md — What We Learned\n\n> Append-only. One line per lesson.\n\n---\n\n${lessonLine}\n`);
    return;
  }

  let content = fs.readFileSync(lessonsPath, 'utf8');
  // Accept both LF and CRLF separators.
  const separator = /---\r?\n/;
  if (separator.test(content)) {
    // Use a replacer function: with a replacement *string*, sequences such as
    // `$&` or `$'` inside the lesson text would be expanded by replace().
    content = content.replace(separator, (matched) => `${matched}\n${lessonLine}\n`);
  } else {
    content += `\n${lessonLine}\n`;
  }
  fs.writeFileSync(lessonsPath, content);
}
|
|
557
|
+
|
|
558
|
+
/**
 * Resolve the command used to verify a task.
 *
 * Parses atris/TODO.md and looks for the first task whose title equals
 * `taskTitle`, searching in-progress, then backlog, then completed tasks.
 * Returns that task's `verify` field when it has one. Falls back to
 * 'npm test' when TODO.md is missing, the task is not listed, or the task
 * carries no verify field.
 */
function getVerifyCommand(cwd, taskTitle) {
  const fallback = 'npm test';

  const todoFile = path.join(cwd, 'atris', 'TODO.md');
  if (!fs.existsSync(todoFile)) {
    return fallback;
  }

  const parsed = parseTodo(todoFile);
  const allTasks = [...parsed.inProgress, ...parsed.backlog, ...parsed.completed];
  for (const entry of allTasks) {
    if (entry.title === taskTitle) {
      // Only the first title match counts, with or without a verify field.
      return entry.verify ? entry.verify : fallback;
    }
  }
  return fallback;
}
|
|
576
|
+
|
|
528
577
|
function runTaskOnce(context, options = {}) {
|
|
529
|
-
const { verbose = false } = options;
|
|
578
|
+
const { verbose = false, cwd = process.cwd() } = options;
|
|
530
579
|
const phaseResults = {};
|
|
531
580
|
const startedAt = Date.now();
|
|
581
|
+
const verifyCmd = getVerifyCommand(cwd, context.task);
|
|
532
582
|
|
|
533
583
|
for (const phase of ['plan', 'do', 'review']) {
|
|
534
584
|
const t0 = Date.now();
|
|
@@ -542,11 +592,41 @@ function runTaskOnce(context, options = {}) {
|
|
|
542
592
|
|
|
543
593
|
const reviewOutput = phaseResults.review.output || '';
|
|
544
594
|
|
|
595
|
+
// After review succeeds, run verify command if present
|
|
596
|
+
let verifyPass = false;
|
|
597
|
+
let verifyRan = false;
|
|
598
|
+
if (!reviewOutput.includes('failed') && verifyCmd) {
|
|
599
|
+
verifyRan = true;
|
|
600
|
+
let t0 = Date.now();
|
|
601
|
+
try {
|
|
602
|
+
execSync(verifyCmd, { cwd, stdio: 'pipe' });
|
|
603
|
+
verifyPass = true;
|
|
604
|
+
const verifyTime = Math.round((Date.now() - t0) / 1000);
|
|
605
|
+
phaseResults.verify = {
|
|
606
|
+
output: `Verify passed (${verifyTime}s)`,
|
|
607
|
+
elapsedSeconds: verifyTime,
|
|
608
|
+
};
|
|
609
|
+
} catch (e) {
|
|
610
|
+
const verifyTime = Math.round((Date.now() - t0) / 1000);
|
|
611
|
+
phaseResults.verify = {
|
|
612
|
+
output: `Verify failed: ${e.message}`,
|
|
613
|
+
elapsedSeconds: verifyTime,
|
|
614
|
+
};
|
|
615
|
+
try {
|
|
616
|
+
const slug = (context.task || 'unknown').replace(/\s+/g, '-').toLowerCase().slice(0, 40);
|
|
617
|
+
writeLesson(cwd, `verify-fail-${slug}`, 'fail', `Verify command \`${verifyCmd}\` failed: ${e.message.split('\n')[0]}`);
|
|
618
|
+
} catch { /* lesson write must not crash the tick */ }
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
|
|
545
622
|
return {
|
|
546
|
-
success: !reviewOutput.includes('failed'),
|
|
623
|
+
success: !reviewOutput.includes('failed') && (!verifyRan || verifyPass),
|
|
547
624
|
elapsedSeconds: Math.round((Date.now() - startedAt) / 1000),
|
|
548
625
|
phaseResults,
|
|
549
626
|
reviewOutput,
|
|
627
|
+
verifyCmd,
|
|
628
|
+
verifyPass,
|
|
629
|
+
verifyRan,
|
|
550
630
|
};
|
|
551
631
|
}
|
|
552
632
|
|
|
@@ -579,6 +659,47 @@ function logCompletion(description) {
|
|
|
579
659
|
fs.writeFileSync(logFile, content);
|
|
580
660
|
}
|
|
581
661
|
|
|
662
|
+
/**
 * Compute the reward score for one tick from its execution signals.
 *
 * Scoring:
 *   +1  review output does not contain 'failed' (validator clean)
 *   +3  the verify command ran and passed
 *   +2  on top of that, when the verify command is exactly 'npm test'
 *   +1  the do-phase output looks like a commit landed
 *   -3  the tick outcome is 'halted'
 *
 * Missing pieces of `execution` (no review output, no phase results) simply
 * earn no points instead of throwing.
 *
 * @param {object} execution - Task result: reviewOutput, verifyRan, verifyPass, phaseResults.
 * @param {string} tickOutcome - Outcome label of the tick, e.g. 'built' or 'halted'.
 * @param {string} verifyCmd - Verify command that was used for the task.
 * @returns {number} Sum of the rewards listed above.
 */
function computeTickReward(execution, tickOutcome, verifyCmd) {
  const run = execution || {};
  let reward = 0;

  // Validator clean: review passed without 'failed'
  const reviewOutput = String(run.reviewOutput || '');
  if (!reviewOutput.includes('failed')) {
    reward += 1;
  }

  // Verify passed: +3, and +2 more when it was the npm test suite
  if (run.verifyRan && run.verifyPass) {
    reward += 3;
    if (verifyCmd === 'npm test') {
      reward += 2;
    }
  }

  // Commit landed: look for git commit patterns in the do-phase output.
  const doPhase = run.phaseResults && run.phaseResults.do;
  const doOutput = String((doPhase && doPhase.output) || '');
  // git prints "[branch hash] subject" and then " N file(s) changed" on the
  // next line, indented by a space, so allow several whitespace characters
  // (including the line break) before the count.
  const commitSummary = /\[.*\s+\d+\s+file.*changed/i;
  // Word boundary so "uncommitted" is not mistaken for a landed commit.
  const committedWord = /\bcommitted\b/i;
  if (commitSummary.test(doOutput) || doOutput.includes('git commit') || committedWord.test(doOutput)) {
    reward += 1;
  }

  // Halt caught hallucination: -3
  if (tickOutcome === 'halted') {
    reward -= 3;
  }

  return reward;
}
|
|
702
|
+
|
|
582
703
|
/**
|
|
583
704
|
* Append a plain-language tick summary block to today's journal `## Notes`.
|
|
584
705
|
* Fields:
|
|
@@ -588,9 +709,10 @@ function logCompletion(description) {
|
|
|
588
709
|
* - nextStep: what the next tick will do
|
|
589
710
|
* - idle: when true, block must contain literal "0 tasks in 0s"
|
|
590
711
|
* so getIdleTickCount still works.
|
|
712
|
+
* - reward: optional tick reward score (from computeTickReward)
|
|
591
713
|
* Safe to call inside a try/catch — a write failure must never crash a tick.
|
|
592
714
|
*/
|
|
593
|
-
function appendTickSummary(cwd, { time, outcome, horizon, nextStep, idle } = {}) {
|
|
715
|
+
function appendTickSummary(cwd, { time, outcome, horizon, nextStep, idle, reward } = {}) {
|
|
594
716
|
const now = new Date();
|
|
595
717
|
const yyyy = now.getFullYear();
|
|
596
718
|
const mm = String(now.getMonth() + 1).padStart(2, '0');
|
|
@@ -623,6 +745,10 @@ function appendTickSummary(cwd, { time, outcome, horizon, nextStep, idle } = {})
|
|
|
623
745
|
` ${horizonLine}`,
|
|
624
746
|
` ${nextLine}`,
|
|
625
747
|
];
|
|
748
|
+
// Add reward score if present
|
|
749
|
+
if (reward !== undefined && reward !== null) {
|
|
750
|
+
blockLines.push(` Reward: ${reward}`);
|
|
751
|
+
}
|
|
626
752
|
// Idle marker must be the last non-empty line so getIdleTickCount, which
|
|
627
753
|
// scans bottom-up, counts this block when idle=true.
|
|
628
754
|
if (idleLine) blockLines.push(` ${idleLine}`);
|
|
@@ -644,20 +770,62 @@ function appendTickSummary(cwd, { time, outcome, horizon, nextStep, idle } = {})
|
|
|
644
770
|
}
|
|
645
771
|
|
|
646
772
|
/**
 * Read the current endgame state from atris/TODO.md.
 *
 * Returns { slug, pickedAt, horizon, remaining, completed }:
 *   - slug / pickedAt / horizon come from the **Slug:**, **Picked:** and
 *     **Horizon:** fields of the `## Endgame` section,
 *   - remaining counts backlog plus in-progress tasks tagged 'endgame',
 *   - completed counts completed tasks tagged 'endgame'.
 * When TODO.md is missing or anything throws, the "unset" state is returned
 * (slug 'unset', pickedAt null, empty horizon, zero counts).
 */
function readEndgameState(cwd) {
  const unsetState = () => ({ slug: 'unset', pickedAt: null, horizon: '', remaining: 0, completed: 0 });
  const isEndgameTask = (task) => task.tag === 'endgame';

  try {
    const todoPath = path.join(cwd, 'atris', 'TODO.md');
    if (!fs.existsSync(todoPath)) {
      return unsetState();
    }

    const todo = parseTodo(todoPath);
    const content = fs.readFileSync(todoPath, 'utf8');

    // Body of the "## Endgame" section, up to the next "##" heading or EOF.
    const sectionMatch = content.match(/##\s+Endgame\s*\n([\s\S]*?)(?=\n##|$)/);
    const section = sectionMatch ? sectionMatch[1] : '';

    // First capture group of `pattern` within the section, trimmed.
    const readField = (pattern, fallback) => {
      const found = section.match(pattern);
      return found ? found[1].trim() : fallback;
    };

    const openCount = todo.backlog.filter(isEndgameTask).length
      + todo.inProgress.filter(isEndgameTask).length;
    const doneCount = todo.completed.filter(isEndgameTask).length;

    return {
      slug: readField(/\*\*Slug:\*\*\s*(\S+)/, 'unset'),
      pickedAt: readField(/\*\*Picked:\*\*\s*(.+)/, null),
      horizon: readField(/\*\*Horizon:\*\*\s*(.+)/, ''),
      remaining: openCount,
      completed: doneCount,
    };
  } catch {
    return unsetState();
  }
}
|
|
660
802
|
|
|
803
|
+
/**
 * Slug of the current endgame as reported by readEndgameState
 * ('unset' when no endgame state could be read).
 */
function readHorizonSlug(cwd) {
  const { slug } = readEndgameState(cwd);
  return slug;
}
|
|
806
|
+
|
|
807
|
+
/**
 * Write a scorecard when the endgame that was active at the start of the
 * tick has just been completed.
 *
 * A scorecard is written only if all of these hold:
 *   - `startingEndgame` had a real slug and at least one remaining task,
 *   - the atris/ folder exists,
 *   - detectEndgameCompletion reports completion for that same slug,
 *   - no existing scorecard already carries that slug.
 *
 * @param {string} cwd - Project root containing the `atris/` folder.
 * @param {object} startingEndgame - Endgame state ({ slug, remaining, pickedAt }) captured before the task ran.
 * @returns {boolean} true when a scorecard was written, false otherwise.
 */
function maybeWriteCompletedEndgameScorecard(cwd, startingEndgame) {
  const hadOpenEndgame = Boolean(startingEndgame)
    && startingEndgame.slug !== 'unset'
    && startingEndgame.remaining !== 0;
  if (!hadOpenEndgame) {
    return false;
  }

  const atrisDir = path.join(cwd, 'atris');
  if (!fs.existsSync(atrisDir)) {
    return false;
  }

  const { complete, endgameSlug } = detectEndgameCompletion(atrisDir);
  if (!complete) {
    return false;
  }
  if (endgameSlug !== startingEndgame.slug) {
    return false;
  }

  // Never write a second scorecard for the same endgame.
  const existing = readScorecards(atrisDir);
  if (existing.some((card) => card.slug === endgameSlug)) {
    return false;
  }

  const scorecard = buildScorecardData(atrisDir, {
    slug: endgameSlug,
    pickedAt: startingEndgame.pickedAt,
  });
  writeScorecard(atrisDir, scorecard);
  return true;
}
|
|
828
|
+
|
|
661
829
|
/**
|
|
662
830
|
* Main loop. Suggest → justify → approve → execute, one at a time.
|
|
663
831
|
*/
|
|
@@ -737,24 +905,11 @@ function getTickStatus(cwd) {
|
|
|
737
905
|
}
|
|
738
906
|
}
|
|
739
907
|
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
const
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
if (fs.existsSync(todoPath)) {
|
|
746
|
-
const todoContent = fs.readFileSync(todoPath, 'utf8');
|
|
747
|
-
const endgameMatch = todoContent.match(/##\s+Endgame\s*\n([\s\S]*?)(?=\n##|$)/);
|
|
748
|
-
if (endgameMatch) {
|
|
749
|
-
const slugMatch = endgameMatch[1].match(/\*\*Slug:\*\*\s*(.+)/);
|
|
750
|
-
const horizonMatch = endgameMatch[1].match(/\*\*Horizon:\*\*\s*(.+)/);
|
|
751
|
-
if (slugMatch) slug = slugMatch[1].trim();
|
|
752
|
-
if (horizonMatch) horizon = horizonMatch[1].trim();
|
|
753
|
-
}
|
|
754
|
-
const todo = parseTodo(todoPath);
|
|
755
|
-
remaining = todo.backlog.filter(t => t.tag === 'endgame').length;
|
|
756
|
-
completedEndgame = todo.completed.filter(t => /^[A-Z]\d+[a-z]?[:\s]/.test((t.title || '').trim())).length;
|
|
757
|
-
}
|
|
908
|
+
const endgame = readEndgameState(cwd);
|
|
909
|
+
const slug = endgame.slug === 'unset' ? '(no endgame active — feed inbox or /endgame)' : endgame.slug;
|
|
910
|
+
const horizon = endgame.horizon;
|
|
911
|
+
const remaining = endgame.remaining;
|
|
912
|
+
const completedEndgame = endgame.completed;
|
|
758
913
|
|
|
759
914
|
const total = remaining + completedEndgame;
|
|
760
915
|
const done = completedEndgame;
|
|
@@ -912,6 +1067,93 @@ function getRecentSignals(cwd) {
|
|
|
912
1067
|
return { recentCommits, wikiHealth, recentLessons };
|
|
913
1068
|
}
|
|
914
1069
|
|
|
1070
|
+
/**
 * Pick one endgame candidate, favouring horizon types that earned high
 * rewards in the past.
 *
 * Reads the last 10 scorecards, infers each one's type from the slug prefix
 * (text before the first dash), and computes the mean reward per type. Each
 * candidate gets expectedValue = mean reward of its type × confidence; a
 * candidate whose type has no history gets a mean of 0. Then an 80/20
 * exploit/explore split is applied: 80% of the time the highest expected
 * value wins (ties broken by confidence), otherwise a random candidate.
 *
 * Falls back to the candidate with the highest confidence (scored: false)
 * when there is no atris folder, no scorecards, or scoring throws.
 * Scorecards without a usable slug or a finite totalReward are ignored, and
 * a missing or non-numeric confidence counts as 0.
 *
 * @param {string} cwd - Current working directory
 * @param {array} candidates - Array of { title, confidence, rationale }
 * @returns {object} - Single candidate: { title, confidence, rationale, scored, reason }.
 *   For an empty candidate list only { scored: false, reason: 'no candidates' }.
 */
function scoreEndgameCandidates(cwd, candidates) {
  const pool = Array.isArray(candidates) ? candidates.filter(Boolean) : [];
  if (pool.length === 0) {
    return { scored: false, reason: 'no candidates' };
  }

  // Confidence as a finite number; anything else would turn scores into NaN.
  const confidenceOf = (candidate) => {
    const value = Number(candidate.confidence);
    return Number.isFinite(value) ? value : 0;
  };

  // Fallback used whenever historical scoring is not possible.
  const bestByConfidence = (reason) => {
    const best = pool.reduce((a, b) => (confidenceOf(a) > confidenceOf(b) ? a : b), pool[0]);
    return { ...best, scored: false, reason };
  };

  const atrisDir = path.join(cwd, 'atris');
  if (!fs.existsSync(atrisDir)) {
    // No atris folder yet - can't score, return best by confidence
    return bestByConfidence('no atris folder');
  }

  try {
    const scorecards = readScorecards(atrisDir).slice(-10); // Last 10
    if (scorecards.length === 0) {
      // No scorecards yet - return best by confidence
      return bestByConfidence('no scorecards');
    }

    // Group rewards by type. A Map is used because types are arbitrary text
    // and must not collide with Object.prototype keys such as "constructor".
    const rewardsByType = new Map();
    for (const sc of scorecards) {
      // Lower-cased because it is matched against the lower-cased title below.
      const type = String((sc && sc.slug) || '').split('-')[0].toLowerCase();
      const reward = Number(sc && sc.totalReward);
      if (!type || !Number.isFinite(reward)) continue;
      if (!rewardsByType.has(type)) rewardsByType.set(type, []);
      rewardsByType.get(type).push(reward);
    }

    // Calculate mean reward per type
    const typeMeans = new Map();
    for (const [type, rewards] of rewardsByType) {
      typeMeans.set(type, rewards.reduce((a, b) => a + b, 0) / rewards.length);
    }
    const knownTypes = [...typeMeans.keys()];

    // Score each candidate by expected value based on historical type mean
    const scored = pool.map((c) => {
      // Infer type from title keywords that match scorecard slug prefixes
      const titleLower = String(c.title || '').toLowerCase();
      const cType = knownTypes.find((t) => titleLower.includes(t)) || titleLower.split(/[\s\-]+/)[0];
      const historicalMean = typeMeans.has(cType) ? typeMeans.get(cType) : 0;
      const expectedValue = historicalMean * confidenceOf(c);
      return {
        ...c,
        expectedValue,
        type: cType,
        historicalMean,
      };
    });

    // Sort by expected value (descending); equal values fall back to confidence.
    scored.sort((a, b) => (b.expectedValue - a.expectedValue) || (confidenceOf(b) - confidenceOf(a)));

    // 80/20 split: 80% exploit (best), 20% explore (random)
    const exploit = Math.random() < 0.8;
    const selected = exploit
      ? scored[0]
      : scored[Math.floor(Math.random() * scored.length)];

    const reason = exploit
      ? `exploit: type=${selected.type} mean-reward=${selected.historicalMean.toFixed(1)} expected-value=${selected.expectedValue.toFixed(1)}`
      : `explore: random-candidate type=${selected.type}`;

    return {
      title: selected.title,
      confidence: selected.confidence,
      rationale: selected.rationale,
      scored: true,
      reason,
    };
  } catch (err) {
    // If scoring fails, fall back to best by confidence
    return bestByConfidence(`scoring error: ${err.message}`);
  }
}
|
|
1156
|
+
|
|
915
1157
|
/**
|
|
916
1158
|
* Propose 3 candidate next horizons for the autopilot loop. Combines
|
|
917
1159
|
* `getIdleTickCount` + `getRecentSignals` into a prompt asking the LLM
|
|
@@ -1100,6 +1342,8 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1100
1342
|
let tickOutcomeText = 'I stopped for a manual check.';
|
|
1101
1343
|
let tickNextStep = 'look for new work';
|
|
1102
1344
|
let lastTaskTitle = null;
|
|
1345
|
+
let lastExecution = null;
|
|
1346
|
+
let lastVerifyCmd = null;
|
|
1103
1347
|
|
|
1104
1348
|
for (let i = 0; i < maxIterations; i++) {
|
|
1105
1349
|
// Check time budget
|
|
@@ -1212,6 +1456,7 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1212
1456
|
// Execute: plan → do → review
|
|
1213
1457
|
lastTaskTitle = suggestion.task;
|
|
1214
1458
|
const context = { task: suggestion.task, kind: suggestion.kind };
|
|
1459
|
+
const startingEndgame = readEndgameState(cwd);
|
|
1215
1460
|
|
|
1216
1461
|
try {
|
|
1217
1462
|
if (verbose) {
|
|
@@ -1224,7 +1469,9 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1224
1469
|
'Next I will report what happened and whether review passed.'
|
|
1225
1470
|
].join('\n'));
|
|
1226
1471
|
}
|
|
1227
|
-
const execution = runTaskOnce(context, { verbose });
|
|
1472
|
+
const execution = runTaskOnce(context, { verbose, cwd });
|
|
1473
|
+
lastExecution = execution;
|
|
1474
|
+
lastVerifyCmd = execution.verifyCmd;
|
|
1228
1475
|
const planTime = execution.phaseResults.plan.elapsedSeconds;
|
|
1229
1476
|
if (verbose) console.log(` planned (${planTime}s)`);
|
|
1230
1477
|
|
|
@@ -1253,11 +1500,32 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1253
1500
|
}
|
|
1254
1501
|
if (verbose) console.log(` reviewed (${reviewTime}s)`);
|
|
1255
1502
|
|
|
1503
|
+
// Handle verify failure
|
|
1504
|
+
if (!execution.verifyPass) {
|
|
1505
|
+
tickOutcome = 'halted';
|
|
1506
|
+
tickOutcomeText = `I planned, built, and reviewed "${lastTaskTitle}" but verify failed.`;
|
|
1507
|
+
tickNextStep = 'verify failed, halting';
|
|
1508
|
+
writeLesson(cwd, 'verify-failed', 'fail', `Task "${lastTaskTitle}" passed review but failed verify command.`);
|
|
1509
|
+
if (verbose) {
|
|
1510
|
+
console.log(` verify failed. stopping for manual check.`);
|
|
1511
|
+
} else {
|
|
1512
|
+
printPlainBlock([
|
|
1513
|
+
`I planned, built, and reviewed the task, but the verify check failed.`,
|
|
1514
|
+
'',
|
|
1515
|
+
'Next I stopped for a manual check.'
|
|
1516
|
+
].join('\n'));
|
|
1517
|
+
}
|
|
1518
|
+
break;
|
|
1519
|
+
}
|
|
1520
|
+
|
|
1256
1521
|
completed++;
|
|
1257
1522
|
tickOutcome = 'built';
|
|
1258
1523
|
tickOutcomeText = `I planned, built, and reviewed "${suggestion.task}".`;
|
|
1259
1524
|
tickNextStep = 'pick the next endgame task';
|
|
1260
1525
|
logCompletion(suggestion.task);
|
|
1526
|
+
if (maybeWriteCompletedEndgameScorecard(cwd, startingEndgame)) {
|
|
1527
|
+
tickNextStep = 'pick the next horizon';
|
|
1528
|
+
}
|
|
1261
1529
|
if (verbose) {
|
|
1262
1530
|
console.log(` done. ${completed} task${completed > 1 ? 's' : ''} completed.`);
|
|
1263
1531
|
console.log('');
|
|
@@ -1301,12 +1569,20 @@ async function autopilotAtris(description, options = {}) {
|
|
|
1301
1569
|
minute: '2-digit'
|
|
1302
1570
|
}).toLowerCase();
|
|
1303
1571
|
const idle = tickOutcome === 'idle' || (completed === 0 && tickOutcome !== 'halted');
|
|
1572
|
+
|
|
1573
|
+
// Compute reward score if we had an execution
|
|
1574
|
+
let tickReward = undefined;
|
|
1575
|
+
if (lastExecution && lastVerifyCmd) {
|
|
1576
|
+
tickReward = computeTickReward(lastExecution, tickOutcome, lastVerifyCmd);
|
|
1577
|
+
}
|
|
1578
|
+
|
|
1304
1579
|
appendTickSummary(cwd, {
|
|
1305
1580
|
time,
|
|
1306
1581
|
outcome: tickOutcomeText,
|
|
1307
1582
|
horizon: horizonSlug === 'unset' ? null : horizonSlug,
|
|
1308
1583
|
nextStep: tickNextStep,
|
|
1309
|
-
idle
|
|
1584
|
+
idle,
|
|
1585
|
+
reward: tickReward
|
|
1310
1586
|
});
|
|
1311
1587
|
} catch {
|
|
1312
1588
|
/* journal write failure must not crash the tick */
|
|
@@ -1341,9 +1617,14 @@ module.exports = {
|
|
|
1341
1617
|
getIdleTickCount,
|
|
1342
1618
|
getRecentSignals,
|
|
1343
1619
|
getTickStatus,
|
|
1620
|
+
getVerifyCommand,
|
|
1621
|
+
computeTickReward,
|
|
1622
|
+
maybeWriteCompletedEndgameScorecard,
|
|
1344
1623
|
renderHumanSuggestion,
|
|
1345
1624
|
renderHumanTickIntro,
|
|
1346
1625
|
proposeCandidateHorizons,
|
|
1347
1626
|
runTaskOnce,
|
|
1348
|
-
|
|
1627
|
+
scoreEndgameCandidates,
|
|
1628
|
+
suggestNextTask,
|
|
1629
|
+
writeLesson
|
|
1349
1630
|
};
|