atris 3.2.0 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GETTING_STARTED.md +65 -131
- package/README.md +18 -2
- package/atris/GETTING_STARTED.md +65 -131
- package/atris/PERSONA.md +5 -1
- package/atris/atris.md +122 -153
- package/atris/skills/aeo/SKILL.md +117 -0
- package/atris/skills/atris/SKILL.md +49 -25
- package/atris/skills/create-member/SKILL.md +29 -9
- package/atris/skills/endgame/SKILL.md +9 -0
- package/atris/skills/research-search/SKILL.md +167 -0
- package/atris/skills/research-search/arxiv_search.py +157 -0
- package/atris/skills/research-search/program.md +48 -0
- package/atris/skills/research-search/results.tsv +6 -0
- package/atris/skills/research-search/scholar_search.py +154 -0
- package/atris/skills/tidy/SKILL.md +36 -21
- package/atris/team/_template/MEMBER.md +2 -0
- package/atris/team/validator/MEMBER.md +35 -1
- package/atris.md +118 -178
- package/bin/atris.js +30 -5
- package/cli/__pycache__/atris_code.cpython-314.pyc +0 -0
- package/cli/__pycache__/runtime_guard.cpython-312.pyc +0 -0
- package/cli/__pycache__/runtime_guard.cpython-314.pyc +0 -0
- package/cli/atris_code.py +889 -0
- package/cli/runtime_guard.py +693 -0
- package/commands/align.js +15 -0
- package/commands/app.js +316 -0
- package/commands/autopilot.js +390 -7
- package/commands/business.js +677 -2
- package/commands/computer.js +1979 -43
- package/commands/context-sync.js +5 -0
- package/commands/lifecycle.js +12 -0
- package/commands/plugin.js +24 -0
- package/commands/pull.js +40 -1
- package/commands/push.js +44 -0
- package/commands/serve.js +1 -0
- package/commands/sync.js +272 -76
- package/commands/verify.js +50 -1
- package/commands/wiki.js +27 -2
- package/lib/file-ops.js +13 -1
- package/lib/journal.js +23 -0
- package/lib/scorecard.js +42 -4
- package/lib/sync-telemetry.js +59 -0
- package/lib/todo.js +6 -0
- package/lib/wiki.js +150 -6
- package/package.json +2 -1
- package/utils/api.js +19 -0
- package/utils/auth.js +25 -1
- package/utils/config.js +24 -0
- package/utils/update-check.js +16 -0
package/commands/autopilot.js
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
const fs = require('fs');
|
|
10
10
|
const path = require('path');
|
|
11
|
-
const { execSync, execFileSync } = require('child_process');
|
|
11
|
+
const { execSync, execFileSync, spawnSync } = require('child_process');
|
|
12
12
|
const readline = require('readline');
|
|
13
13
|
const { getLogPath, ensureLogDirectory, createLogFile } = require('../lib/journal');
|
|
14
14
|
const { parseTodo } = require('../lib/todo');
|
|
@@ -731,6 +731,303 @@ function verifyJudgeIntegrity() {
|
|
|
731
731
|
return { ok: actual === REWARD_CHECKSUM, expected: REWARD_CHECKSUM, actual };
|
|
732
732
|
}
|
|
733
733
|
|
|
734
|
+
/**
 * Build the validator's plan-review prompt. Fresh context — the validator
 * reads the plan output and the contract fields as if it has never seen them.
 *
 * @param {object} context - Task context. This function reads `context.task`,
 *   `context.kind` (rendered as 'unknown' when falsy) and `context.files`.
 * @param {string} [planOutput] - Text captured from the plan phase; a
 *   placeholder is rendered when it is falsy.
 * @returns {string} The complete prompt text.
 */
function buildPlanReviewPrompt(context, planOutput) {
  // Only a non-empty array is listed; anything else renders the placeholder.
  const files = Array.isArray(context.files) && context.files.length
    ? context.files.join(', ')
    : 'none declared in context';
  // The SIGNOFF / REJECT / FIX / PROPOSED markers requested below are the same
  // markers parseVerdict and parseProposedBlock look for in the reply.
  // NOTE(review): this view shows every template line flush-left; confirm
  // whether the PROPOSED fields are indented in the published file.
  return `You are the validator in plan-review mode. You have NOT seen the planning context — read everything fresh.

Task: "${context.task}"
Kind: ${context.kind || 'unknown'}
Files declared in context: ${files}

Plan output from the navigator:
---
${planOutput || '(no plan output captured)'}
---

Read from disk:
- atris/atris.md (the workspace protocol — operating rules and task shape)
- atris/TODO.md (find this task; inspect Files, Exit, Verify, After, Rollback)
- atris/lessons.md (recent failures — last 20 lines)

Decide if the plan is safe to execute. Check:
1. Verify points at a falsifiable rubric or test (not \`true\`, \`echo ok\`, or similar).
Prefer \`atris verify <slug> --section <name>\`.
2. Files are explicitly declared (not empty, not vague).
3. Rollback is named (commit, checkpoint, or \`git revert\`).
4. The plan's claims match the declared Task fields.
5. Nothing in lessons.md contradicts this plan.

Output EXACTLY one of these two formats as the LAST thing in your response. No preamble before the verdict line.

SIGNOFF: <one sentence on why the plan is safe>

or

REJECT: <one sentence on what is wrong>
FIX: <one sentence on what must change>
PROPOSED:
Files: <concrete path list, or omit this line if original is fine>
Exit: <sharp observable done condition, or omit this line if original is fine>
Verify: <falsifiable shell command, or omit this line if original is fine>
Rollback: <git revert <sha> or concrete checkpoint, or omit this line if original is fine>

Be a drafting partner, not just a critic. When you REJECT, write the PROPOSED block as a concrete draft the human can accept as-is, edit, or reject. Include each PROPOSED line only for fields that need changing; skip a line if the original is correct. Omit the entire PROPOSED block only if the rejection is about scope or intent rather than a draftable field.
`;
}
|
|
783
|
+
|
|
784
|
+
/**
 * Parse the validator's verdict from its raw output.
 *
 * Lines are scanned from the end backwards because the prompt asks for the
 * verdict to be the LAST thing in the response. The last line that starts
 * with `SIGNOFF:` or `REJECT:` (case-insensitive, surrounding whitespace
 * ignored) decides the result.
 *
 * For a REJECT, the `FIX:` line and the `PROPOSED:` block are looked up only
 * in the lines that FOLLOW the verdict line. Searching the whole output would
 * let an earlier mention of "PROPOSED:" in free prose shadow the real draft.
 *
 * @param {string} output - Raw validator output; null/undefined is treated as ''.
 * @returns {{verdict: 'SIGNOFF'|'REJECT', reason: string, fix: string, proposed: (object|null)}}
 *   If neither marker is present, a REJECT with a parse-failure reason.
 */
function parseVerdict(output) {
  // Split on LF or CRLF so a trailing '\r' never reaches the field parsers.
  const rawLines = String(output || '').split(/\r?\n/);

  for (let i = rawLines.length - 1; i >= 0; i--) {
    const line = rawLines[i].trim();
    if (!line) continue;

    if (/^SIGNOFF\s*:/i.test(line)) {
      return { verdict: 'SIGNOFF', reason: line.replace(/^SIGNOFF\s*:\s*/i, ''), fix: '', proposed: null };
    }

    if (/^REJECT\s*:/i.test(line)) {
      const reason = line.replace(/^REJECT\s*:\s*/i, '');
      // Everything after the verdict line belongs to this verdict.
      const tail = rawLines.slice(i + 1);
      const fixLine = tail.map((l) => l.trim()).find((l) => /^FIX\s*:/i.test(l));
      const fix = fixLine ? fixLine.replace(/^FIX\s*:\s*/i, '') : '';
      const proposedAt = tail.findIndex((l) => /PROPOSED\s*:/i.test(l));
      const proposed = proposedAt === -1 ? null : parseProposedBlock(tail.slice(proposedAt));
      return { verdict: 'REJECT', reason, fix, proposed };
    }
  }

  // Fail closed: output without a parseable verdict is treated as a rejection.
  return {
    verdict: 'REJECT',
    reason: 'validator output did not contain SIGNOFF or REJECT',
    fix: 'ensure validator emits machine-parseable verdict as the last line',
    proposed: null,
  };
}
|
|
817
|
+
|
|
818
|
+
/**
 * Parse a PROPOSED block: up to four optional fields (Files, Exit, Verify,
 * Rollback), each on its own line as `Name: value`, optionally indented.
 *
 * @param {string[]} lines - Lines starting AT the `PROPOSED:` marker line.
 * @returns {({files?: string, exit?: string, verify?: string, rollback?: string}|null)}
 *   Only the fields that were present with a non-empty value, or null when
 *   there is no block or no usable field.
 */
function parseProposedBlock(lines) {
  if (!lines || !lines.length || !/PROPOSED\s*:/i.test(lines[0] || '')) return null;

  const fieldPattern = /^\s*(Files|Exit|Verify|Rollback)\s*:\s*(.+)$/i;
  const proposed = {};

  for (let j = 1; j < lines.length; j++) {
    // Drop a trailing CR: '.' never matches '\r', so CRLF output would
    // otherwise fail every field match and silently lose the draft.
    const raw = String(lines[j]).replace(/\r$/, '');

    // Stop at the first unindented line that is not one of the four fields.
    // Blank and indented lines do not end the block; they are skipped.
    if (/^\S/.test(raw) && !/^(Files|Exit|Verify|Rollback)\s*:/i.test(raw)) break;

    const m = raw.match(fieldPattern);
    if (!m) continue;
    const value = m[2].trim();
    // A whitespace-only value carries no proposal; do not record it.
    if (value) proposed[m[1].toLowerCase()] = value;
  }

  return Object.keys(proposed).length ? proposed : null;
}
|
|
844
|
+
|
|
845
|
+
/**
 * Default executor for plan-review: run a fresh `claude -p <prompt>` call
 * restricted to the Bash, Read, Grep and Glob tools, and return its stdout.
 * Kept thin so tests can inject a stub via options.planReviewExec.
 *
 * The prompt is passed directly as an argument with execFileSync. No shell
 * string is built and no temp file is written into the workspace, so
 * concurrent ticks cannot clobber each other's prompt and the validator's own
 * tools cannot see a stray prompt file.
 *
 * @param {string} prompt - Prompt text for the validator.
 * @param {{cwd?: string, timeout?: number}} [opts] - Working directory for the
 *   child process and a timeout in milliseconds (default 180000).
 * @returns {string} The child's stdout; '' when it printed nothing.
 * @throws {Error} The child_process error when the call fails and produced no
 *   stdout (for example the binary is missing).
 */
function defaultPlanReviewExecutor(prompt, { cwd, timeout = 180000 } = {}) {
  // The former "$(cat file)" form stripped trailing newlines; keep the
  // argument identical to what the CLI received before.
  const promptArg = String(prompt).replace(/\n+$/, '');

  const env = { ...process.env };
  // NOTE(review): presumably removed so the child does not see itself as a
  // nested session — confirm against the CLI's handling of CLAUDECODE.
  delete env.CLAUDECODE;

  try {
    const output = execFileSync(
      'claude',
      ['-p', promptArg, '--allowedTools', 'Bash,Read,Grep,Glob'],
      {
        cwd,
        encoding: 'utf8',
        timeout,
        stdio: 'pipe',
        maxBuffer: 10 * 1024 * 1024,
        env,
      }
    );
    return output || '';
  } catch (err) {
    // A non-zero exit or timeout may still carry a usable verdict on stdout.
    if (err.stdout) return err.stdout;
    throw err;
  }
}
|
|
872
|
+
|
|
873
|
+
/**
 * Default executor for codex: run `<cmd> -p <prompt>` and return its stdout.
 * The prompt is passed as a command-line argument (not via stdin).
 * Users can override the binary with the ATRIS_CODEX_CMD env var; tests
 * inject a stub via options.codexExec.
 *
 * NOTE(review): confirm that `-p` is the prompt flag for the installed codex
 * CLI version; this function only forwards it.
 *
 * @param {string} prompt - Prompt text for codex.
 * @param {{cwd?: string, timeout?: number}} [opts] - Working directory for the
 *   child process and a timeout in milliseconds (default 180000).
 * @returns {string} The child's stdout; '' when it exited 0 without output.
 * @throws {Error} When the process could not be run, or exited non-zero
 *   without printing anything to stdout.
 */
function defaultCodexExecutor(prompt, { cwd, timeout = 180000 } = {}) {
  const cmd = process.env.ATRIS_CODEX_CMD || 'codex';
  const proc = spawnSync(cmd, ['-p', prompt], {
    cwd,
    encoding: 'utf8',
    timeout,
    stdio: 'pipe',
    maxBuffer: 10 * 1024 * 1024,
  });

  // Any stdout wins, even on a non-zero exit: it may still hold a verdict.
  if (proc.stdout) return proc.stdout;

  // spawnSync reports launch failures and timeouts on `error` rather than
  // throwing; surface the real cause instead of "status null".
  if (proc.error) {
    throw new Error(`codex could not be run (${cmd}): ${proc.error.message}`);
  }
  if (proc.status !== 0) {
    throw new Error(`codex exited with status ${proc.status}: ${proc.stderr || 'no output'}`);
  }
  return '';
}
|
|
891
|
+
|
|
892
|
+
/**
 * Report whether the codex binary can be located.
 *
 * Probes `which <cmd>`, where <cmd> is ATRIS_CODEX_CMD when set and 'codex'
 * otherwise. Any failure to run the probe counts as "not available".
 * Tests override via options.hasCodex.
 *
 * @returns {boolean} true only when the `which` probe exits with status 0.
 */
function hasCodex() {
  const binary = process.env.ATRIS_CODEX_CMD || 'codex';
  let probe;
  try {
    probe = spawnSync('which', [binary], { stdio: 'pipe' });
  } catch {
    return false;
  }
  return probe.status === 0;
}
|
|
905
|
+
|
|
906
|
+
/**
 * Run plan-review: the validator (and optionally codex) read the plan and
 * decide if it is safe to execute.
 *
 * Codex is invoked only when the task explicitly opts in:
 *   - env ATRIS_USE_CODEX=1, or
 *   - context.tags includes 'codex', 'gray' or 'high-risk'
 * NOTE(review): an earlier description limited 'gray'/'high-risk' to
 * context.kind === 'endgame'; the code has never checked kind. Confirm which
 * one is intended.
 *
 * If codex is opted-in but not available, it is skipped and a note is
 * surfaced. If the codex call throws, the validator's verdict stands alone,
 * with a note. If both signers run, the result is SIGNOFF only when both sign
 * off; otherwise REJECT with both opinions in `reason`, and `split` tells
 * whether the two signers actually disagreed.
 *
 * A failure of the validator call itself is not caught here and propagates
 * to the caller.
 *
 * @param {object} args
 * @param {string} args.cwd - Workspace directory handed to the executors.
 * @param {object} args.context - Task context (reads `tags`; also passed to
 *   buildPlanReviewPrompt).
 * @param {string} args.planOutput - Output captured from the plan phase.
 * @param {object} [args.options] - Injection points: `planReviewExec`,
 *   `codexExec`, and `hasCodex` (a boolean, or a function returning one).
 * @returns {{verdict: string, reason: string, fix: string, proposed: (object|null),
 *   signers: string[], notes?: string, split?: boolean}}
 */
function runPlanReview({ cwd, context, planOutput, options = {} }) {
  const prompt = buildPlanReviewPrompt(context, planOutput);
  const tags = Array.isArray(context.tags) ? context.tags : [];

  // Primary signer: validator.
  const validatorExec = options.planReviewExec || defaultPlanReviewExecutor;
  const validatorOutput = validatorExec(prompt, { cwd, role: 'validator' });
  const primary = parseVerdict(validatorOutput);
  const validatorOnly = {
    ...primary,
    signers: ['validator'],
    proposed: primary.proposed || null,
  };

  // Codex: opted in explicitly, not inferred.
  const codexOptIn =
    process.env.ATRIS_USE_CODEX === '1' ||
    ['codex', 'gray', 'high-risk'].some((tag) => tags.includes(tag));
  if (!codexOptIn) return validatorOnly;

  // options.hasCodex may be a plain boolean or a probe function; a function
  // must be called, otherwise it would always count as "available".
  let codexAvailable;
  if (typeof options.hasCodex === 'function') {
    codexAvailable = Boolean(options.hasCodex());
  } else if (options.hasCodex != null) {
    codexAvailable = Boolean(options.hasCodex);
  } else {
    codexAvailable = hasCodex();
  }
  if (!codexAvailable) {
    return {
      ...validatorOnly,
      notes: 'codex was requested but not on PATH; skipped gracefully',
    };
  }

  const codexExec = options.codexExec || defaultCodexExecutor;
  let codexOutput;
  try {
    codexOutput = codexExec(prompt, { cwd, role: 'codex' });
  } catch (err) {
    return {
      ...validatorOnly,
      notes: `codex invocation failed: ${err.message}; falling back to single signer`,
    };
  }
  const codex = parseVerdict(codexOutput);

  if (primary.verdict === 'SIGNOFF' && codex.verdict === 'SIGNOFF') {
    return {
      verdict: 'SIGNOFF',
      reason: primary.reason,
      fix: '',
      proposed: null,
      signers: ['validator', 'codex'],
    };
  }

  // Any disagreement or joint reject → halt with both opinions surfaced.
  // Only a real disagreement is reported as a split; two REJECTs are not.
  // If either signer wrote a PROPOSED draft, surface the validator's first
  // (or codex's if validator didn't propose one).
  const split = primary.verdict !== codex.verdict;
  const label = split ? 'Split verdict.' : 'Joint reject.';
  const fallbackFix = split
    ? 'reconcile the two signers before re-planning'
    : 'address both rejections before re-planning';
  return {
    verdict: 'REJECT',
    reason: `${label} validator=${primary.verdict} (${primary.reason || 'no reason'}); codex=${codex.verdict} (${codex.reason || 'no reason'}).`,
    fix: primary.fix || codex.fix || fallbackFix,
    proposed: primary.proposed || codex.proposed || null,
    signers: ['validator', 'codex'],
    split,
  };
}
|
|
983
|
+
|
|
984
|
+
/**
 * Append a plan-review rejection to today's journal under ## Notes.
 * Intentionally does NOT write to lessons.md — rejections only become lessons
 * if a human spots a reusable failure pattern.
 *
 * The journal path is <cwd>/atris/logs/<YYYY>/<YYYY-MM-DD>.md, using the local
 * date. If that file does not exist, nothing is written. The entry is placed
 * directly below the `## Notes` heading; when the file has no such heading,
 * one is appended at the end of the file.
 *
 * All errors are swallowed on purpose: journaling must never crash the tick.
 *
 * @param {string} cwd - Workspace root containing the `atris/logs` tree.
 * @param {object} context - Task context; `context.task` is written out.
 * @param {object} review - Review result; reads `reason`, `fix`, `signers`,
 *   `proposed` ({files, exit, verify, rollback}) and `notes`.
 * @returns {void}
 */
function appendPlanRejection(cwd, context, review) {
  try {
    // Compute the journal path from the passed cwd so tests and isolated
    // workspaces both work. getLogPath() resolves against process.cwd()
    // which isn't always the task's workspace.
    const date = new Date();
    const year = date.getFullYear();
    const month = String(date.getMonth() + 1).padStart(2, '0');
    const day = String(date.getDate()).padStart(2, '0');
    const logFile = path.join(cwd, 'atris', 'logs', String(year), `${year}-${month}-${day}.md`);
    if (!fs.existsSync(logFile)) return;

    // Heading timestamp comes from toISOString(), i.e. it is in UTC.
    const now = date.toISOString().slice(0, 16).replace('T', ' ');
    const signers = (review.signers || []).join(' + ');

    const proposedFields = [
      ['Files', 'files'],
      ['Exit', 'exit'],
      ['Verify', 'verify'],
      ['Rollback', 'rollback'],
    ];
    const proposedBlock = review.proposed
      ? `**Proposed draft:**\n` +
        proposedFields
          .filter(([, key]) => review.proposed[key])
          .map(([label, key]) => `- ${label}: ${review.proposed[key]}\n`)
          .join('')
      : '';

    const block =
      `\n### Plan rejected — ${now}\n\n` +
      `**Task:** ${context.task}\n` +
      `**Signers:** ${signers}\n` +
      `**Reason:** ${review.reason}\n` +
      (review.fix ? `**Fix:** ${review.fix}\n` : '') +
      proposedBlock +
      (review.notes ? `**Notes:** ${review.notes}\n` : '');

    const content = fs.readFileSync(logFile, 'utf8');
    // Anchor to a line start so "### Notes" or an inline mention of
    // "## Notes" is not mistaken for the section heading.
    const heading = /^## Notes\b/m.exec(content);

    let updated;
    if (!heading) {
      updated = content.replace(/\s*$/, '') + `\n\n## Notes\n${block}\n`;
    } else {
      const eol = content.indexOf('\n', heading.index);
      // The heading can be the last line with no trailing newline; splicing
      // at eol + 1 would then insert the entry at the very top of the file.
      updated = eol === -1
        ? `${content}\n${block}`
        : content.slice(0, eol + 1) + block + content.slice(eol + 1);
    }
    fs.writeFileSync(logFile, updated);
  } catch {
    // journaling must never crash the tick
  }
}
|
|
1030
|
+
|
|
734
1031
|
function runTaskOnce(context, options = {}) {
|
|
735
1032
|
const { verbose = false, cwd = process.cwd() } = options;
|
|
736
1033
|
|
|
@@ -754,10 +1051,11 @@ function runTaskOnce(context, options = {}) {
|
|
|
754
1051
|
const verifyResult = getVerifyCommand(cwd, context.task);
|
|
755
1052
|
const verifyCmd = verifyResult.cmd;
|
|
756
1053
|
|
|
757
|
-
// Guard:
|
|
758
|
-
|
|
1054
|
+
// Guard: endgame tasks must have an explicit Verify field.
|
|
1055
|
+
// Reactive signals (inbox, staleness, imagined) use npm test as default.
|
|
1056
|
+
if (!verifyResult.explicit && context.kind === 'endgame') {
|
|
759
1057
|
writeLesson(cwd, 'no-verify-field', 'fail',
|
|
760
|
-
`Task "${context.task}" has no explicit **Verify:** field in TODO.md. Tick halted — every task must declare how to verify it.`);
|
|
1058
|
+
`Task "${context.task}" has no explicit **Verify:** field in TODO.md. Tick halted — every endgame task must declare how to verify it.`);
|
|
761
1059
|
return {
|
|
762
1060
|
outcome: 'halted',
|
|
763
1061
|
reason: 'no-verify-field',
|
|
@@ -768,10 +1066,92 @@ function runTaskOnce(context, options = {}) {
|
|
|
768
1066
|
};
|
|
769
1067
|
}
|
|
770
1068
|
|
|
771
|
-
|
|
1069
|
+
// Falsifiability gate (endgame + explicit Verify only).
|
|
1070
|
+
// Run Verify BEFORE the work. If it passes, the rubric is trivial or the
|
|
1071
|
+
// task is already done — either way, halt. This is the keystone that makes
|
|
1072
|
+
// Verify load-bearing. The cmd is captured here and reused post-execute so
|
|
1073
|
+
// an agent cannot swap the rubric mid-tick.
|
|
1074
|
+
const skipFalsifiability = options.skipFalsifiability === true;
|
|
1075
|
+
if (!skipFalsifiability && verifyResult.explicit && context.kind === 'endgame' && verifyCmd) {
|
|
1076
|
+
try {
|
|
1077
|
+
execSync(verifyCmd, { cwd, stdio: 'pipe', timeout: 60000 });
|
|
1078
|
+
writeLesson(cwd, 'verify-not-falsifiable', 'fail',
|
|
1079
|
+
`Verify \`${verifyCmd}\` passed before work started on "${context.task}". Either the rubric is trivial or the task is already done. Tick halted.`);
|
|
1080
|
+
return {
|
|
1081
|
+
outcome: 'halted',
|
|
1082
|
+
reason: 'verify-not-falsifiable',
|
|
1083
|
+
phaseResults: {},
|
|
1084
|
+
elapsedSeconds: 0,
|
|
1085
|
+
verifyRan: true,
|
|
1086
|
+
verifyPass: false,
|
|
1087
|
+
};
|
|
1088
|
+
} catch {
|
|
1089
|
+
// Pre-verify failed — good, the rubric is falsifiable. Proceed.
|
|
1090
|
+
}
|
|
1091
|
+
}
|
|
1092
|
+
|
|
1093
|
+
// Phase: plan
|
|
1094
|
+
{
|
|
1095
|
+
const t0 = Date.now();
|
|
1096
|
+
const result = (options.phaseExec || executePhaseDetailed)('plan', context, options);
|
|
1097
|
+
phaseResults.plan = {
|
|
1098
|
+
prompt: result.prompt,
|
|
1099
|
+
output: result.output || '',
|
|
1100
|
+
elapsedSeconds: Math.round((Date.now() - t0) / 1000),
|
|
1101
|
+
};
|
|
1102
|
+
}
|
|
1103
|
+
|
|
1104
|
+
// Phase: plan-review — validator reads the plan fresh and signs off or rejects.
|
|
1105
|
+
// Can be skipped via options.skipPlanReview (tests only). Codex is optional,
|
|
1106
|
+
// opt-in via env var / tags. On REJECT, the tick halts and the rejection is
|
|
1107
|
+
// journaled; lessons.md is NOT touched (only promoted lessons go there).
|
|
1108
|
+
if (!options.skipPlanReview) {
|
|
1109
|
+
const t0 = Date.now();
|
|
1110
|
+
const review = runPlanReview({
|
|
1111
|
+
cwd,
|
|
1112
|
+
context,
|
|
1113
|
+
planOutput: phaseResults.plan.output,
|
|
1114
|
+
options,
|
|
1115
|
+
});
|
|
1116
|
+
const elapsed = Math.round((Date.now() - t0) / 1000);
|
|
1117
|
+
phaseResults['plan-review'] = {
|
|
1118
|
+
output:
|
|
1119
|
+
`${review.verdict}: ${review.reason || ''}` +
|
|
1120
|
+
(review.fix ? `\nFIX: ${review.fix}` : '') +
|
|
1121
|
+
(review.notes ? `\n(${review.notes})` : ''),
|
|
1122
|
+
signers: review.signers,
|
|
1123
|
+
elapsedSeconds: elapsed,
|
|
1124
|
+
};
|
|
1125
|
+
|
|
1126
|
+
if (review.verdict === 'REJECT') {
|
|
1127
|
+
appendPlanRejection(cwd, context, review);
|
|
1128
|
+
return {
|
|
1129
|
+
outcome: 'halted',
|
|
1130
|
+
reason: 'plan-rejected-at-review',
|
|
1131
|
+
phaseResults,
|
|
1132
|
+
elapsedSeconds: Math.round((Date.now() - startedAt) / 1000),
|
|
1133
|
+
verifyRan: false,
|
|
1134
|
+
verifyPass: false,
|
|
1135
|
+
};
|
|
1136
|
+
}
|
|
1137
|
+
}
|
|
1138
|
+
|
|
1139
|
+
// Phase: do
|
|
1140
|
+
{
|
|
1141
|
+
const t0 = Date.now();
|
|
1142
|
+
const result = (options.phaseExec || executePhaseDetailed)('do', context, options);
|
|
1143
|
+
phaseResults.do = {
|
|
1144
|
+
prompt: result.prompt,
|
|
1145
|
+
output: result.output || '',
|
|
1146
|
+
elapsedSeconds: Math.round((Date.now() - t0) / 1000),
|
|
1147
|
+
};
|
|
1148
|
+
}
|
|
1149
|
+
|
|
1150
|
+
// Phase: review
|
|
1151
|
+
{
|
|
772
1152
|
const t0 = Date.now();
|
|
773
|
-
const result = executePhaseDetailed(
|
|
774
|
-
phaseResults
|
|
1153
|
+
const result = (options.phaseExec || executePhaseDetailed)('review', context, options);
|
|
1154
|
+
phaseResults.review = {
|
|
775
1155
|
prompt: result.prompt,
|
|
776
1156
|
output: result.output || '',
|
|
777
1157
|
elapsedSeconds: Math.round((Date.now() - t0) / 1000),
|
|
@@ -2146,7 +2526,10 @@ module.exports = {
|
|
|
2146
2526
|
proposeCandidateHorizons,
|
|
2147
2527
|
recordTickCommit,
|
|
2148
2528
|
regressionCheck,
|
|
2529
|
+
runPlanReview,
|
|
2149
2530
|
runTaskOnce,
|
|
2531
|
+
buildPlanReviewPrompt,
|
|
2532
|
+
parseVerdict,
|
|
2150
2533
|
scoreEndgameCandidates,
|
|
2151
2534
|
suggestNextTask,
|
|
2152
2535
|
writeLesson
|