atris 3.2.0 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/GETTING_STARTED.md +65 -131
  2. package/README.md +18 -2
  3. package/atris/GETTING_STARTED.md +65 -131
  4. package/atris/PERSONA.md +5 -1
  5. package/atris/atris.md +122 -153
  6. package/atris/skills/aeo/SKILL.md +117 -0
  7. package/atris/skills/atris/SKILL.md +49 -25
  8. package/atris/skills/create-member/SKILL.md +29 -9
  9. package/atris/skills/endgame/SKILL.md +9 -0
  10. package/atris/skills/research-search/SKILL.md +167 -0
  11. package/atris/skills/research-search/arxiv_search.py +157 -0
  12. package/atris/skills/research-search/program.md +48 -0
  13. package/atris/skills/research-search/results.tsv +6 -0
  14. package/atris/skills/research-search/scholar_search.py +154 -0
  15. package/atris/skills/tidy/SKILL.md +36 -21
  16. package/atris/team/_template/MEMBER.md +2 -0
  17. package/atris/team/validator/MEMBER.md +35 -1
  18. package/atris.md +118 -178
  19. package/bin/atris.js +30 -5
  20. package/cli/__pycache__/atris_code.cpython-314.pyc +0 -0
  21. package/cli/__pycache__/runtime_guard.cpython-312.pyc +0 -0
  22. package/cli/__pycache__/runtime_guard.cpython-314.pyc +0 -0
  23. package/cli/atris_code.py +889 -0
  24. package/cli/runtime_guard.py +693 -0
  25. package/commands/align.js +15 -0
  26. package/commands/app.js +316 -0
  27. package/commands/autopilot.js +390 -7
  28. package/commands/business.js +677 -2
  29. package/commands/computer.js +1979 -43
  30. package/commands/context-sync.js +5 -0
  31. package/commands/lifecycle.js +12 -0
  32. package/commands/plugin.js +24 -0
  33. package/commands/pull.js +40 -1
  34. package/commands/push.js +44 -0
  35. package/commands/serve.js +1 -0
  36. package/commands/sync.js +272 -76
  37. package/commands/verify.js +50 -1
  38. package/commands/wiki.js +27 -2
  39. package/lib/file-ops.js +13 -1
  40. package/lib/journal.js +23 -0
  41. package/lib/scorecard.js +42 -4
  42. package/lib/sync-telemetry.js +59 -0
  43. package/lib/todo.js +6 -0
  44. package/lib/wiki.js +150 -6
  45. package/package.json +2 -1
  46. package/utils/api.js +19 -0
  47. package/utils/auth.js +25 -1
  48. package/utils/config.js +24 -0
  49. package/utils/update-check.js +16 -0
@@ -8,7 +8,7 @@
8
8
 
9
9
  const fs = require('fs');
10
10
  const path = require('path');
11
- const { execSync, execFileSync } = require('child_process');
11
+ const { execSync, execFileSync, spawnSync } = require('child_process');
12
12
  const readline = require('readline');
13
13
  const { getLogPath, ensureLogDirectory, createLogFile } = require('../lib/journal');
14
14
  const { parseTodo } = require('../lib/todo');
@@ -731,6 +731,303 @@ function verifyJudgeIntegrity() {
731
731
  return { ok: actual === REWARD_CHECKSUM, expected: REWARD_CHECKSUM, actual };
732
732
  }
733
733
 
734
/**
 * Assemble the prompt handed to the validator for the plan-review phase.
 *
 * The prompt embeds the task name, its kind, the files declared in the
 * context and the navigator's plan output, then tells the validator which
 * workspace files to read and which verdict format (SIGNOFF / REJECT + FIX +
 * optional PROPOSED block) to end its response with.
 *
 * @param {object} context - Task context; `task`, `kind` and `files` are read.
 * @param {string} [planOutput] - Plan text from the navigator; a placeholder
 *   is substituted when it is empty or missing.
 * @returns {string} The complete prompt, terminated by a newline.
 */
function buildPlanReviewPrompt(context, planOutput) {
  let declaredFiles = 'none declared in context';
  if (Array.isArray(context.files) && context.files.length) {
    declaredFiles = context.files.join(', ');
  }
  const kindLabel = context.kind || 'unknown';
  const planText = planOutput || '(no plan output captured)';

  const promptLines = [
    'You are the validator in plan-review mode. You have NOT seen the planning context — read everything fresh.',
    '',
    `Task: "${context.task}"`,
    `Kind: ${kindLabel}`,
    `Files declared in context: ${declaredFiles}`,
    '',
    'Plan output from the navigator:',
    '---',
    `${planText}`,
    '---',
    '',
    'Read from disk:',
    '- atris/atris.md (the workspace protocol — operating rules and task shape)',
    '- atris/TODO.md (find this task; inspect Files, Exit, Verify, After, Rollback)',
    '- atris/lessons.md (recent failures — last 20 lines)',
    '',
    'Decide if the plan is safe to execute. Check:',
    '1. Verify points at a falsifiable rubric or test (not `true`, `echo ok`, or similar).',
    'Prefer `atris verify <slug> --section <name>`.',
    '2. Files are explicitly declared (not empty, not vague).',
    '3. Rollback is named (commit, checkpoint, or `git revert`).',
    "4. The plan's claims match the declared Task fields.",
    '5. Nothing in lessons.md contradicts this plan.',
    '',
    'Output EXACTLY one of these two formats as the LAST thing in your response. No preamble before the verdict line.',
    '',
    'SIGNOFF: <one sentence on why the plan is safe>',
    '',
    'or',
    '',
    'REJECT: <one sentence on what is wrong>',
    'FIX: <one sentence on what must change>',
    'PROPOSED:',
    'Files: <concrete path list, or omit this line if original is fine>',
    'Exit: <sharp observable done condition, or omit this line if original is fine>',
    'Verify: <falsifiable shell command, or omit this line if original is fine>',
    'Rollback: <git revert <sha> or concrete checkpoint, or omit this line if original is fine>',
    '',
    'Be a drafting partner, not just a critic. When you REJECT, write the PROPOSED block as a concrete draft the human can accept as-is, edit, or reject. Include each PROPOSED line only for fields that need changing; skip a line if the original is correct. Omit the entire PROPOSED block only if the rejection is about scope or intent rather than a draftable field.',
    '', // keeps the trailing newline the prompt ends with
  ];
  return promptLines.join('\n');
}
783
+
784
/**
 * Parse the validator's verdict from its raw output.
 *
 * The output is scanned from the last line backwards because the verdict is
 * supposed to be the final thing the validator writes. Returns one of:
 *   { verdict: 'SIGNOFF', reason, fix: '', proposed: null }
 *   { verdict: 'REJECT', reason, fix, proposed }
 * If neither marker is present the result is a REJECT with a parse-fail reason.
 *
 * FIX and PROPOSED are only looked for AFTER the REJECT line, and PROPOSED must
 * start its own line. An earlier mention of "PROPOSED:" in free-form text can
 * therefore no longer shadow the real block.
 *
 * @param {string} output - Raw validator output; null/undefined is treated as ''.
 * @returns {{verdict: string, reason: string, fix: string, proposed: (object|null)}}
 */
function parseVerdict(output) {
  // Accept both LF and CRLF so raw lines handed to the PROPOSED parser carry
  // no stray '\r'.
  const rawLines = String(output || '').split(/\r?\n/);

  for (let i = rawLines.length - 1; i >= 0; i--) {
    const line = rawLines[i].trim();
    if (!line) continue;

    if (/^SIGNOFF\s*:/i.test(line)) {
      return { verdict: 'SIGNOFF', reason: line.replace(/^SIGNOFF\s*:\s*/i, ''), fix: '', proposed: null };
    }

    if (/^REJECT\s*:/i.test(line)) {
      const reason = line.replace(/^REJECT\s*:\s*/i, '');
      // Everything that follows the verdict line belongs to this verdict.
      const tail = rawLines.slice(i + 1);
      const fixLine = tail.map((l) => l.trim()).find((l) => /^FIX\s*:/i.test(l));
      const fix = fixLine ? fixLine.replace(/^FIX\s*:\s*/i, '') : '';
      // Raw (untrimmed) lines are passed on: the block parser uses leading
      // whitespace to tell indented fields from a new top-level marker.
      const proposedAt = tail.findIndex((l) => /^\s*PROPOSED\s*:/i.test(l));
      const proposed = proposedAt === -1 ? null : parseProposedBlock(tail.slice(proposedAt));
      return { verdict: 'REJECT', reason, fix, proposed };
    }
  }

  return {
    verdict: 'REJECT',
    reason: 'validator output did not contain SIGNOFF or REJECT',
    fix: 'ensure validator emits machine-parseable verdict as the last line',
    proposed: null,
  };
}
817
+
818
/**
 * Parse a PROPOSED block: up to four optional fields (Files, Exit, Verify,
 * Rollback), normally indented under a `PROPOSED:` line.
 *
 * @param {string[]} lines - Raw lines; lines[0] must contain `PROPOSED:`.
 *   Lines may still carry a trailing '\r' (CRLF output) — it is stripped here.
 * @returns {object|null} Object holding only the fields that were proposed
 *   (keys: files, exit, verify, rollback), or null when there is no block or
 *   no field carries a value.
 */
function parseProposedBlock(lines) {
  if (!Array.isArray(lines) || !lines.length || !/PROPOSED\s*:/i.test(lines[0] || '')) return null;

  const fieldPattern = /^\s*(Files|Exit|Verify|Rollback)\s*:\s*(.+)$/i;
  const proposed = {};

  for (let j = 1; j < lines.length; j++) {
    // Drop trailing whitespace (including '\r'): `.` never matches '\r', so a
    // CRLF line would otherwise fail the `(.+)$` match and lose the field.
    const raw = String(lines[j]).trimEnd();
    const m = raw.match(fieldPattern);
    if (m) {
      proposed[m[1].toLowerCase()] = m[2].trim();
      continue;
    }
    // Stop at a new top-level marker: an unindented line that is not one of
    // the four fields. Blank and indented non-field lines are skipped, not
    // treated as terminators.
    if (/^\S/.test(raw)) break;
  }

  return Object.keys(proposed).length ? proposed : null;
}
844
+
845
/**
 * Default executor for plan-review: run a fresh `claude -p` call and return
 * what it printed. Kept thin so tests can inject a stub via
 * options.planReviewExec.
 *
 * The prompt is handed to `claude` directly as an argv element (no shell, no
 * temp file). This avoids shell quoting of the prompt, and avoids a fixed-name
 * scratch file in the workspace that concurrent ticks could clobber or that
 * the spawned agent could read back.
 *
 * @param {string} prompt - Full prompt text.
 * @param {object} [opts]
 * @param {string} [opts.cwd=process.cwd()] - Working directory for the child.
 * @param {number} [opts.timeout=180000] - Kill the child after this many ms.
 * @returns {string} The child's stdout ('' when it printed nothing). If the
 *   child fails but produced stdout, that partial stdout is returned.
 * @throws {Error} The execFileSync error when the child fails with no stdout.
 */
function defaultPlanReviewExecutor(prompt, { cwd = process.cwd(), timeout = 180000 } = {}) {
  const env = { ...process.env };
  // NOTE(review): presumably removed so a nested claude session is not
  // refused when this runs from inside one — confirm.
  delete env.CLAUDECODE;

  try {
    const output = execFileSync(
      'claude',
      ['-p', String(prompt), '--allowedTools', 'Bash,Read,Grep,Glob'],
      {
        cwd,
        encoding: 'utf8',
        timeout,
        stdio: 'pipe',
        maxBuffer: 10 * 1024 * 1024,
        env,
      },
    );
    return output || '';
  } catch (err) {
    // A non-zero exit or timeout may still have produced a usable verdict.
    if (err.stdout) return err.stdout;
    throw err;
  }
}
872
+
873
/**
 * Default executor for codex: spawn the codex command with the prompt passed
 * as the argument of `-p` (not via stdin).
 * Users can override the command with the ATRIS_CODEX_CMD env var; tests
 * inject a stub via options.codexExec.
 *
 * NOTE(review): confirm that the installed codex CLI interprets `-p` as
 * "prompt"; this block does not verify the flag's meaning.
 *
 * @param {string} prompt - Full prompt text.
 * @param {object} [opts]
 * @param {string} [opts.cwd] - Working directory for the child.
 * @param {number} [opts.timeout=180000] - Kill the child after this many ms.
 * @returns {string} The child's stdout; partial stdout is returned even when
 *   the child failed, and '' when it exited 0 without output.
 * @throws {Error} When the child could not be started, was killed, or exited
 *   non-zero — and produced no stdout.
 */
function defaultCodexExecutor(prompt, { cwd, timeout = 180000 } = {}) {
  const cmd = process.env.ATRIS_CODEX_CMD || 'codex';
  const proc = spawnSync(cmd, ['-p', prompt], {
    cwd,
    encoding: 'utf8',
    timeout,
    stdio: 'pipe',
    maxBuffer: 10 * 1024 * 1024,
  });

  // Any captured stdout wins, matching the plan-review executor's behaviour.
  if (proc.stdout) return proc.stdout;

  // spawnSync reports spawn failures and timeouts via `error` with a null
  // status; surface the real cause instead of "status null".
  if (proc.error) {
    throw new Error(`codex could not be run (${cmd}): ${proc.error.message}`, { cause: proc.error });
  }
  if (proc.status !== 0) {
    const how = proc.signal ? `was killed by ${proc.signal}` : `exited with status ${proc.status}`;
    throw new Error(`codex ${how}: ${proc.stderr || 'no output'}`);
  }
  return '';
}
891
+
892
/**
 * Report whether the codex command can be resolved.
 * Probes with `which` for ATRIS_CODEX_CMD when set, otherwise for `codex`.
 * Tests override this via options.hasCodex.
 *
 * @returns {boolean} true only when the `which` probe exits with status 0.
 */
function hasCodex() {
  const binary = process.env.ATRIS_CODEX_CMD || 'codex';
  let probe;
  try {
    probe = spawnSync('which', [binary], { stdio: 'pipe' });
  } catch {
    return false;
  }
  return probe.status === 0;
}
905
+
906
/**
 * Run plan-review: the validator (and optionally codex) read the plan and
 * decide if it is safe to execute.
 *
 * Codex is invoked only when the task explicitly opts in:
 *   - env ATRIS_USE_CODEX=1, or
 *   - context.tags includes 'codex', 'gray' or 'high-risk'.
 * NOTE(review): an earlier description restricted 'gray' / 'high-risk' to
 * context.kind === 'endgame'; the code has never checked kind. Confirm which
 * is intended before tightening the condition.
 *
 * If codex is opted-in but unavailable, or its invocation throws, the
 * validator's verdict is returned alone with an explanatory `notes` field.
 * If both signers run, the result is SIGNOFF only when both sign off;
 * otherwise REJECT with both opinions in `reason`.
 *
 * @param {object} args
 * @param {string} args.cwd - Workspace directory passed to the executors.
 * @param {object} args.context - Task context; `tags` is read here.
 * @param {string} args.planOutput - Plan text to review.
 * @param {object} [args.options] - Injection points: `planReviewExec`,
 *   `codexExec`, and `hasCodex` (a boolean, or a function returning one).
 * @returns {object} { verdict, reason, fix, proposed, signers } plus `notes`
 *   when codex was skipped, and `split` on a two-signer REJECT (true only when
 *   the two verdicts differ).
 */
function runPlanReview({ cwd, context, planOutput, options = {} }) {
  const prompt = buildPlanReviewPrompt(context, planOutput);
  const tags = Array.isArray(context.tags) ? context.tags : [];

  // Primary signer: validator.
  const validatorExec = options.planReviewExec || defaultPlanReviewExecutor;
  const validatorOutput = validatorExec(prompt, { cwd, role: 'validator' });
  const primary = parseVerdict(validatorOutput);

  // Single-signer result, optionally annotated with why codex did not sign.
  const validatorOnly = (notes) => {
    const result = { ...primary, signers: ['validator'], proposed: primary.proposed || null };
    if (notes) result.notes = notes;
    return result;
  };

  // Codex: opted in explicitly, not inferred.
  const codexOptIn =
    process.env.ATRIS_USE_CODEX === '1' ||
    tags.includes('codex') ||
    tags.includes('gray') ||
    tags.includes('high-risk');
  if (!codexOptIn) return validatorOnly();

  // The override may be a plain boolean or a probe function; a function used
  // to be treated as "always available" because it is truthy.
  const override = options.hasCodex;
  let codexAvailable;
  if (override == null) {
    codexAvailable = hasCodex();
  } else if (typeof override === 'function') {
    codexAvailable = Boolean(override());
  } else {
    codexAvailable = Boolean(override);
  }
  if (!codexAvailable) {
    return validatorOnly('codex was requested but not on PATH; skipped gracefully');
  }

  const codexExec = options.codexExec || defaultCodexExecutor;
  let codexOutput;
  try {
    codexOutput = codexExec(prompt, { cwd, role: 'codex' });
  } catch (err) {
    return validatorOnly(`codex invocation failed: ${err.message}; falling back to single signer`);
  }
  const codex = parseVerdict(codexOutput);

  if (primary.verdict === 'SIGNOFF' && codex.verdict === 'SIGNOFF') {
    return {
      verdict: 'SIGNOFF',
      reason: primary.reason,
      fix: '',
      proposed: null,
      signers: ['validator', 'codex'],
    };
  }

  // Any disagreement or joint reject → halt with both opinions surfaced.
  // If either signer wrote a PROPOSED draft, surface the validator's first
  // (or codex's if validator didn't propose one).
  const split = primary.verdict !== codex.verdict;
  const label = split ? 'Split verdict' : 'Joint reject';
  return {
    verdict: 'REJECT',
    reason: `${label}. validator=${primary.verdict} (${primary.reason || 'no reason'}); codex=${codex.verdict} (${codex.reason || 'no reason'}).`,
    fix: primary.fix || codex.fix || 'reconcile the two signers before re-planning',
    proposed: primary.proposed || codex.proposed || null,
    signers: ['validator', 'codex'],
    split,
  };
}
983
+
984
/**
 * Append a plan-review rejection to today's journal under `## Notes`.
 * Intentionally does NOT write to lessons.md — rejections only become lessons
 * if a human spots a reusable failure pattern.
 *
 * Does nothing when today's journal file does not exist. Any error is
 * swallowed on purpose: journaling must never crash the tick.
 *
 * @param {string} cwd - Workspace root; the journal is resolved as
 *   `<cwd>/atris/logs/<year>/<year>-<month>-<day>.md` using the local date.
 * @param {object} context - Task context; `task` is written to the entry.
 * @param {object} review - Review result; reads `signers`, `reason`, `fix`,
 *   `proposed` and `notes`.
 * @returns {void}
 */
function appendPlanRejection(cwd, context, review) {
  try {
    // Compute the journal path from the passed cwd so tests and isolated
    // workspaces both work. getLogPath() resolves against process.cwd()
    // which isn't always the task's workspace.
    const date = new Date();
    const year = date.getFullYear();
    const month = String(date.getMonth() + 1).padStart(2, '0');
    const day = String(date.getDate()).padStart(2, '0');
    const logFile = path.join(cwd, 'atris', 'logs', String(year), `${year}-${month}-${day}.md`);
    if (!fs.existsSync(logFile)) return;

    // Heading timestamp is UTC (toISOString), whereas the file name above
    // uses the local calendar date.
    const now = date.toISOString().slice(0, 16).replace('T', ' ');
    const signers = (review.signers || []).join(' + ');
    const proposedBlock = review.proposed
      ? `**Proposed draft:**\n` +
        (review.proposed.files ? `- Files: ${review.proposed.files}\n` : '') +
        (review.proposed.exit ? `- Exit: ${review.proposed.exit}\n` : '') +
        (review.proposed.verify ? `- Verify: ${review.proposed.verify}\n` : '') +
        (review.proposed.rollback ? `- Rollback: ${review.proposed.rollback}\n` : '')
      : '';
    const block =
      `\n### Plan rejected — ${now}\n\n` +
      `**Task:** ${context.task}\n` +
      `**Signers:** ${signers}\n` +
      `**Reason:** ${review.reason}\n` +
      (review.fix ? `**Fix:** ${review.fix}\n` : '') +
      proposedBlock +
      (review.notes ? `**Notes:** ${review.notes}\n` : '');

    let content = fs.readFileSync(logFile, 'utf8');
    // Anchor to the start of a line so a `### Notes` subsection is not
    // mistaken for the top-level Notes section.
    const heading = /^## Notes/m.exec(content);
    if (!heading) {
      content = content.replace(/\s*$/, '') + `\n\n## Notes\n${block}\n`;
    } else {
      const eol = content.indexOf('\n', heading.index);
      if (eol === -1) {
        // Heading is the last line and has no trailing newline: append after
        // it. (Slicing at eol + 1 === 0 would put the entry at the very top.)
        content = `${content}\n${block}`;
      } else {
        content = content.slice(0, eol + 1) + block + content.slice(eol + 1);
      }
    }
    fs.writeFileSync(logFile, content);
  } catch {
    // journaling must never crash the tick
  }
}
1030
+
734
1031
  function runTaskOnce(context, options = {}) {
735
1032
  const { verbose = false, cwd = process.cwd() } = options;
736
1033
 
@@ -754,10 +1051,11 @@ function runTaskOnce(context, options = {}) {
754
1051
  const verifyResult = getVerifyCommand(cwd, context.task);
755
1052
  const verifyCmd = verifyResult.cmd;
756
1053
 
757
- // Guard: refuse to run ticks without an explicit Verify field
758
- if (!verifyResult.explicit) {
1054
+ // Guard: endgame tasks must have an explicit Verify field.
1055
+ // Reactive signals (inbox, staleness, imagined) use npm test as default.
1056
+ if (!verifyResult.explicit && context.kind === 'endgame') {
759
1057
  writeLesson(cwd, 'no-verify-field', 'fail',
760
- `Task "${context.task}" has no explicit **Verify:** field in TODO.md. Tick halted — every task must declare how to verify it.`);
1058
+ `Task "${context.task}" has no explicit **Verify:** field in TODO.md. Tick halted — every endgame task must declare how to verify it.`);
761
1059
  return {
762
1060
  outcome: 'halted',
763
1061
  reason: 'no-verify-field',
@@ -768,10 +1066,92 @@ function runTaskOnce(context, options = {}) {
768
1066
  };
769
1067
  }
770
1068
 
771
- for (const phase of ['plan', 'do', 'review']) {
1069
+ // Falsifiability gate (endgame + explicit Verify only).
1070
+ // Run Verify BEFORE the work. If it passes, the rubric is trivial or the
1071
+ // task is already done — either way, halt. This is the keystone that makes
1072
+ // Verify load-bearing. The cmd is captured here and reused post-execute so
1073
+ // an agent cannot swap the rubric mid-tick.
1074
+ const skipFalsifiability = options.skipFalsifiability === true;
1075
+ if (!skipFalsifiability && verifyResult.explicit && context.kind === 'endgame' && verifyCmd) {
1076
+ try {
1077
+ execSync(verifyCmd, { cwd, stdio: 'pipe', timeout: 60000 });
1078
+ writeLesson(cwd, 'verify-not-falsifiable', 'fail',
1079
+ `Verify \`${verifyCmd}\` passed before work started on "${context.task}". Either the rubric is trivial or the task is already done. Tick halted.`);
1080
+ return {
1081
+ outcome: 'halted',
1082
+ reason: 'verify-not-falsifiable',
1083
+ phaseResults: {},
1084
+ elapsedSeconds: 0,
1085
+ verifyRan: true,
1086
+ verifyPass: false,
1087
+ };
1088
+ } catch {
1089
+ // Pre-verify failed — good, the rubric is falsifiable. Proceed.
1090
+ }
1091
+ }
1092
+
1093
+ // Phase: plan
1094
+ {
1095
+ const t0 = Date.now();
1096
+ const result = (options.phaseExec || executePhaseDetailed)('plan', context, options);
1097
+ phaseResults.plan = {
1098
+ prompt: result.prompt,
1099
+ output: result.output || '',
1100
+ elapsedSeconds: Math.round((Date.now() - t0) / 1000),
1101
+ };
1102
+ }
1103
+
1104
+ // Phase: plan-review — validator reads the plan fresh and signs off or rejects.
1105
+ // Can be skipped via options.skipPlanReview (tests only). Codex is optional,
1106
+ // opt-in via env var / tags. On REJECT, the tick halts and the rejection is
1107
+ // journaled; lessons.md is NOT touched (only promoted lessons go there).
1108
+ if (!options.skipPlanReview) {
1109
+ const t0 = Date.now();
1110
+ const review = runPlanReview({
1111
+ cwd,
1112
+ context,
1113
+ planOutput: phaseResults.plan.output,
1114
+ options,
1115
+ });
1116
+ const elapsed = Math.round((Date.now() - t0) / 1000);
1117
+ phaseResults['plan-review'] = {
1118
+ output:
1119
+ `${review.verdict}: ${review.reason || ''}` +
1120
+ (review.fix ? `\nFIX: ${review.fix}` : '') +
1121
+ (review.notes ? `\n(${review.notes})` : ''),
1122
+ signers: review.signers,
1123
+ elapsedSeconds: elapsed,
1124
+ };
1125
+
1126
+ if (review.verdict === 'REJECT') {
1127
+ appendPlanRejection(cwd, context, review);
1128
+ return {
1129
+ outcome: 'halted',
1130
+ reason: 'plan-rejected-at-review',
1131
+ phaseResults,
1132
+ elapsedSeconds: Math.round((Date.now() - startedAt) / 1000),
1133
+ verifyRan: false,
1134
+ verifyPass: false,
1135
+ };
1136
+ }
1137
+ }
1138
+
1139
+ // Phase: do
1140
+ {
1141
+ const t0 = Date.now();
1142
+ const result = (options.phaseExec || executePhaseDetailed)('do', context, options);
1143
+ phaseResults.do = {
1144
+ prompt: result.prompt,
1145
+ output: result.output || '',
1146
+ elapsedSeconds: Math.round((Date.now() - t0) / 1000),
1147
+ };
1148
+ }
1149
+
1150
+ // Phase: review
1151
+ {
772
1152
  const t0 = Date.now();
773
- const result = executePhaseDetailed(phase, context, options);
774
- phaseResults[phase] = {
1153
+ const result = (options.phaseExec || executePhaseDetailed)('review', context, options);
1154
+ phaseResults.review = {
775
1155
  prompt: result.prompt,
776
1156
  output: result.output || '',
777
1157
  elapsedSeconds: Math.round((Date.now() - t0) / 1000),
@@ -2146,7 +2526,10 @@ module.exports = {
2146
2526
  proposeCandidateHorizons,
2147
2527
  recordTickCommit,
2148
2528
  regressionCheck,
2529
+ runPlanReview,
2149
2530
  runTaskOnce,
2531
+ buildPlanReviewPrompt,
2532
+ parseVerdict,
2150
2533
  scoreEndgameCandidates,
2151
2534
  suggestNextTask,
2152
2535
  writeLesson