ai-collab-open-system 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. package/.aict/START_HERE.md +127 -0
  2. package/.aict/WORKSPACE_MANIFEST.json +91 -0
  3. package/.aict/acceptance/EXAMPLE.synthetic.md +49 -0
  4. package/.aict/acceptance/FAILURE_MODES.md +40 -0
  5. package/.aict/acceptance/PROMPT.md +47 -0
  6. package/.aict/acceptance/README.md +44 -0
  7. package/.aict/acceptance/TEMPLATE.md +57 -0
  8. package/.aict/adapters/SHARED_CORE_CONTRACT.md +106 -0
  9. package/.aict/adapters/claude-code/ADAPTER.md +28 -0
  10. package/.aict/adapters/cline/ADAPTER.md +28 -0
  11. package/.aict/adapters/codex/ADAPTER.md +28 -0
  12. package/.aict/adapters/copilot/ADAPTER.md +28 -0
  13. package/.aict/adapters/cursor/ADAPTER.md +28 -0
  14. package/.aict/adapters/windsurf/ADAPTER.md +28 -0
  15. package/.aict/context/EXAMPLE.synthetic.md +53 -0
  16. package/.aict/context/FAILURE_MODES.md +40 -0
  17. package/.aict/context/PROMPT.md +47 -0
  18. package/.aict/context/README.md +44 -0
  19. package/.aict/context/TEMPLATE.md +63 -0
  20. package/.aict/cookbook/README.md +8 -0
  21. package/.aict/cookbook/bridge-to-a-second-family.md +103 -0
  22. package/.aict/cookbook/connect-a-tool.md +67 -0
  23. package/.aict/cookbook/review-a-half-product.md +79 -0
  24. package/.aict/cookbook/run-a-first-loop.md +81 -0
  25. package/.aict/examples/README.md +21 -0
  26. package/.aict/examples/ai-coding-long-task/CASE.md +161 -0
  27. package/.aict/examples/ai-coding-long-task/artifacts/acceptance-card.md +36 -0
  28. package/.aict/examples/ai-coding-long-task/artifacts/context-package.md +30 -0
  29. package/.aict/examples/ai-coding-long-task/artifacts/execution-prompt.md +30 -0
  30. package/.aict/examples/ai-coding-long-task/artifacts/first-ai-output.md +109 -0
  31. package/.aict/examples/ai-coding-long-task/artifacts/guard-review.md +40 -0
  32. package/.aict/examples/ai-coding-long-task/artifacts/handoff-note.md +28 -0
  33. package/.aict/examples/ai-coding-long-task/artifacts/harvest-seed.md +28 -0
  34. package/.aict/examples/ai-coding-long-task/artifacts/revised-output.md +62 -0
  35. package/.aict/examples/content-production-harvest/CASE.md +87 -0
  36. package/.aict/examples/content-production-harvest/artifacts/acceptance-card.md +28 -0
  37. package/.aict/examples/content-production-harvest/artifacts/context-package.md +28 -0
  38. package/.aict/examples/content-production-harvest/artifacts/execution-prompt.md +30 -0
  39. package/.aict/examples/content-production-harvest/artifacts/guard-review.md +28 -0
  40. package/.aict/examples/content-production-harvest/artifacts/handoff-note.md +28 -0
  41. package/.aict/examples/content-production-harvest/artifacts/harvest-seed.md +28 -0
  42. package/.aict/examples/multi-tool-collaboration/CASE.md +87 -0
  43. package/.aict/examples/multi-tool-collaboration/artifacts/acceptance-card.md +28 -0
  44. package/.aict/examples/multi-tool-collaboration/artifacts/context-package.md +28 -0
  45. package/.aict/examples/multi-tool-collaboration/artifacts/execution-prompt.md +30 -0
  46. package/.aict/examples/multi-tool-collaboration/artifacts/guard-review.md +28 -0
  47. package/.aict/examples/multi-tool-collaboration/artifacts/handoff-note.md +28 -0
  48. package/.aict/examples/multi-tool-collaboration/artifacts/harvest-seed.md +28 -0
  49. package/.aict/examples/personal-judgment-growth-assistant/CASE.md +87 -0
  50. package/.aict/examples/personal-judgment-growth-assistant/artifacts/acceptance-card.md +28 -0
  51. package/.aict/examples/personal-judgment-growth-assistant/artifacts/context-package.md +28 -0
  52. package/.aict/examples/personal-judgment-growth-assistant/artifacts/execution-prompt.md +30 -0
  53. package/.aict/examples/personal-judgment-growth-assistant/artifacts/guard-review.md +28 -0
  54. package/.aict/examples/personal-judgment-growth-assistant/artifacts/handoff-note.md +28 -0
  55. package/.aict/examples/personal-judgment-growth-assistant/artifacts/harvest-seed.md +28 -0
  56. package/.aict/examples/research-knowledge-synthesis/CASE.md +87 -0
  57. package/.aict/examples/research-knowledge-synthesis/artifacts/acceptance-card.md +28 -0
  58. package/.aict/examples/research-knowledge-synthesis/artifacts/context-package.md +28 -0
  59. package/.aict/examples/research-knowledge-synthesis/artifacts/execution-prompt.md +30 -0
  60. package/.aict/examples/research-knowledge-synthesis/artifacts/guard-review.md +28 -0
  61. package/.aict/examples/research-knowledge-synthesis/artifacts/handoff-note.md +28 -0
  62. package/.aict/examples/research-knowledge-synthesis/artifacts/harvest-seed.md +28 -0
  63. package/.aict/guard/EXAMPLE.synthetic.md +51 -0
  64. package/.aict/guard/FAILURE_MODES.md +40 -0
  65. package/.aict/guard/PROMPT.md +47 -0
  66. package/.aict/guard/README.md +44 -0
  67. package/.aict/guard/TEMPLATE.md +60 -0
  68. package/.aict/handoff/EXAMPLE.synthetic.md +51 -0
  69. package/.aict/handoff/FAILURE_MODES.md +40 -0
  70. package/.aict/handoff/PROMPT.md +47 -0
  71. package/.aict/handoff/README.md +44 -0
  72. package/.aict/handoff/TEMPLATE.md +60 -0
  73. package/.aict/harvest/EXAMPLE.synthetic.md +51 -0
  74. package/.aict/harvest/FAILURE_MODES.md +40 -0
  75. package/.aict/harvest/PROMPT.md +47 -0
  76. package/.aict/harvest/README.md +44 -0
  77. package/.aict/harvest/TEMPLATE.md +60 -0
  78. package/.aict/mechanisms/README.md +34 -0
  79. package/.aict/mechanisms/anti-drift-partner/EXAMPLE.synthetic.md +46 -0
  80. package/.aict/mechanisms/anti-drift-partner/FAILURE_MODES.md +25 -0
  81. package/.aict/mechanisms/anti-drift-partner/PROMPT.md +75 -0
  82. package/.aict/mechanisms/anti-drift-partner/README.md +82 -0
  83. package/.aict/mechanisms/anti-drift-partner/TEMPLATE.md +74 -0
  84. package/.aict/mechanisms/blind-spot-scan/EXAMPLE.synthetic.md +39 -0
  85. package/.aict/mechanisms/blind-spot-scan/FAILURE_MODES.md +25 -0
  86. package/.aict/mechanisms/blind-spot-scan/PROMPT.md +72 -0
  87. package/.aict/mechanisms/blind-spot-scan/README.md +79 -0
  88. package/.aict/mechanisms/blind-spot-scan/TEMPLATE.md +70 -0
  89. package/.aict/mechanisms/collaboration-coach/EXAMPLE.synthetic.md +40 -0
  90. package/.aict/mechanisms/collaboration-coach/FAILURE_MODES.md +25 -0
  91. package/.aict/mechanisms/collaboration-coach/PROMPT.md +72 -0
  92. package/.aict/mechanisms/collaboration-coach/README.md +79 -0
  93. package/.aict/mechanisms/collaboration-coach/TEMPLATE.md +61 -0
  94. package/.aict/mechanisms/do-not-handle-yet/EXAMPLE.synthetic.md +15 -0
  95. package/.aict/mechanisms/do-not-handle-yet/FAILURE_MODES.md +16 -0
  96. package/.aict/mechanisms/do-not-handle-yet/PROMPT.md +41 -0
  97. package/.aict/mechanisms/do-not-handle-yet/README.md +30 -0
  98. package/.aict/mechanisms/do-not-handle-yet/TEMPLATE.md +38 -0
  99. package/.aict/mechanisms/dual-guard/EXAMPLE.synthetic.md +54 -0
  100. package/.aict/mechanisms/dual-guard/FAILURE_MODES.md +25 -0
  101. package/.aict/mechanisms/dual-guard/PROMPT.md +76 -0
  102. package/.aict/mechanisms/dual-guard/README.md +81 -0
  103. package/.aict/mechanisms/dual-guard/TEMPLATE.md +73 -0
  104. package/.aict/mechanisms/feedback-absorption-ledger/EXAMPLE.synthetic.md +49 -0
  105. package/.aict/mechanisms/feedback-absorption-ledger/FAILURE_MODES.md +25 -0
  106. package/.aict/mechanisms/feedback-absorption-ledger/PROMPT.md +74 -0
  107. package/.aict/mechanisms/feedback-absorption-ledger/README.md +81 -0
  108. package/.aict/mechanisms/feedback-absorption-ledger/TEMPLATE.md +69 -0
  109. package/.aict/mechanisms/half-product-review/EXAMPLE.synthetic.md +15 -0
  110. package/.aict/mechanisms/half-product-review/FAILURE_MODES.md +16 -0
  111. package/.aict/mechanisms/half-product-review/PROMPT.md +41 -0
  112. package/.aict/mechanisms/half-product-review/README.md +30 -0
  113. package/.aict/mechanisms/half-product-review/TEMPLATE.md +38 -0
  114. package/.aict/mechanisms/handoff-abc/EXAMPLE.synthetic.md +47 -0
  115. package/.aict/mechanisms/handoff-abc/FAILURE_MODES.md +25 -0
  116. package/.aict/mechanisms/handoff-abc/PROMPT.md +75 -0
  117. package/.aict/mechanisms/handoff-abc/README.md +82 -0
  118. package/.aict/mechanisms/handoff-abc/TEMPLATE.md +60 -0
  119. package/.aict/mechanisms/harvest-and-erc/EXAMPLE.synthetic.md +43 -0
  120. package/.aict/mechanisms/harvest-and-erc/FAILURE_MODES.md +25 -0
  121. package/.aict/mechanisms/harvest-and-erc/PROMPT.md +74 -0
  122. package/.aict/mechanisms/harvest-and-erc/README.md +81 -0
  123. package/.aict/mechanisms/harvest-and-erc/TEMPLATE.md +60 -0
  124. package/.aict/mechanisms/honest-calibration/EXAMPLE.synthetic.md +43 -0
  125. package/.aict/mechanisms/honest-calibration/FAILURE_MODES.md +25 -0
  126. package/.aict/mechanisms/honest-calibration/PROMPT.md +74 -0
  127. package/.aict/mechanisms/honest-calibration/README.md +81 -0
  128. package/.aict/mechanisms/honest-calibration/TEMPLATE.md +66 -0
  129. package/.aict/mechanisms/one-click-dispatch/EXAMPLE.synthetic.md +15 -0
  130. package/.aict/mechanisms/one-click-dispatch/FAILURE_MODES.md +16 -0
  131. package/.aict/mechanisms/one-click-dispatch/PROMPT.md +41 -0
  132. package/.aict/mechanisms/one-click-dispatch/README.md +30 -0
  133. package/.aict/mechanisms/one-click-dispatch/TEMPLATE.md +38 -0
  134. package/.aict/mechanisms/plain-language-first-screen/EXAMPLE.synthetic.md +15 -0
  135. package/.aict/mechanisms/plain-language-first-screen/FAILURE_MODES.md +16 -0
  136. package/.aict/mechanisms/plain-language-first-screen/PROMPT.md +41 -0
  137. package/.aict/mechanisms/plain-language-first-screen/README.md +30 -0
  138. package/.aict/mechanisms/plain-language-first-screen/TEMPLATE.md +38 -0
  139. package/.aict/mechanisms/root-cause-brake/EXAMPLE.synthetic.md +55 -0
  140. package/.aict/mechanisms/root-cause-brake/FAILURE_MODES.md +25 -0
  141. package/.aict/mechanisms/root-cause-brake/PROMPT.md +73 -0
  142. package/.aict/mechanisms/root-cause-brake/README.md +79 -0
  143. package/.aict/mechanisms/root-cause-brake/TEMPLATE.md +74 -0
  144. package/.aict/mechanisms/scout-review-controller/EXAMPLE.synthetic.md +15 -0
  145. package/.aict/mechanisms/scout-review-controller/FAILURE_MODES.md +16 -0
  146. package/.aict/mechanisms/scout-review-controller/PROMPT.md +41 -0
  147. package/.aict/mechanisms/scout-review-controller/README.md +30 -0
  148. package/.aict/mechanisms/scout-review-controller/TEMPLATE.md +38 -0
  149. package/.aict/mechanisms/single-tool-guard/EXAMPLE.synthetic.md +54 -0
  150. package/.aict/mechanisms/single-tool-guard/FAILURE_MODES.md +25 -0
  151. package/.aict/mechanisms/single-tool-guard/PROMPT.md +76 -0
  152. package/.aict/mechanisms/single-tool-guard/README.md +83 -0
  153. package/.aict/mechanisms/single-tool-guard/TEMPLATE.md +75 -0
  154. package/.aict/mechanisms/task-splitting/EXAMPLE.synthetic.md +53 -0
  155. package/.aict/mechanisms/task-splitting/FAILURE_MODES.md +25 -0
  156. package/.aict/mechanisms/task-splitting/PROMPT.md +72 -0
  157. package/.aict/mechanisms/task-splitting/README.md +79 -0
  158. package/.aict/mechanisms/task-splitting/TEMPLATE.md +76 -0
  159. package/.aict/modes/README.md +11 -0
  160. package/.aict/modes/execute.md +31 -0
  161. package/.aict/modes/handoff.md +29 -0
  162. package/.aict/modes/harvest.md +30 -0
  163. package/.aict/modes/review.md +28 -0
  164. package/.aict/modes/shape.md +34 -0
  165. package/.aict/privacy/COMMERCIAL_BOUNDARY.md +34 -0
  166. package/.aict/privacy/PRIVACY.md +36 -0
  167. package/.aict/privacy/REDACTION_CHECKLIST.md +12 -0
  168. package/.aict/profile/CANDIDATES.md +44 -0
  169. package/.aict/profile/EXAMPLE.synthetic.md +49 -0
  170. package/.aict/profile/FAILURE_MODES.md +40 -0
  171. package/.aict/profile/PROMPT.md +47 -0
  172. package/.aict/profile/README.md +44 -0
  173. package/.aict/profile/TEMPLATE.md +57 -0
  174. package/.aict/prompts/acceptance-definition.md +109 -0
  175. package/.aict/prompts/guard-review.md +116 -0
  176. package/.aict/prompts/handoff-generation.md +110 -0
  177. package/.aict/prompts/harvest-extraction.md +110 -0
  178. package/.aict/prompts/mode-switching.md +66 -0
  179. package/.aict/prompts/profile-creation.md +66 -0
  180. package/.aict/prompts/profile-refinement.md +66 -0
  181. package/.aict/prompts/project-context-packaging.md +113 -0
  182. package/.aict/prompts/red-team-challenge.md +106 -0
  183. package/.aict/prompts/rule-update-proposal.md +114 -0
  184. package/.aict/prompts/workflow-reset.md +109 -0
  185. package/.aict/roles/README.md +18 -0
  186. package/.aict/roles/executor.md +34 -0
  187. package/.aict/roles/harvester.md +33 -0
  188. package/.aict/roles/owner-controller.md +38 -0
  189. package/.aict/roles/scout.md +33 -0
  190. package/.aict/roles/supervisor.md +34 -0
  191. package/.aict/roles/system-guardian.md +34 -0
  192. package/.aict/skills/acceptance/SKILL.md +43 -0
  193. package/.aict/skills/context/SKILL.md +44 -0
  194. package/.aict/skills/evidence-pack/SKILL.md +42 -0
  195. package/.aict/skills/guard/SKILL.md +46 -0
  196. package/.aict/skills/handoff/SKILL.md +44 -0
  197. package/.aict/skills/harvest/SKILL.md +44 -0
  198. package/.aict/skills/mode-switch/SKILL.md +42 -0
  199. package/.aict/skills/profile/SKILL.md +42 -0
  200. package/.aict/skills/red-team/SKILL.md +42 -0
  201. package/.aict/skills/single-tool-guard/SKILL.md +42 -0
  202. package/.aict/state/CURRENT_STATE.md +13 -0
  203. package/.aict/state/DECISIONS.md +7 -0
  204. package/.aict/state/TASK_LOG.md +7 -0
  205. package/.aict/state/evidence.jsonl +2 -0
  206. package/.aict/state/learning-ledger.jsonl +1 -0
  207. package/.aict/state/receipts.jsonl +1 -0
  208. package/.aict/state/runs.jsonl +1 -0
  209. package/.aict/state/tasks.jsonl +1 -0
  210. package/.aict/walkthroughs/10-minute-your-task.md +107 -0
  211. package/.aict/walkthroughs/10-minute.md +43 -0
  212. package/.aict/walkthroughs/30-minute.md +22 -0
  213. package/.aict/walkthroughs/60-minute.md +27 -0
  214. package/.aict/walkthroughs/synthetic-loop-transcript.md +43 -0
  215. package/CHANGELOG.md +23 -0
  216. package/CODE_OF_CONDUCT.md +20 -0
  217. package/CONTRIBUTING.md +30 -0
  218. package/KNOWN_LIMITATIONS.md +54 -0
  219. package/LICENSE +199 -0
  220. package/PRODUCT_CONTRACT.md +446 -0
  221. package/README.md +245 -0
  222. package/RELEASE_CHECKLIST.md +78 -0
  223. package/SECURITY.md +56 -0
  224. package/START_HERE.md +89 -0
  225. package/bin/ai-collab.js +2 -0
  226. package/docs/DOGFOOD.md +85 -0
  227. package/docs/FEEDBACK.md +61 -0
  228. package/docs/FIRST_EXPERIENCE_SPEC.md +32 -0
  229. package/docs/FREE_VS_PAID.md +53 -0
  230. package/docs/PUBLIC_BOUNDARY.md +36 -0
  231. package/docs/PUBLIC_MAPPING.md +178 -0
  232. package/docs/RELEASE_PRIORITY.md +23 -0
  233. package/docs/WHY_THIS_EXISTS.md +36 -0
  234. package/docs/open-system/00-start-here.md +60 -0
  235. package/docs/open-system/01-ai-collaboration-os.md +33 -0
  236. package/docs/open-system/02-six-layer-architecture.md +45 -0
  237. package/docs/open-system/03-role-system.md +33 -0
  238. package/docs/open-system/04-core-mechanisms.md +34 -0
  239. package/docs/open-system/05-failure-patterns.md +31 -0
  240. package/docs/open-system/06-how-to-adapt-to-your-workflow.md +31 -0
  241. package/package.json +69 -0
  242. package/privacy-manifest.json +78 -0
  243. package/privacy-scan.local.json.example +18 -0
  244. package/scripts/lib/forbidden-in-pack.js +55 -0
  245. package/scripts/pack-check.js +154 -0
  246. package/scripts/privacy-scan.js +487 -0
  247. package/scripts/validate-contract.js +160 -0
  248. package/src/adapters.js +590 -0
  249. package/src/bootstrap.js +1184 -0
  250. package/src/catalog.js +2723 -0
  251. package/src/cli.js +2899 -0
  252. package/src/dialogue.js +470 -0
  253. package/src/i18n.js +1034 -0
  254. package/src/ledger.js +2011 -0
  255. package/src/render.js +1381 -0
  256. package/src/sendmodel.js +452 -0
  257. package/src/validate.js +1307 -0
  258. package/src/workspace.js +1679 -0
  259. package/tests/contract.test.js +8514 -0
package/src/cli.js ADDED
@@ -0,0 +1,2899 @@
1
+ #!/usr/bin/env node
2
+ import { existsSync, mkdtempSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from "node:fs";
3
+ import { spawnSync } from "node:child_process";
4
+ import { tmpdir } from "node:os";
5
+ import path from "node:path";
6
+ import { installAdapters } from "./adapters.js";
7
+ import { createWorkspace } from "./workspace.js";
8
+ import { validateWorkspace } from "./validate.js";
9
+ import { resolveLocale, t } from "./i18n.js";
10
+ import {
11
+ scanLocalStructure,
12
+ buildBootstrapModel,
13
+ renderBootstrapReport,
14
+ renderConsentPreview
15
+ } from "./bootstrap.js";
16
+ import { scanDialogueAndLogs } from "./dialogue.js";
17
+ import {
18
+ collectRedactedSnippets,
19
+ buildModelPrompt,
20
+ buildSendPreview,
21
+ assertPayloadRedacted,
22
+ runExternalModelPass,
23
+ DEFAULT_MODEL_CMD
24
+ } from "./sendmodel.js";
25
+ import {
26
+ readLedger,
27
+ appendLedger,
28
+ writeLedger,
29
+ nextId,
30
+ appendWithNextId,
31
+ rewriteLedgerUnderLock,
32
+ receiptStatusFor,
33
+ ownedEvidenceIds,
34
+ ownedRerunEvidenceIds,
35
+ ownedCrossFamilyGuardEvidenceIds,
36
+ guardLevelVerdictError,
37
+ ownerAcceptanceError,
38
+ doneRequiresEvidence,
39
+ specialEvidenceStructureError,
40
+ rerunRunReconcileError,
41
+ computeGuardLevel,
42
+ familyHonestyMarker,
43
+ guardLevelRank,
44
+ computeCapability,
45
+ TOOL_SIGNALS,
46
+ TOOL_FAMILY,
47
+ CAPABILITY_TIERS,
48
+ hasOwnedRunEvidence,
49
+ learningRecordError,
50
+ latestConfirmedProfileLearning,
51
+ latestConfirmedHarvestLearning,
52
+ summarizeTasks,
53
+ buildHandoffModel,
54
+ countSeedRows,
55
+ isSeedRow,
56
+ guardLevelExplanation,
57
+ taskHasAcceptedReceipt,
58
+ outputSha256,
59
+ outputByteLength,
60
+ isRecognizedEvidenceKind,
61
+ EVIDENCE_KIND_RERUN,
62
+ EVIDENCE_KIND_CROSS_FAMILY_GUARD,
63
+ RECEIPT_VERDICTS,
64
+ GUARD_LEVELS,
65
+ REVIEW_MODES,
66
+ TASK_STATUSES,
67
+ LEARNING_TYPES,
68
+ LEARNING_STATUSES
69
+ } from "./ledger.js";
70
+
71
// Value-taking flags -> the args key each one is stored under. Every value flag MUST
// be listed here or parseArgs throws "Unknown option"; only the "--key value" form is
// supported (no --key=value). Most map 1:1 from --foo to args.foo; the exceptions are
// appended below the generated entries.
const PARSE_ARGS_VALUE_FLAG_KEYS = new Map([
  ...[
    // Install flags.
    "--target",
    "--workspace",
    "--tool",
    // Run-layer flags (task / evidence / run / receipt commands).
    "--task",
    "--title",
    "--status",
    "--kind",
    "--summary",
    "--detail",
    "--exit",
    "--verdict",
    "--evidence",
    "--id",
    "--rerun",
    "--run",
    "--command",
    "--cwd",
    "--output",
    "--reviewer",
    "--family",
    "--ref",
    "--runner",
    "--type",
    "--content",
    "--tools",
    "--families",
    "--project",
    // bootstrap's opt-in local connectors: comma-separated paths, split where
    // bootstrap consumes the value.
    "--dialogue",
    "--logs",
    // External-model command override for bootstrap --send-to-model.
    "--model",
    // Global i18n override (en|zh); an unsupported value is ignored by resolveLocale.
    "--lang"
  ].map((flag) => [flag, flag.slice(2)]),
  // --guard-level and --claimed-level both feed args.claimedLevel: the level the
  // AUTHOR claims, which the CLI recomputes rather than storing directly.
  ["--guard-level", "claimedLevel"],
  ["--claimed-level", "claimedLevel"],
  // --review-mode is the load-bearing input to the level computation.
  ["--review-mode", "reviewMode"]
]);

// Presence-only flags -> the args key set to `true`. All are recognized globally so a
// typo still fails loudly via "Unknown option"; commands that do not use a flag
// simply ignore it.
const PARSE_ARGS_BOOLEAN_FLAG_KEYS = new Map([
  // A help token ANYWHERE means "show help" (a flag, never a positional), so
  // `<command> --help` can be short-circuited with zero side effects.
  ["--help", "help"],
  ["-h", "help"],
  ["--version", "version"],
  ["-v", "version"],
  ["--force", "force"],
  ["--json", "json"],
  // Kept under the dashed key "dry-run" (not camelCased), as before.
  ["--dry-run", "dry-run"],
  // bootstrap: --yes confirms the local-scan consent gate; --report-only pins the
  // read-only mode.
  ["--yes", "yes"],
  ["--report-only", "reportOnly"],
  // bootstrap's opt-in external-model connector and its audit-only dry run.
  ["--send-to-model", "sendToModel"],
  ["--dry-run-send", "dryRunSend"],
  // `capability detect` self-report overrides.
  ["--subagents", "subagents"],
  ["--can-switch-model", "canSwitchModel"],
  ["--can-rerun", "canRerun"],
  ["--no-new-conversation", "noNewConversation"],
  // Opt-in adapter hook layer.
  ["--enable-hooks", "enableHooks"],
  // Opt-in minimal environment for `run exec`.
  ["--clean-env", "cleanEnv"]
]);

/**
 * Parse command-line tokens into a flat args object.
 *
 * Three kinds of token are recognized:
 *  - value flags (`--key value`): stored under the mapped key; the next token is consumed;
 *  - boolean flags: stored as `true` under the mapped key;
 *  - `--owner` / `--owner-accepted`: sets `ownerAccepted` and OPTIONALLY consumes the
 *    next token as `ownerName` when that token does not start with `--`.
 * Anything else that starts with `--` is rejected; every remaining token is a
 * positional and is appended to `args._` in order.
 *
 * @param {string[]} argv - Command-line tokens to parse.
 * @returns {{ _: string[] } & Record<string, string | boolean>} Parsed arguments.
 * @throws {Error} "Option <flag> requires a value." when a value flag is last in argv
 *   or is followed by a token starting with `--`.
 * @throws {Error} "Unknown option: <flag>" for an unrecognized `--` token (this
 *   includes the unsupported `--key=value` form).
 */
function parseArgs(argv) {
  const args = { _: [] };

  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];

    const valueKey = PARSE_ARGS_VALUE_FLAG_KEYS.get(arg);
    if (valueKey !== undefined) {
      const value = argv[index + 1];
      if (value === undefined || value.startsWith("--")) {
        throw new Error(`Option ${arg} requires a value.`);
      }
      args[valueKey] = value;
      index += 1; // skip the token just consumed as the value
      continue;
    }

    const booleanKey = PARSE_ARGS_BOOLEAN_FLAG_KEYS.get(arg);
    if (booleanKey !== undefined) {
      args[booleanKey] = true;
      continue;
    }

    if (arg === "--owner" || arg === "--owner-accepted") {
      // Presence marks an owner acceptance; an optional following name attributes it
      // in the audit trail. A bare flag (end of argv, or followed by another --flag)
      // keeps the no-name behavior.
      args.ownerAccepted = true;
      const next = argv[index + 1];
      if (next !== undefined && !next.startsWith("--")) {
        args.ownerName = next;
        index += 1;
      }
      continue;
    }

    if (arg.startsWith("--")) {
      throw new Error(`Unknown option: ${arg}`);
    }

    args._.push(arg);
  }

  return args;
}
224
+
225
+ // The locale resolved ONCE at the top of main() (flag > AI_COLLAB_LANG > OS locale >
226
+ // 'en'), held module-level so every user-visible string — including errors thrown
227
+ // deep inside a command and caught back in main() — renders in the same language
228
+ // without threading `locale` through every signature. Defaults to 'en' so any code
229
+ // path that runs before main() resolves it (or in a test importing a helper) still
230
+ // gets the canonical English. resolveLocale itself stays pure (env is passed in).
231
+ let CURRENT_LOCALE = "en";
232
+
233
+ // Bind t() to the current run's locale. ALL user-facing CLI text goes through tr()
234
+ // (or t(..., CURRENT_LOCALE) for the bilingual render helpers). English is the
235
+ // canonical fallback baked into the catalog, so an un-translated key degrades to
236
+ // English, never to an empty line.
237
/**
 * Translate a catalog key using the locale held in CURRENT_LOCALE.
 *
 * @param {string} key - Catalog key to look up.
 * @param {object} [params={}] - Values forwarded to t() for the message.
 * @returns {*} Whatever t() returns for this key, params and locale.
 */
function tr(key, params = {}) {
  const activeLocale = CURRENT_LOCALE;
  return t(key, params, activeLocale);
}
240
+
241
/**
 * Write one command result to stdout.
 *
 * When `args.json` is truthy the payload is printed as 2-space-indented JSON;
 * otherwise the human-readable text is printed unchanged.
 *
 * @param {object} args - Parsed CLI args; only `args.json` is read.
 * @param {string} text - Human-readable output.
 * @param {*} payload - Structured output used for the JSON form.
 * @returns {void}
 */
function emit(args, text, payload) {
  const rendered = args.json ? JSON.stringify(payload, null, 2) : text;
  console.log(rendered);
}
248
+
249
+ // The no-argument first screen. Deliberately NOT the same as --help: running the
250
+ // bare command (a brand-new user's very first contact) used to dump the entire
251
+ // 100+ line reference — including the L0-L4 guard-level theory — which buries the
252
+ // one thing a newcomer needs (how to start). This prints a short quickstart: a
253
+ // one-line positioning sentence, the three commands that actually get you moving
254
+ // (init / guide / demo), and a pointer to the full reference + the new-user fast
255
+ // path. The full command list and the L0-L4 levels live in `--help` (printHelp),
256
+ // which a user reaches only by explicitly asking for everything.
257
/**
 * Print the short quickstart screen with a single console.log call.
 * Takes no arguments and returns nothing; the text is a fixed template literal
 * (it is not routed through tr(), so it is printed as written regardless of locale).
 *
 * NOTE(review): the text tells users the package is not yet on npm and to run from a
 * clone — confirm that wording is still accurate for the release being shipped.
 */
+ function printQuickstart() {
258
+ console.log(`AI Collaboration Open System — make AI productive on real work through a profile -> context -> acceptance -> guard -> handoff -> harvest loop (local-first; no network).
259
+
260
+ Not published to npm yet, so run from a clone with: node bin/ai-collab.js <command>
261
+
262
+ Start with one of these three:
263
+ node bin/ai-collab.js init --target <dir> Create your workspace (then run one real task through the loop).
264
+ node bin/ai-collab.js guide Read the guided first run, step by step.
265
+ node bin/ai-collab.js demo Watch the flow on a prepared example (writes to a temp dir).
266
+
267
+ New here? Run: node bin/ai-collab.js guide
268
+ Full command list: node bin/ai-collab.js --help
269
+ `);
270
+ }
271
+
272
+ // `welcome` HARD-PRINTS the fixed onboarding intro (welcome.intro) verbatim. The
273
+ // flow: the AI finishes installing the pack, runs this command, and shows the user
274
+ // its output as-is — so the full "what this collaboration pack adds" intro is
275
+ // GUARANTEED to appear complete and accurate, instead of being re-summarized (and
276
+ // possibly trimmed/garbled) by the model. The text itself lives in the i18n catalog
277
+ // (en canonical + the Owner-locked zh 4th draft), so `--lang zh` prints Chinese via
278
+ // the same locale machinery init/bootstrap use; this handler only selects + prints
279
+ // it. --json wraps the same text so an integrating tool can capture it.
280
/**
 * Print the fixed onboarding intro (catalog key "welcome.intro").
 *
 * The text form is the translated intro itself; the JSON form wraps the same
 * text together with the command name, the current locale and a network marker.
 *
 * @param {object} args - Parsed CLI args, forwarded to emit().
 * @returns {void}
 */
function welcome(args) {
  const introText = tr("welcome.intro");
  const payload = {
    command: "welcome",
    locale: CURRENT_LOCALE,
    text: tr("welcome.intro"),
    network: "not used"
  };
  emit(args, introText, payload);
}
288
+
289
/**
 * Print the main help screen (catalog key "help.main") to stdout.
 *
 * @returns {void}
 */
function printHelp() {
  const helpText = tr("help.main");
  console.log(helpText);
}
292
+
293
+ // The full L0-L4 guard-level reference, split out of printHelp() so the main --help
294
+ // first screen is not a term wall. Reached on demand via `--help levels` / `help levels`.
295
+ // Plain-language summary first, then the precise ladder + family-honesty caveats.
296
/**
 * Print the guard-level reference (catalog key "help.levels") to stdout.
 *
 * @returns {void}
 */
function printLevelsHelp() {
  const levelsText = tr("help.levels");
  console.log(levelsText);
}
299
+
300
/**
 * Emit the guided first-run instructions through emit().
 *
 * Text form: the fixed multi-line walkthrough below (a plain template literal, not
 * routed through tr()). JSON form (when args.json is set): an object with `command`,
 * `published`, `steps`, `demoPreview`, `afterPublish` and `network`.
 *
 * NOTE(review): both the text and `published: false` state that the package is not
 * yet on npm — confirm this is still accurate for the release being shipped.
 *
 * @param {object} args - Parsed CLI args, forwarded to emit().
 * @returns {void}
 */
+ function guide(args) {
301
+ emit(args, `Start here (recommended first run: your own real task):
302
+
303
+ The source is on GitHub (CI green); not published to npm yet, so run from a clone with node bin/ai-collab.js.
304
+ After publish, the same commands work as the global ai-collab command.
305
+
306
+ 1. Run: node bin/ai-collab.js init --target ./my-ai-workspace
307
+ (after publish: ai-collab init --target ./my-ai-workspace)
308
+ 2. Open: ./my-ai-workspace/.aict/walkthroughs/10-minute-your-task.md
309
+ 3. Follow its steps on one real (lightly redacted) task of your own:
310
+ define done -> do only that slice -> independent re-check -> handoff -> harvest.
311
+ 4. Watch an independent re-check reject a thin "done" on work you actually care about.
312
+ 5. Install the adapter guidance for your AI tool so the same rules drive every session.
313
+
314
+ Prefer to watch the flow on a prepared example before using your own task?
315
+ Open ./my-ai-workspace/.aict/walkthroughs/10-minute.md (the demo preview) first, then come back to your task.
316
+
317
+ Track the task lifecycle with the run-layer commands (all take --workspace <dir>):
318
+ task create -> evidence add -> run start/finish -> task update --status <state> -> receipt create.
319
+ A task can only become "done" once it has evidence; with none, use blocked/partial/unverified.
320
+ A receipt states a verdict AND a guard level (L0-L4) for the evidence the guard saw: L0 summary-only can
321
+ only be insufficient_evidence, L1/L2 cannot pass, a clean pass needs the cross-family L3 Evidence Pack, and
322
+ an L4 pass must show the reviewer's own rerun RECONCILED to a recorded run exec (--rerun citing a rerun row whose
323
+ --run points at a finished runs.jsonl run exec with matching exitCode + command + outputSha256). A pass_with_risk receipt is
324
+ created pending and needs an explicit owner sign-off (receipt accept --id <id> --owner) before it is accepted.
325
+
326
+ Close the loop so it compounds (learning ledger):
327
+ learning add --type harvest --content "..." captures one reusable lesson; --type profile proposes one
328
+ standing preference. Both land "proposed" until you keep them: learning confirm (as-is), learning edit
329
+ (reword), or learning drop (discard). Only confirmed/edited rows graduate into your profile — at most one
330
+ harvest + one profile per task. Next time, "status" echoes back the one preference you last confirmed.
331
+
332
+ Workspace map: ./my-ai-workspace/.aict/START_HERE.md
333
+ Free/open: complete generic workspace.
334
+ Paid help, if offered by a maintainer, is only for calibration and saving time.
335
+ Network: not used.
336
+ `, {
337
+ command: "guide",
338
+ published: false,
339
+ steps: [
340
+ "node bin/ai-collab.js init --target ./my-ai-workspace",
341
+ "open ./my-ai-workspace/.aict/walkthroughs/10-minute-your-task.md",
342
+ "run the 10-minute loop on your own real (lightly redacted) task",
343
+ "watch an independent re-check reject a thin \"done\" on your own task",
344
+ "install adapter guidance so the same rules drive every session"
345
+ ],
346
+ demoPreview: "open ./my-ai-workspace/.aict/walkthroughs/10-minute.md to watch the flow on a prepared example first",
347
+ afterPublish: "ai-collab init --target ./my-ai-workspace",
348
+ network: "not used"
349
+ });
350
+ }
351
+
352
/**
 * Scaffold a throwaway demo workspace in a fresh temp directory and report
 * where its key files live.
 *
 * demo needs a writable temp dir. In a read-only / locked-down environment
 * mkdtemp fails (EPERM/EACCES/EROFS); that failure is caught here and turned
 * into a human pointer (or a JSON object with --json) on stderr plus exit
 * code 1, instead of letting the raw errno message bubble up. The actionable
 * alternatives are `init --target <writable dir>` or the committed walkthrough.
 *
 * @param {object} args Parsed CLI arguments; `args.json` selects JSON error output.
 */
function demo(args) {
  let target;
  try {
    target = mkdtempSync(path.join(tmpdir(), "aicos-demo-"));
  } catch (error) {
    const errorCode = error?.code || null;
    const readOnly = ["EPERM", "EACCES", "EROFS"].includes(error?.code);

    if (args.json) {
      const failure = {
        command: "demo",
        ok: false,
        reason: "temp-dir-unwritable",
        readOnly,
        errorCode,
        hint: "node bin/ai-collab.js init --target <writable-dir>",
        walkthrough: ".aict/walkthroughs/10-minute.md",
        network: "not used"
      };
      console.error(JSON.stringify(failure, null, 2));
    } else {
      // Prefer the errno code, then the message, then a generic fallback.
      const detail = error?.code || error?.message || "unknown error";
      const reason = readOnly
        ? "this looks like a read-only environment (could not create a temp directory)"
        : `could not create a temp directory (${detail})`;
      console.error(`Demo needs to write a throwaway workspace to a temp directory, but ${reason}.
Instead, scaffold a real workspace in a writable dir:
node bin/ai-collab.js init --target <writable-dir>
Or read the committed walkthrough directly: .aict/walkthroughs/10-minute.md
Network: not used.`);
    }
    process.exitCode = 1;
    return;
  }

  const { workspaceRoot } = createWorkspace(target, { force: true });
  const demoPreview = path.join(workspaceRoot, "walkthroughs", "10-minute.md");
  const flagshipCase = path.join(workspaceRoot, "examples", "ai-coding-long-task", "CASE.md");
  const firstRun = path.join(workspaceRoot, "walkthroughs", "10-minute-your-task.md");
  const startHere = path.join(workspaceRoot, "START_HERE.md");

  const report = `Demo workspace created.

Note: demo writes a throwaway workspace to a new temp directory to show the layout without touching your project. In a read-only environment this write can fail (EPERM/EACCES); if so, run "node bin/ai-collab.js init --target <writable-dir>" instead.
Workspace (temporary): ${workspaceRoot}
Watch the prepared demo (a worked example, not your task): ${demoPreview}
It drives the flagship example: ${flagshipCase}
Your real first run goes through your own task: ${firstRun}
Workspace map: ${startHere}
Network: not used.
`;

  emit(args, report, {
    command: "demo",
    workspaceRoot,
    temporary: true,
    note: "demo creates a throwaway temp workspace; in a read-only environment this write can fail (use init --target <writable-dir> instead)",
    demoPreview,
    firstRun,
    startHere,
    flagshipCase,
    network: "not used"
  });
}
411
+
412
// Path previewed by `init --dry-run` when no --target is given.
const DEFAULT_DRY_RUN_TARGET = "./my-ai-workspace";

/**
 * `init`: scaffold a workspace at --target, or preview it with --dry-run.
 *
 * A real (writing) init must always be given an explicit target so it never
 * silently scaffolds into the current directory. A --dry-run preview writes
 * nothing, so without --target it previews against DEFAULT_DRY_RUN_TARGET and
 * tells the user to pass --target when they actually want to write.
 *
 * @param {object} args Parsed CLI arguments (`target`, `dry-run`, `force`).
 * @throws {Error} When a writing init is requested without --target.
 */
function init(args) {
  const dryRun = Boolean(args["dry-run"]);
  const targetMissing = !args.target;
  const usedDefaultTarget = dryRun && targetMissing;

  if (targetMissing && !dryRun) {
    throw new Error(tr("error.missingTarget"));
  }

  const target = path.resolve(args.target ?? DEFAULT_DRY_RUN_TARGET);
  const options = {
    force: Boolean(args.force),
    dryRun,
    // init writes a workspace the user KEEPS, so it also drops a .gitignore that
    // keeps the runtime ledgers (their private task data) out of a routine
    // `git add .`. demo (a throwaway temp preview) and the template generator do not.
    gitignore: true
  };
  const result = createWorkspace(target, options);

  if (result.dryRun) {
    const existing = result.existingWorkspace
      ? tr("init.dryRun.existing.yes")
      : tr("init.dryRun.existing.no");
    const defaultTargetLine = usedDefaultTarget
      ? tr("init.dryRun.defaultTargetLine", { target: DEFAULT_DRY_RUN_TARGET })
      : "";
    const dryBody = tr("init.dryRun.body", {
      workspaceRoot: result.workspaceRoot,
      files: result.files,
      existing,
      defaultTargetLine,
      network: tr("common.networkNotUsed")
    });
    const dryText = `${tr("init.dryRun.title")}\n\n${dryBody}\n`;
    emit(args, dryText, {
      command: "init",
      dryRun: true,
      written: false,
      targetRoot: result.targetRoot,
      workspaceRoot: result.workspaceRoot,
      filesPlanned: result.files,
      existingWorkspace: result.existingWorkspace,
      usedDefaultTarget,
      network: "not used"
    });
    return;
  }

  const startHere = path.join(result.workspaceRoot, "START_HERE.md");
  const backupLine = result.backupPath
    ? tr("init.backupLine", { backupPath: result.backupPath })
    : "";
  const createdBody = tr("init.created.body", {
    workspaceRoot: result.workspaceRoot,
    startHere,
    files: result.files,
    backupLine,
    network: tr("common.networkNotUsed"),
    walkthroughYourTask: path.join(result.workspaceRoot, "walkthroughs", "10-minute-your-task.md"),
    walkthrough: path.join(result.workspaceRoot, "walkthroughs", "10-minute.md")
  });
  const createdText = `${tr("init.created.title")}\n\n${createdBody}\n`;
  emit(args, createdText, {
    command: "init",
    dryRun: false,
    written: true,
    targetRoot: result.targetRoot,
    workspaceRoot: result.workspaceRoot,
    startHere,
    filesWritten: result.files,
    backupPath: result.backupPath,
    why: "docs/WHY_THIS_EXISTS.md",
    // First-experience tie-in: point a brand-new user at the value report built
    // from their own work (report-only, local). Surfaced in --json too so an
    // integrating tool can chain it.
    nextValueReport: "node bin/ai-collab.js bootstrap --yes",
    network: "not used"
  });
}
487
+
488
/**
 * Resolve the workspace directory that `check` should validate.
 *
 * Uses the same marker as findWorkspace (WORKSPACE_MANIFEST.json, which only
 * ever exists inside a real .aict), so `check <project-root>` descends into
 * <root>/.aict instead of being fooled by the project root's own START_HERE.md.
 * The only difference from findWorkspace is the fallback: the resolved input
 * path is returned (not null) so `check` can still validate an arbitrary
 * directory and surface its contract errors.
 *
 * @param {string} workspaceArg Path given on the command line.
 * @returns {string} Absolute path of the directory to validate.
 */
function resolveWorkspace(workspaceArg) {
  const input = path.resolve(workspaceArg);
  const candidates = [input, path.join(input, ".aict")];
  const withManifest = candidates.find((dir) =>
    existsSync(path.join(dir, "WORKSPACE_MANIFEST.json"))
  );
  return withManifest ?? input;
}
500
+
501
/**
 * `check`: validate a workspace against the contract and report the outcome.
 *
 * On failure the errors go to stderr (as JSON with --json, otherwise as a
 * bulleted list) and the process exit code is set to 1. On success the
 * result is emitted through emit().
 *
 * @param {object} args Parsed CLI arguments; `args.workspace` is required.
 * @throws {Error} When --workspace is missing.
 */
function check(args) {
  if (!args.workspace) {
    throw new Error("Missing --workspace. Run: node bin/ai-collab.js check --workspace <dir>");
  }

  const workspace = resolveWorkspace(args.workspace);
  const result = validateWorkspace(workspace);

  if (result.ok) {
    const passedText = `Contract check passed.
Workspace: ${workspace}
Checks: ${result.checks}
`;
    emit(args, passedText, {
      command: "check",
      ok: true,
      workspace,
      checks: result.checks
    });
    return;
  }

  if (args.json) {
    const failure = { command: "check", ok: false, workspace, errors: result.errors };
    console.error(JSON.stringify(failure, null, 2));
  } else {
    const bullets = result.errors.map((error) => `- ${error}`).join("\n");
    console.error(`Contract check failed:\n${bullets}`);
  }
  process.exitCode = 1;
}
527
+
528
/**
 * Render an adapter install plan as one bullet line per entry.
 *
 * @param {Array<{action: string, path: string, tool: string}>} plan Planned file operations.
 * @returns {string} "(none)" for a missing/empty plan, otherwise newline-joined bullets.
 */
function formatAdapterPlan(plan) {
  const isEmpty = !plan || plan.length === 0;
  if (isEmpty) {
    return "(none)";
  }

  const describe = ({ action, path: filePath, tool }) => {
    // Anything that is not a backup-replace is reported as a plain create.
    const verb = action === "backup-replace" ? "backup+replace" : "create";
    return ` - ${verb}: ${filePath} (${tool})`;
  };
  return plan.map(describe).join("\n");
}
537
+
538
/**
 * Human-readable summary of the opt-in hook layer.
 *
 * When hooks were not requested or do not apply, this is a single explanatory
 * line (so the default install output gains nothing scary). When requested, it
 * lists exactly which local files would be created/merged plus the one-line
 * uninstall path, meeting the "list-before-install + uninstallable +
 * local-only" contract promise.
 *
 * @param {{enabled?: boolean, applicable?: boolean, reason?: string,
 *          plan?: Array<{action: string, relativePath: string}>}} hooks Hook plan summary.
 * @returns {string} One line, or a multi-line section joined with "\n".
 */
function formatHookSection(hooks) {
  if (!hooks || !hooks.enabled) {
    return "Hooks: not enabled (opt in with --enable-hooks; never installs a global hook).";
  }
  if (!hooks.applicable) {
    return `Hooks: requested but skipped — ${hooks.reason}.`;
  }

  // A Map (not a plain object) so an odd action name can never hit an inherited key.
  const verbByAction = new Map([
    ["backup-replace", "backup+replace"],
    ["merge", "merge into"],
    ["already-present", "already present, leave"],
    ["skip-unparseable", "unparseable, leave untouched"]
  ]);
  const planLines = hooks.plan.map((item) => {
    const verb = verbByAction.get(item.action) ?? "create";
    return ` - ${verb}: ${item.relativePath}`;
  });

  const header = "Hooks: project-local Claude Code Stop hook (local-only; no global hook):";
  const footer = [
    " Reminds you to capture evidence + run `node bin/ai-collab.js receipt create` when you claim a task is done.",
    " Uninstall: remove the \"ai-collab-receipt-reminder\" entry from the local settings.json above."
  ];
  return [header].concat(planLines, footer).join("\n");
}
565
+
566
/**
 * Shared message for when `--tool auto` finds no AI tool in the target.
 * Nothing is written in that case (never all six silently); the text tells
 * the user to pick a tool explicitly or pass `--tool all`.
 *
 * @param {string} targetRoot Directory that was probed.
 * @returns {string} Multi-line explanation (no trailing newline).
 */
function autoFoundNothingMessage(targetRoot) {
  const lines = [
    `No AI tool detected in ${targetRoot}, so nothing was written.`,
    "Pass --tool to choose explicitly, e.g.:",
    "--tool cursor install only Cursor rules",
    "--tool claude,codex install for several tools (comma-separated)",
    "--tool all install for all supported tools (cursor, codex, claude, copilot, cline, windsurf)",
    "Detection looks for: .cursor/ or .cursorrules (cursor), .claude/ or CLAUDE.md (claude), AGENTS.md (codex), .github/copilot-instructions.md (copilot), .clinerules (cline), .windsurf/ (windsurf)."
  ];
  return lines.join("\n");
}
576
+
577
/**
 * `adapters install`: install (or preview with --dry-run) adapter guidance
 * files for the selected AI tools in --target.
 *
 * Three outcomes are reported through emit():
 *   1. `--tool auto` matched nothing: nothing written, explanatory message;
 *   2. dry run: the plan only;
 *   3. real run: the plan plus the number of backups taken.
 *
 * @param {object} args Parsed CLI arguments (`_`, `target`, `force`, `dry-run`,
 *   `tool`, and the hook opt-in flag).
 * @throws {Error} On an unknown subcommand or a missing --target.
 */
function adapters(args) {
  const usage = "Run: node bin/ai-collab.js adapters install --target <dir>";
  const subcommand = args._[1];
  if (subcommand !== "install") {
    throw new Error(`Unknown adapters command. ${usage}`);
  }
  if (!args.target) {
    throw new Error(`Missing --target. ${usage}`);
  }

  // The user-facing flag is --enable-hooks, and every other hyphenated flag in
  // this file is read under its literal name (args["dry-run"]). Accept both
  // spellings so the opt-in works whichever key the parser stores; the
  // camelCase key still wins when present, so existing behavior is unchanged.
  // NOTE(review): the argument parser is not visible here — confirm which key it sets.
  const hooksRequested = args.enableHooks ?? args["enable-hooks"];

  const result = installAdapters(args.target, {
    force: Boolean(args.force),
    dryRun: Boolean(args["dry-run"]),
    tool: args.tool,
    enableHooks: Boolean(hooksRequested)
  });

  // --tool auto matched no tool: report and stop, in both dry-run and real runs.
  if (result.autoFoundNothing) {
    const prefix = result.dryRun ? "Dry run. " : "";
    emit(args, `${prefix}${autoFoundNothingMessage(result.targetRoot)}
`, {
      command: "adapters install",
      dryRun: result.dryRun,
      written: false,
      targetRoot: result.targetRoot,
      toolMode: result.toolMode,
      detected: result.detected,
      autoFoundNothing: true,
      filesPlanned: 0,
      plan: [],
      hooks: result.hooks
    });
    return;
  }

  // Shared by the dry-run and real-run reports below.
  const detectedSuffix = result.detected
    ? ` (detected: ${result.detected.join(", ") || "none"})`
    : "";
  const toolSelection = `Tool selection: ${result.toolMode}${detectedSuffix}`;
  const planText = formatAdapterPlan(result.plan);
  const hookText = formatHookSection(result.hooks);

  if (result.dryRun) {
    emit(args, `Dry run. No adapter files written.

Target: ${result.targetRoot}
${toolSelection}
Files planned: ${result.files}
${planText}
${hookText}
`, {
      command: "adapters install",
      dryRun: true,
      written: false,
      targetRoot: result.targetRoot,
      toolMode: result.toolMode,
      detected: result.detected,
      filesPlanned: result.files,
      plan: result.plan,
      hooks: result.hooks
    });
    return;
  }

  emit(args, `Adapter guidance installed.

Target: ${result.targetRoot}
${toolSelection}
Files written: ${result.files}
${planText}
${hookText}
Backups: ${result.backups.length}
`, {
    command: "adapters install",
    dryRun: false,
    written: true,
    targetRoot: result.targetRoot,
    toolMode: result.toolMode,
    detected: result.detected,
    filesWritten: result.files,
    plan: result.plan,
    backups: result.backups,
    hooks: result.hooks
  });
}
654
+
655
+ // --- Run-layer commands (P1) ----------------------------------------------
656
+ //
657
+ // These five command groups (task / evidence / run / receipt / status) drive the
658
+ // JSONL ledgers under <workspace>/state/. They all resolve the same state dir,
659
+ // reuse the shared ledger.js read/append helpers (so the on-disk shape can never
660
+ // drift from what validate.js reads), and emit via the existing text/--json
661
+ // helper. learning-ledger has a generated skeleton + validation but no write
662
+ // command yet (its writer is P4).
663
+
664
/**
 * Find an EXISTING generated workspace for the run-layer state commands.
 *
 * Unlike resolveWorkspace (which falls back to the input path so `check` can
 * validate an arbitrary directory), this returns null when no real workspace
 * is present. The marker is WORKSPACE_MANIFEST.json — the machine file
 * workspace.js writes into every generated .aict and ONLY there. START_HERE.md
 * must NOT be the marker: that doc also ships at the PROJECT ROOT, so probing
 * for it would match the root and silently resolve the state dir to
 * <root>/state instead of the real <root>/.aict/state, reading an empty/stray
 * ledger. WORKSPACE_MANIFEST.json never lands at the root, so it pins
 * detection to the actual workspace.
 *
 * @param {string} workspaceArg Directory to probe (the workspace or its parent project).
 * @returns {string|null} Absolute workspace path, or null when none is found.
 */
function findWorkspace(workspaceArg) {
  const input = path.resolve(workspaceArg);
  for (const candidate of [input, path.join(input, ".aict")]) {
    const manifest = path.join(candidate, "WORKSPACE_MANIFEST.json");
    if (existsSync(manifest)) {
      return candidate;
    }
  }
  return null;
}
680
+
681
/**
 * Resolve the state dir the run-layer ledgers live in (always <workspace>/state).
 *
 * This REQUIRES a real workspace (one created by init): when none is found it
 * refuses with actionable init guidance and writes nothing. The old behavior
 * silently defaulted to <cwd>/state, scaffolding a stray ./state ledger dir
 * into whatever directory the command ran in — including a user's own git
 * repo, where a routine `git add .` would then commit their private task data.
 * Refusing is the safe default.
 *
 * @param {string} [workspaceArg] --workspace value; the current directory is probed when omitted.
 * @returns {string} Path of the workspace's state directory.
 * @throws {Error} When no workspace is found at (or under) the probed directory.
 */
function resolveStateDir(workspaceArg) {
  const workspaceRoot = findWorkspace(workspaceArg ?? ".");
  if (workspaceRoot) {
    return path.join(workspaceRoot, "state");
  }

  // Name the place we looked: the explicit path when one was given, otherwise
  // the wording for "the current directory".
  let where;
  if (workspaceArg === undefined) {
    where = tr("error.noWorkspace.currentDir");
  } else {
    where = `"${path.resolve(workspaceArg)}"`;
  }
  throw new Error(tr("error.noWorkspace", { where }));
}
697
+
698
/**
 * Produce a real (non-synthetic) timestamp at runtime.
 *
 * The committed templates use a fixed synthetic value instead (see
 * workspace.js) so the contract diff stays deterministic; only live command
 * runs stamp real time.
 *
 * @returns {string} Current time as an ISO 8601 string.
 */
function now() {
  const current = new Date();
  return current.toISOString();
}
704
+
705
/**
 * Read a mandatory CLI option.
 *
 * @param {object} args Parsed CLI arguments.
 * @param {string} name Option name to read from `args`.
 * @returns {*} The option's value, unchanged.
 * @throws {Error} When the option is undefined or the empty string.
 */
function requireOption(args, name) {
  const value = args[name];
  const provided = value !== undefined && value !== "";
  if (provided) {
    return value;
  }
  throw new Error(tr("error.missingOption", { name }));
}
712
+
713
+ // B6a-1: a CONSERVATIVE, NARROW detector of known-destructive command shapes for
714
+ // `run exec`. The goal is a high-signal guard that very rarely false-positives on
715
+ // ordinary commands: each pattern is anchored to a recognizably dangerous form
716
+ // (whole-word / boundary matched), not a loose substring. Returns the list of human
717
+ // labels for every pattern the command matches (empty list = not flagged, so the
718
+ // default behavior is completely unchanged). Detection only — the caller decides
719
+ // whether to prompt, refuse, or run. Conservative by design: a destructive command
720
+ // written in an exotic way may slip through (we accept misses to avoid false alarms).
721
+ //
722
+ // TWO false-positive guards (B6a-1 hardening) applied BEFORE the patterns run, so a
723
+ // dangerous WORD that is not actually a command-to-run does not trip the guard:
724
+ // 1. QUOTED LITERALS are neutralized. `echo 'rm -rf is dangerous'` mentions rm -rf
725
+ // inside a string — it deletes nothing. stripQuotedLiterals blanks single- and
726
+ // double-quoted spans so a danger token quoted as data is not seen.
727
+ // 2. COMMAND POSITION is required for command-name dangers. `grep sudo file.txt` has
728
+ // `sudo` as a SEARCH ARGUMENT, not the command being run. A command-name danger
729
+ // (rm / sudo / dd / mkfs / chmod / curl|wget-pipe) only counts when the word sits
730
+ // at a COMMAND position: the start of the string or right after a shell separator
731
+ // (`;`, `|`, `&`, newline, or `(`) — never merely after another word. Redirect and
732
+ // fork-bomb shapes are operator-based, so they are inherently positional already.
733
+ // These narrow the guard further; truly destructive commands (a real `rm -rf /`,
734
+ // `curl ... | bash`, `> /dev/sda`) still trip it.
735
+
736
/**
 * Blank out quoted and backslash-escaped DATA in a shell command, preserving
 * length, so the danger patterns only ever see real shell syntax.
 *
 * - Single- and double-quoted spans (quotes included) become spaces; newlines
 *   inside a span are kept so line positions do not shift.
 * - Backslash escapes follow POSIX quoting. OUTSIDE quotes, `\x` is the literal
 *   character x: the backslash is blanked, and x is blanked too when it would
 *   otherwise read as shell syntax (quote, `;|&()<>`, whitespace, backslash),
 *   else kept. So `\"` never opens a quote, `\;` is not a separator, and `\rm`
 *   is still seen as `rm`. INSIDE double quotes, `\"` does not close the span.
 *   Inside single quotes a backslash has no special meaning.
 * - Unbalanced quote: everything to end of string is blanked (a dangling quote
 *   is treated as opening a literal, the safe-for-false-positives direction).
 *
 * Without the escape handling, `echo \"; rm -rf /` was read as "open a quote,
 * blank the rest", hiding a command that really runs from the guard.
 *
 * @param {string} command Raw command line.
 * @returns {string} Same-length string with quoted/escaped data replaced by spaces.
 */
function stripQuotedLiterals(command) {
  // Characters that mean something to the shell when NOT escaped.
  const shellSyntax = /["';|&()<>\s\\]/;
  let out = "";
  let quote = null; // current open quote char, or null
  for (let i = 0; i < command.length; i += 1) {
    const ch = command[i];
    const hasNext = i + 1 < command.length;
    if (quote === null) {
      if (ch === "\\" && hasNext) {
        // Escaped character outside quotes: literal data, never syntax.
        const escaped = command[i + 1];
        out += " ";
        out += shellSyntax.test(escaped) ? " " : escaped;
        i += 1; // the escaped character has been consumed
      } else if (ch === "'" || ch === '"') {
        quote = ch;
        out += " "; // opening quote -> space
      } else {
        out += ch;
      }
    } else if (quote === '"' && ch === "\\" && hasNext) {
      // Escape inside double quotes: both characters are quoted content, and
      // an escaped `"` must not close the span. Newlines are still preserved.
      out += command[i + 1] === "\n" ? " \n" : "  ";
      i += 1;
    } else if (ch === quote) {
      quote = null;
      out += " "; // closing quote -> space
    } else {
      out += ch === "\n" ? "\n" : " "; // quoted content -> space (keep \n)
    }
  }
  return out;
}
756
+
757
// Regex source for a COMMAND position: start-of-string, or just after a shell
// command separator (`;` `|` `&` newline `(`), followed by optional whitespace.
// Crucially this does NOT include "after a bare space following a word" — so
// `grep sudo` (sudo as an argument) is NOT a match, while `sudo ...`,
// `foo; sudo ...`, `a && sudo`, `a | sudo` are.
const CMD_POS = "(?:^|[\\n;|&(])\\s*";

/**
 * Build a regex that matches `word` only when it sits at a command position.
 *
 * @param {string} word Regex source for the command word.
 * @param {string} [bodyAfter=""] Regex source for what follows the command word (flags/args/etc.).
 * @returns {RegExp} A fresh, flagless regular expression.
 */
function atCommandPosition(word, bodyAfter = "") {
  const source = CMD_POS.concat(word, bodyAfter);
  return new RegExp(source);
}
766
+
767
// Table of known-destructive command shapes. Each entry pairs a human label
// with a predicate `test(c)` over the (quote-stripped) command string.
const DANGEROUS_COMMAND_PATTERNS = [
  {
    // rm -rf / rm -fr (recursive + force). `rm` must be the COMMAND (command
    // position), then the combined flag in either order, bundled (`-rf`/`-fr`)
    // or split, or the long forms.
    label: "rm -rf (recursive force delete)",
    test(c) {
      const bundledFlags = atCommandPosition("rm\\b", "[^\\n|;&]*(?:-[a-z]*r[a-z]*f|-[a-z]*f[a-z]*r)\\b");
      if (bundledFlags.test(c)) {
        return true;
      }
      if (!atCommandPosition("rm\\b").test(c)) {
        return false;
      }
      const recursiveFlag = /(?:^|\s)-(?:-recursive|r)\b/;
      const forceFlag = /(?:^|\s)-(?:-force|f)\b/;
      return recursiveFlag.test(c) && forceFlag.test(c);
    }
  },
  {
    // sudo (privilege escalation) — only as the command word, not as an
    // argument to another command.
    label: "sudo (privilege escalation)",
    test(c) {
      return atCommandPosition("sudo\\s").test(c);
    }
  },
  {
    // Classic bash fork bomb :(){ ... } — an operator shape, inherently positional.
    label: "fork bomb",
    test(c) {
      return /:\(\)\s*\{/.test(c);
    }
  },
  {
    // Piping a network download straight into a shell: curl|sh, wget|bash, etc.
    // curl/wget must be the command of its segment; the pipe-into-a-shell is
    // the destructive part.
    label: "curl/wget piped into a shell",
    test(c) {
      const downloadIntoShell = atCommandPosition(
        "(?:curl|wget)\\b",
        "[^\\n]*\\|\\s*(?:sudo\\s+)?(?:sh|bash|zsh|dash|ksh|fish)\\b"
      );
      return downloadIntoShell.test(c);
    }
  },
  {
    // Raw disk writer — `dd` as the command word.
    label: "dd (raw disk write)",
    test(c) {
      return atCommandPosition("dd\\s").test(c);
    }
  },
  {
    // Filesystem maker (mkfs, mkfs.ext4, …) — as the command word.
    label: "mkfs (format filesystem)",
    test(c) {
      return atCommandPosition("mkfs(?:\\.\\w+)?\\b").test(c);
    }
  },
  {
    // Redirecting into a device node (> /dev/sda); the harmless pseudo-devices
    // listed in the lookahead (null, zero, stdout, …) are excluded. The `>`
    // operator is inherently a command-position construct (and quotes were stripped).
    label: "redirect into a /dev/ device",
    test(c) {
      const intoDevice = />\s*\/dev\/(?!null\b|zero\b|stdout\b|stderr\b|tty\b|stdin\b|fd\b|random\b|urandom\b)\S+/;
      return intoDevice.test(c);
    }
  },
  {
    // World-writable recursive chmod — `chmod` as the command word, with the
    // -R flag before or after the 777 mode.
    label: "chmod -R 777 (world-writable recursive)",
    test(c) {
      const flagThenMode = atCommandPosition("chmod\\b", "[^\\n|;&]*-[a-z]*R[a-z]*\\s+[0-7]*7{3}\\b");
      if (flagThenMode.test(c)) {
        return true;
      }
      const modeThenFlag = atCommandPosition("chmod\\b", "[^\\n|;&]*\\s7{3}\\b[^\\n|;&]*-[a-z]*R");
      return modeThenFlag.test(c);
    }
  },
  {
    // Redirecting a write into a SYSTEM path (clobbers /etc, /usr, /bin, …).
    // Append (>>) is included; reads (<) are not. The device-node case is
    // handled by its own entry.
    label: "redirect (>) into a system path",
    test(c) {
      const intoSystemPath = /(?<!\d)>>?\s*\/(?:etc|usr|bin|sbin|boot|lib|lib64|sys|proc|var\/(?:lib|spool|log)|System|Library)\b/;
      return intoSystemPath.test(c);
    }
  }
];
798
+
799
/**
 * Return the labels of every dangerous pattern the command matches (empty =
 * safe to run with no extra ceremony).
 *
 * The command is first run through stripQuotedLiterals so a danger token
 * quoted as DATA (e.g. echo 'rm -rf ...') is not seen; the command-position
 * anchoring in the patterns handles a danger token used as an ARGUMENT
 * (e.g. grep sudo file). Pure; detection only.
 *
 * @param {string} command Command line about to be executed.
 * @returns {string[]} Matched pattern labels, in table order; [] for a non-string or empty command.
 */
function detectDangerousCommand(command) {
  if (typeof command !== "string" || command.length === 0) {
    return [];
  }
  const scanned = stripQuotedLiterals(command);

  const fires = ({ test }) => {
    try {
      return test(scanned);
    } catch {
      return false; // a pattern must never crash the guard
    }
  };
  return DANGEROUS_COMMAND_PATTERNS.filter(fires).map(({ label }) => label);
}
819
+
820
/**
 * Ask the user (on an interactive TTY) whether to run a flagged command.
 *
 * Resolves true only on an explicit y / yes (case-insensitive, surrounding
 * whitespace allowed); ANY other input — including a bare Enter — resolves
 * false (default-deny). Uses node:readline against stdin/stdout, and always
 * closes the interface.
 *
 * @param {string} command The flagged command, echoed back to the user.
 * @param {string[]} matchedLabels Labels of the patterns that matched.
 * @returns {Promise<boolean>} Whether the user explicitly confirmed.
 */
async function confirmDangerous(command, matchedLabels) {
  const readline = await import("node:readline");
  const say = (line) => process.stdout.write(`${line}\n`);

  say(tr("danger.header"));
  say(tr("danger.matched", { patterns: matchedLabels.join(", ") }));
  say(tr("danger.command", { command }));

  const explicitYes = /^\s*y(es)?\s*$/i;
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
  try {
    const answer = await new Promise((resolve) => {
      rl.question(tr("danger.prompt"), resolve);
    });
    return explicitYes.test(answer);
  } finally {
    rl.close();
  }
}
838
+
839
/**
 * Dispatch `task <action>` to its handler.
 *
 * @param {object} args Parsed CLI arguments; `args._[1]` is the action.
 * @returns {*} Whatever the selected handler returns.
 * @throws {Error} When the action is neither "create" nor "update".
 */
function taskCommand(args) {
  switch (args._[1]) {
    case "create":
      return taskCreate(args);
    case "update":
      return taskUpdate(args);
    default:
      throw new Error('Unknown task command. Run: node bin/ai-collab.js task create --title "..." | node bin/ai-collab.js task update --task <id> --status <status>');
  }
}
845
+
846
/**
 * `task create`: append a new open task to the tasks ledger.
 *
 * @param {object} args Parsed CLI arguments; requires `title`, optional `workspace`.
 * @throws {Error} When --title is missing or no workspace can be resolved.
 */
function taskCreate(args) {
  const title = requireOption(args, "title");
  const stateDir = resolveStateDir(args.workspace);

  // B6a-2: allocate the id + append UNDER the ledger lock so two concurrent
  // `task create`s can never mint the same id (the read->nextId->append happens
  // atomically; the ledger is re-read inside the lock).
  const buildTask = (id) => ({ id, title, status: "open", createdAt: now() });
  const record = appendWithNextId(stateDir, "tasks", "t", buildTask);

  const ledgerPath = path.join(stateDir, "tasks.jsonl");
  const summary = `Task created.
id: ${record.id}
title: ${record.title}
status: ${record.status}
Ledger: ${ledgerPath}
`;
  emit(args, summary, { command: "task create", ok: true, task: record, stateDir });
}
862
+
863
/**
 * `task update`: change a task's status.
 *
 * Follows the read-all -> patch the matching row -> rewrite pattern so the
 * ledger stays deterministic and ordered. The write-time checks use the SAME
 * shared rules (ledger.js) the validator uses, so the CLI can never write a
 * state the validator would reject: the status must be a legal task status,
 * and marking a task "done" requires at least one piece of evidence
 * (otherwise it is the thin "done" the system exists to catch). A done task
 * without an accepted receipt is allowed but produces a warning.
 *
 * @param {object} args Parsed CLI arguments; requires `task` and `status`, optional `workspace`.
 * @throws {Error} On a missing option, an illegal status, an unknown task id,
 *   or a "done" request with no evidence.
 */
function taskUpdate(args) {
  const taskId = requireOption(args, "task");
  const status = requireOption(args, "status");
  const statusIsLegal = TASK_STATUSES.includes(status);
  if (!statusIsLegal) {
    throw new Error(tr("error.taskStatusInvalid", { allowed: TASK_STATUSES.join(", "), got: status }));
  }

  const stateDir = resolveStateDir(args.workspace);
  const tasks = readLedger(stateDir, "tasks");
  const target = tasks.find(({ id }) => id === taskId);
  if (!target) {
    throw new Error(tr("error.taskNotFound.update", { id: taskId }));
  }

  // A task may move to "done" only with evidence pointing at it. Same rule as
  // validator check 5, applied here so a thin "done" is refused at write time.
  let doneWarning = null;
  if (doneRequiresEvidence(status)) {
    const evidenceRows = readLedger(stateDir, "evidence");
    if (!evidenceRows.some((row) => row.taskId === taskId)) {
      throw new Error(tr("error.doneNoEvidence", { id: taskId }));
    }
    const receipts = readLedger(stateDir, "receipts");
    const accepted = taskHasAcceptedReceipt(taskId, receipts);
    if (!accepted) {
      doneWarning = tr("warn.doneNoReceipt", { id: taskId });
      console.warn(doneWarning);
    }
  }

  // Patch the row in place, then rewrite the whole ledger.
  target.status = status;
  target.updatedAt = now();
  writeLedger(stateDir, "tasks", tasks);

  const ledgerPath = path.join(stateDir, "tasks.jsonl");
  const summary = `Task updated.
id: ${target.id}
status: ${target.status}
Ledger: ${ledgerPath}
`;
  emit(args, summary, { command: "task update", ok: true, task: target, stateDir, warning: doneWarning });
}
908
+
909
+ function evidenceCommand(args) {
910
+ const action = args._[1];
911
+ if (action !== "add") {
912
+ throw new Error("Unknown evidence command. Run: node bin/ai-collab.js evidence add --task <id> --kind <k> --summary \"...\"");
913
+ }
914
+ const taskId = requireOption(args, "task");
915
+ const kind = requireOption(args, "kind");
916
+ const summary = requireOption(args, "summary");
917
+ const stateDir = resolveStateDir(args.workspace);
918
+
919
+ const tasks = readLedger(stateDir, "tasks");
920
+ if (!tasks.some((task) => task.id === taskId)) {
921
+ throw new Error(tr("error.taskNotFound.evidence", { id: taskId }));
922
+ }
923
+ const evidence = readLedger(stateDir, "evidence");
924
+ // A provisional id (re-stamped authoritatively inside the lock at append time, B6a-2)
925
+ // so the read->nextId->append is atomic against concurrent evidence adds.
926
+ const record = { id: nextId(evidence, "e"), taskId, kind, summary };
927
+ // Structured fields for the two load-bearing kinds (P2 structure gate). They
928
+ // are attached only when given so a generic kind row stays exactly as before
929
+ // (backward compatible). For kind:"rerun" the gate below requires command +
930
+ // exitCode; for kind:"cross_family_guard" at least one attribution field.
931
+ if (kind === EVIDENCE_KIND_RERUN) {
932
+ if (args.command !== undefined && args.command !== "") record.command = args.command;
933
+ if (args.exit !== undefined && args.exit !== "") {
934
+ const exitCode = Number(args.exit);
935
+ if (!Number.isInteger(exitCode)) {
936
+ throw new Error(`--exit must be an integer exit code, got "${args.exit}".`);
937
+ }
938
+ record.exitCode = exitCode;
939
+ }
940
+ // A1: the raw OUTPUT is required for a rerun (an L4 also needs a reconciled
941
+ // run; see --run below). Attached when given; the structure gate refuses a
942
+ // rerun without output.
943
+ if (args.output !== undefined && args.output !== "") record.output = args.output;
944
+ // A1 L4 reconciliation: --run links this rerun to a recorded run in runs.jsonl.
945
+ // OPTIONAL for a generic rerun row (one without it is still valid, it just
946
+ // cannot reach L4), but REQUIRED for the rerun to back an L4 pass. When given,
947
+ // it is validated below against the runs ledger (exists, same task, finished,
948
+ // executed:true, matching exitCode + command + output hash) so a rerun can never reference a run it does not
949
+ // reconcile with — the write-time twin of the validator's reconciliation check.
950
+ if (args.run !== undefined && args.run !== "") record.runId = args.run;
951
+ if (args.runner !== undefined && args.runner !== "") record.runner = args.runner;
952
+ }
953
+ if (kind === EVIDENCE_KIND_CROSS_FAMILY_GUARD) {
954
+ if (args.reviewer !== undefined && args.reviewer !== "") record.reviewer = args.reviewer;
955
+ if (args.family !== undefined && args.family !== "") record.family = args.family;
956
+ if (args.ref !== undefined && args.ref !== "") record.ref = args.ref;
957
+ }
958
+ if (args.detail !== undefined && args.detail !== "") record.detail = args.detail;
959
+ record.createdAt = now();
960
+ // Reject an empty-shell special kind at WRITE time, using the SAME shared
961
+ // predicate the validator applies, so the CLI can never emit a load-bearing
962
+ // evidence row the validator would reject (e.g. a cross_family_guard with no
963
+ // reviewer/family/ref, or a rerun with no command/exitCode). Generic kinds
964
+ // return null here and are unaffected.
965
+ const structureError = specialEvidenceStructureError(record);
966
+ if (structureError) {
967
+ throw new Error(`Cannot add evidence: ${structureError}.`);
968
+ }
969
+ // A1 L4 reconciliation (write-time): if a rerun row names a runId, that run must
970
+ // exist in runs.jsonl, belong to the same task, be finished, be recorded by run
971
+ // exec, and agree on exitCode + command + output hash — the SAME
972
+ // rerunRunReconcileError the validator applies, so
973
+ // a rerun that does not reconcile is refused at add-time rather than silently
974
+ // stored as an L4-incapable row whose runId looks valid. A rerun with NO runId is
975
+ // allowed through here (it is a valid generic rerun; it just cannot reach L4) — we
976
+ // only reconcile a runId that was actually supplied.
977
+ if (record.kind === EVIDENCE_KIND_RERUN && record.runId !== undefined) {
978
+ const runs = readLedger(stateDir, "runs");
979
+ const reconcileError = rerunRunReconcileError(record, runs);
980
+ if (reconcileError) {
981
+ throw new Error(`Cannot add evidence: ${reconcileError}.`);
982
+ }
983
+ }
984
+ // P2 soft kind-check (advisory, NOT a gate): evidence kind is free-form by
985
+ // design, so an unrecognized kind is still RECORDED — but a typo like
986
+ // "--kind reun" (meant "rerun") would otherwise become a silent generic row the
987
+ // author thinks is a rerun. Warn on stderr (so --json stdout stays clean) when
988
+ // the kind is not one the tool gives meaning to, naming that it was recorded as
989
+ // a generic row that does NOT raise the guard level. The row is appended either
990
+ // way; this never errors or blocks. (Empty-shell special kinds were already
991
+ // rejected above, so a known load-bearing kind that reaches here is well-formed.)
992
+ if (!isRecognizedEvidenceKind(record.kind)) {
993
+ console.warn(
994
+ `Warning: '${record.kind}' is not a recognized kind; recorded as a generic evidence row that won't raise the guard level.`
995
+ );
996
+ }
997
+ // B6a-2: re-stamp the authoritative id and append UNDER the lock so a concurrent
998
+ // evidence add cannot mint the same id.
999
+ appendWithNextId(stateDir, "evidence", "e", (id) => { record.id = id; return record; });
1000
+ emit(args, `Evidence added.
1001
+ id: ${record.id}
1002
+ task: ${record.taskId}
1003
+ kind: ${record.kind}
1004
+ summary: ${record.summary}
1005
+ Ledger: ${path.join(stateDir, "evidence.jsonl")}
1006
+ `, { command: "evidence add", ok: true, evidence: record, stateDir });
1007
+ }
1008
+
1009
// Allowlist of environment variables that `run exec --clean-env` hands to the
// command it spawns.
//
// By default run exec inherits the caller's FULL process.env, which can hold API
// keys, tokens and cloud credentials — so a command an AI suggested could read
// those secrets. With --clean-env the child gets only the small set a normal local
// command needs to resolve binaries and a home dir, and everything else is
// DROPPED. Only NAMED, non-secret operational variables are listed (never a
// wildcard); buildCleanEnv carries each one only when it is actually set in the
// parent:
//   PATH  - find the command + its tools (without it almost nothing runs)
//   HOME  - the user's home dir (tools resolve config/cache relative to it)
//   SHELL, LANG, LC_ALL, TERM, TMPDIR, TZ - locale/term/temp basics for normal CLIs
//   PATHEXT, SystemRoot, ComSpec, USERPROFILE - the Windows equivalents of PATH/HOME
// Intentionally EXCLUDED: anything not on this list — that is where keys/tokens
// live (e.g. *_API_KEY, *_TOKEN, AWS_*, OPENAI_API_KEY, GITHUB_TOKEN).
//
// The list is frozen because it is a security boundary: no other code path may
// widen it at runtime (a push/splice on it throws instead of silently letting a
// secret-bearing variable through).
const CLEAN_ENV_PASSTHROUGH = Object.freeze([
  "PATH",
  "HOME",
  "SHELL",
  "LANG",
  "LC_ALL",
  "TERM",
  "TMPDIR",
  "TZ",
  // Windows equivalents, carried so --clean-env works cross-platform.
  "PATHEXT",
  "SystemRoot",
  "ComSpec",
  "USERPROFILE"
]);
1040
+
1041
// Build the minimal child environment for `run exec --clean-env`.
//
// Returns a brand-new object holding only the CLEAN_ENV_PASSTHROUGH names whose
// value in `sourceEnv` (default: process.env) is a string; every other variable
// is left out. Keys appear in allowlist order.
function buildCleanEnv(sourceEnv = process.env) {
  const carried = CLEAN_ENV_PASSTHROUGH
    .filter((name) => typeof sourceEnv[name] === "string")
    .map((name) => [name, sourceEnv[name]]);
  return Object.fromEntries(carried);
}
1048
+
1049
// Entry point for the `run` sub-commands. `args._[1]` selects the action:
//   start  - append a "running" run row for --task (optionally with --command)
//   finish - mark the most recent still-running run of --task finished with --exit
//   exec   - actually run --command locally and record the real exit code
// Every action reports through emit(); an unknown action throws. The function is
// async only because `exec` may await an interactive confirmation prompt.
//
// NOTE(review): the leading whitespace of the continuation lines inside the
// message templates below could not be recovered from the diff rendering this
// block was taken from — confirm it against the published file.
async function runCommand(args) {
  const action = args._[1];
  const stateDir = resolveStateDir(args.workspace);

  if (action === "start") {
    const taskId = requireOption(args, "task");
    const tasks = readLedger(stateDir, "tasks");
    if (!tasks.some((task) => task.id === taskId)) {
      throw new Error(tr("error.taskNotFound.runStart", { id: taskId }));
    }
    const runs = readLedger(stateDir, "runs");
    // Provisional id; re-stamped authoritatively inside the lock at append (B6a-2).
    const record = { id: nextId(runs, "r"), taskId, startedAt: now(), status: "running" };
    if (args.command !== undefined && args.command !== "") record.command = args.command;
    // B6a-2: allocate id + append UNDER the lock (atomic vs concurrent run starts/execs).
    appendWithNextId(stateDir, "runs", "r", (id) => { record.id = id; return record; });
    emit(args, `Run started.
id: ${record.id}
task: ${record.taskId}
status: ${record.status}${record.command ? `\n command: ${record.command}` : ""}
Ledger: ${path.join(stateDir, "runs.jsonl")}
`, { command: "run start", ok: true, run: record, stateDir });
    return;
  }

  if (action === "finish") {
    const taskId = requireOption(args, "task");
    const exitRaw = requireOption(args, "exit");
    const exitCode = Number(exitRaw);
    if (!Number.isInteger(exitCode)) {
      throw new Error(`--exit must be an integer exit code, got "${exitRaw}".`);
    }
    // Finish the most recent still-running run for this task. Read-all -> patch
    // the matching line -> rewrite keeps the ledger deterministic and ordered.
    // B6a-2: the whole read-all -> patch -> rewrite runs UNDER the ledger lock so a
    // concurrent run start/exec append cannot be clobbered by the rewrite. The
    // callback receives the rows as re-read INSIDE the lock, so the freshest rows
    // are the ones patched.
    let target = null;
    rewriteLedgerUnderLock(stateDir, "runs", (runs) => {
      for (let i = runs.length - 1; i >= 0; i -= 1) {
        if (runs[i].taskId === taskId && runs[i].status === "running") {
          target = runs[i];
          break;
        }
      }
      if (!target) {
        throw new Error(`No running run found for task ${taskId}. Start one first: node bin/ai-collab.js run start --task ${taskId}.`);
      }
      target.finishedAt = now();
      target.exitCode = exitCode;
      target.status = "finished";
      return runs;
    });
    emit(args, `Run finished.
id: ${target.id}
task: ${target.taskId}
exitCode: ${target.exitCode}
status: ${target.status}
Ledger: ${path.join(stateDir, "runs.jsonl")}
`, { command: "run finish", ok: true, run: target, stateDir });
    return;
  }

  if (action === "exec") {
    // run exec ACTUALLY runs the command locally and records the REAL exit code —
    // unlike run start/finish, which RECORD a command + an exit code you report.
    // The recorded run is marked executed:true, so a rerun reconciled to it is
    // anchored to a real local execution rather than a typed number.
    const taskId = requireOption(args, "task");
    const command = requireOption(args, "command");
    const tasks = readLedger(stateDir, "tasks");
    if (!tasks.some((task) => task.id === taskId)) {
      throw new Error(tr("error.taskNotFound.runExec", { id: taskId }));
    }
    // B6a-1: conservative dangerous-command guard. When detectDangerousCommand
    // flags the command:
    //   - --yes/--force is an explicit opt-out (run it anyway, no prompt);
    //   - on a real TTY we ask y/N and a "no" refuses;
    //   - with NO interactive terminal (a script / an AI calling the CLI) we REFUSE
    //     rather than silently run it — the caller must pass --yes to proceed.
    // A refusal executes nothing and records nothing (message on stderr so --json
    // stdout stays clean, exit code 1). `dangerousConfirmed` is stamped on the run
    // only when the guard fired AND the user/flag confirmed it, so the ledger
    // shows a flagged command was knowingly approved.
    const dangerLabels = detectDangerousCommand(command);
    const preApproved = args.yes === true || args.force === true;
    let dangerousConfirmed = false;
    if (dangerLabels.length > 0 && !preApproved) {
      if (process.stdin.isTTY) {
        const ok = await confirmDangerous(command, dangerLabels);
        if (!ok) {
          console.error(tr("danger.declined"));
          process.exitCode = 1;
          return;
        }
        dangerousConfirmed = true;
      } else {
        console.error(tr("danger.refusedNonTty", { patterns: dangerLabels.join(", ") }));
        process.exitCode = 1;
        return;
      }
    } else if (dangerLabels.length > 0 && preApproved) {
      // Flagged but explicitly pre-approved via --yes/--force.
      dangerousConfirmed = true;
    }
    // B3-1: run the command in the WORKSPACE'S project root by default, not in the
    // caller's process.cwd(). stateDir is <root>/.aict/state, so the project root is
    // dirname(dirname(stateDir)). Running in process.cwd() would let a command
    // execute in an unrelated directory while being filed as evidence for THIS
    // workspace. --cwd <dir> overrides it explicitly.
    //
    // An EMPTY --cwd (e.g. `--cwd "$UNSET_VAR"`) is treated as "not given", like
    // every other optional flag here: path.resolve("") would otherwise yield
    // process.cwd() and silently reintroduce exactly the pollution described above.
    // A non-string --cwd (a bare flag with no value) is refused with a clear
    // message instead of a TypeError from path.resolve.
    const workspaceRoot = path.dirname(path.dirname(stateDir));
    const cwdOption = args.cwd;
    const cwdGiven = cwdOption !== undefined && cwdOption !== null && cwdOption !== "";
    if (cwdGiven && typeof cwdOption !== "string") {
      throw new Error("--cwd needs a directory path (e.g. --cwd ./packages/app). Omit it to run in the workspace root. Nothing recorded.");
    }
    const cwd = path.resolve(cwdGiven ? cwdOption : workspaceRoot);
    // The resolved cwd must exist and be a directory; it is RECORDED on the run +
    // PRINTED, so the evidence says exactly where it ran.
    if (!existsSync(cwd) || !statSync(cwd).isDirectory()) {
      throw new Error(`--cwd "${cwd}" is not an existing directory. Point run exec at a real working directory (defaults to the workspace root). Nothing recorded.`);
    }
    // --clean-env (opt-in): run with a MINIMAL env instead of inheriting the
    // caller's full process.env, so a command an AI suggested cannot read
    // inherited API keys/tokens. DEFAULT is unchanged (full process.env). Which
    // one was used is recorded on the run + printed.
    const cleanEnv = args.cleanEnv === true;
    const childEnv = cleanEnv ? buildCleanEnv(process.env) : process.env;
    const startedAt = now();
    const result = spawnSync(command, {
      shell: true,
      cwd,
      encoding: "utf8",
      timeout: 10 * 60 * 1000, // 10 min cap so a hung command cannot wedge the CLI
      maxBuffer: 10 * 1024 * 1024,
      env: childEnv
    });
    const finishedAt = now();
    // Only a command that ran to completion has a numeric exit status. (Under
    // shell:true a missing command is the SHELL exiting 127 — a real, recorded
    // exit.) status is null when the process failed to spawn, timed out, was
    // killed by a signal, or overflowed the capture buffer. In those cases record
    // NOTHING and surface a plain-language reason, so the ledger never gets a
    // half-real run.
    if (result.status === null) {
      const errorCode = result.error ? result.error.code : undefined;
      let why;
      if (errorCode === "ETIMEDOUT") {
        why = "timed out after 10 minutes";
      } else if (errorCode === "ENOBUFS") {
        // spawnSync reports a maxBuffer overflow as ENOBUFS; say what that means.
        why = "output exceeded the 10 MB capture cap";
      } else if (result.error) {
        why = result.error.message;
      } else if (result.signal) {
        why = `killed by signal ${result.signal}`;
      } else {
        why = "did not return an exit code (killed, or output exceeded the 10 MB capture cap)";
      }
      throw new Error(`Command did not complete (${why}): ${command}. Nothing recorded.`);
    }
    const exitCode = result.status;
    const stdout = result.stdout ?? "";
    const stderr = result.stderr ?? "";
    // The hash/byte counts cover the full trimmed output; only the copy echoed
    // back in the emit payload is capped at 4000 characters.
    const rawOutput = `${stdout}${stderr}`.trim();
    const output = rawOutput.length > 4000 ? `${rawOutput.slice(0, 4000)}\n…[truncated]` : rawOutput;
    const runs = readLedger(stateDir, "runs");
    const record = {
      // Provisional id; re-stamped authoritatively inside the lock at append (B6a-2).
      id: nextId(runs, "r"),
      taskId,
      command,
      cwd,
      startedAt,
      finishedAt,
      exitCode,
      status: "finished",
      executed: true,
      // Always present so the ledger says how every executed run got its
      // environment — false is the default full-env inherit.
      cleanEnv,
      // B6a-1: present (true) ONLY when the dangerous-command guard fired and the
      // run was knowingly approved; omitted entirely on an ordinary run.
      ...(dangerousConfirmed ? { dangerousConfirmed: true } : {}),
      outputSha256: outputSha256(rawOutput),
      outputBytes: outputByteLength(rawOutput),
      stdoutBytes: outputByteLength(stdout.trim()),
      stderrBytes: outputByteLength(stderr.trim())
    };
    // B6a-2: re-stamp id + append UNDER the lock (atomic vs concurrent run execs/starts).
    appendWithNextId(stateDir, "runs", "r", (id) => { record.id = id; return record; });
    emit(args, `Run executed (real local run; the tool ran this command and captured the real exit code).
id: ${record.id}
task: ${record.taskId}
command: ${record.command}
cwd: ${record.cwd}
exitCode: ${record.exitCode}
status: ${record.status}
executed: true
cleanEnv: ${record.cleanEnv}${cleanEnv ? " (minimal env: PATH/HOME + basics only; inherited secrets withheld)" : " (inherited the full environment)"}${dangerousConfirmed ? "\n dangerousConfirmed: true (matched a known destructive pattern; run was explicitly approved)" : ""}
Ledger: ${path.join(stateDir, "runs.jsonl")}
`, { command: "run exec", ok: true, run: record, output, stateDir });
    return;
  }

  throw new Error("Unknown run command. Run: node bin/ai-collab.js run start --task <id> | node bin/ai-collab.js run finish --task <id> --exit <code> | node bin/ai-collab.js run exec --task <id> --command \"...\"");
}
1255
+
1256
// Dispatcher for the `receipt` sub-commands: routes on args._[1] to
// receiptCreate ("create") or receiptAccept ("accept") and returns whatever the
// handler returns. Any other action throws an Error carrying the usage line.
function receiptCommand(args) {
  switch (args._[1]) {
    case "create":
      return receiptCreate(args);
    case "accept":
      return receiptAccept(args);
    default:
      throw new Error("Unknown receipt command. Run: node bin/ai-collab.js receipt create --task <id> --verdict <v> --guard-level <L0-L4> [--evidence <id,id>] [--rerun <id,id>] | node bin/ai-collab.js receipt accept --id <id> --owner");
  }
}
1262
+
1263
// Parse a comma-separated id list flag into a clean array (trimmed, no blanks).
//
// `value` is normally the raw flag string ("e1, e2"). It may also be an array of
// such strings (a flag given more than once); the pieces are concatenated in
// order. Anything that is not a string — undefined when the flag is absent, or a
// boolean when the flag was passed without a value — contributes no ids, so the
// function always returns an array and never throws on an odd flag shape.
function parseIdList(value) {
  const pieces = Array.isArray(value) ? value : [value];
  return pieces
    .filter((piece) => typeof piece === "string")
    .flatMap((piece) => piece.split(","))
    .map((id) => id.trim())
    .filter((id) => id.length > 0);
}
1267
+
1268
// `receipt create`: validate the request, COMPUTE the guard level from the review
// method + the cited evidence, and append one receipt row.
//
// Inputs read from `args`: --task and --verdict (required), --review-mode and
// --claimed-level / legacy --guard-level (optional, validated against
// REVIEW_MODES / GUARD_LEVELS), --evidence and --rerun (comma-separated ids),
// and the workspace. Throws on an invalid option, an unknown task, a cited id
// that is unknown or belongs to another task, or a verdict the computed level
// cannot carry. On success the row is appended and reported through emit().
//
// NOTE(review): the leading whitespace of the message lines below could not be
// recovered from the diff rendering this block was taken from — confirm it
// against the published file.
function receiptCreate(args) {
  const taskId = requireOption(args, "task");
  const verdict = requireOption(args, "verdict");
  if (!RECEIPT_VERDICTS.includes(verdict)) {
    throw new Error(`--verdict must be one of: ${RECEIPT_VERDICTS.join(", ")} (got "${verdict}").`);
  }

  // A1: the author may record the REVIEW METHOD and may state a claimed level,
  // but neither becomes the stored guard level directly — the level is
  // recomputed below. A claimed level is only used to WARN when it overstates.
  const reviewMode = args.reviewMode;
  const reviewModeGiven = reviewMode !== undefined;
  if (reviewModeGiven && !REVIEW_MODES.includes(reviewMode)) {
    throw new Error(`--review-mode must be one of: ${REVIEW_MODES.join(", ")} (got "${reviewMode}").`);
  }
  const claimedLevel = args.claimedLevel; // legacy --guard-level maps here too.
  const levelClaimed = claimedLevel !== undefined;
  if (levelClaimed && !GUARD_LEVELS.includes(claimedLevel)) {
    throw new Error(`--claimed-level (a.k.a. --guard-level) must be one of: ${GUARD_LEVELS.join(", ")} (got "${claimedLevel}").`);
  }

  const ledgerDir = resolveStateDir(args.workspace);
  const taskExists = readLedger(ledgerDir, "tasks").some((task) => task.id === taskId);
  if (!taskExists) {
    throw new Error(tr("error.taskNotFound.receipt", { id: taskId }));
  }

  const evidenceIds = parseIdList(args.evidence);
  const rerunEvidenceIds = parseIdList(args.rerun);
  const evidence = readLedger(ledgerDir, "evidence");
  const knownIds = new Set(evidence.map((row) => row.id));

  // Every cited id (plain evidence + rerun evidence) must exist and belong to
  // this task; both lists go through the same ownedEvidenceIds filter so an L4
  // claim cannot borrow another task's rows.
  const citations = [
    { label: "evidence", ids: evidenceIds },
    { label: "rerun evidence", ids: rerunEvidenceIds }
  ];
  for (const { label, ids } of citations) {
    if (ids.length === 0) continue;
    const unknown = ids.filter((id) => !knownIds.has(id));
    if (unknown.length > 0) {
      throw new Error(`Receipt cites unknown ${label} id(s): ${unknown.join(", ")}.`);
    }
    const ownedHere = new Set(ownedEvidenceIds(ids, taskId, evidence));
    const borrowed = ids.filter((id) => !ownedHere.has(id));
    if (borrowed.length > 0) {
      throw new Error(`Receipt for task ${taskId} cites ${label} from another task: ${borrowed.join(", ")}. A receipt may only cite evidence belonging to its own task.`);
    }
  }

  // A1 CORE: derive the guard level from the method + the evidence, never from
  // the claim. ownedRerun is filtered against the runs ledger as well, so only
  // reruns that ownedRerunEvidenceIds accepts for this task count toward L4.
  const runs = readLedger(ledgerDir, "runs");
  const ownedRerun = ownedRerunEvidenceIds(rerunEvidenceIds, taskId, evidence, runs);
  const ownedCrossFamily = ownedCrossFamilyGuardEvidenceIds(evidenceIds, taskId, evidence);
  const hasCrossFamily = ownedCrossFamily.length > 0;
  const hasReconciledRerun = ownedRerun.length > 0;
  const assessment = computeGuardLevel({
    reviewMode,
    hasCrossFamilyGuardEvidence: hasCrossFamily,
    hasRerunOutputEvidence: hasReconciledRerun,
    hasAuthorRunEvidence: hasOwnedRunEvidence(evidenceIds, taskId, evidence),
    hasAnyEvidence: ownedEvidenceIds(evidenceIds, taskId, evidence).length > 0
  });
  const guardLevel = assessment.level; // the COMPUTED level, never the claimed one.

  // A claim ABOVE the computed level is not accepted silently: the computed
  // (lower) level is kept and a note is printed. A claim at or below it is fine.
  const overclaimed = levelClaimed && guardLevelRank(claimedLevel) > guardLevelRank(guardLevel);
  const claimDowngradeNote = overclaimed
    ? `claimed ${claimedLevel} but evidence supports ${guardLevel} — recorded as ${guardLevel} (${assessment.reason})`
    : null;

  // P2 core: apply the shared verdict x guardLevel predicate to the COMPUTED
  // level at write time, so a verdict the evidence cannot back is refused here.
  const consistencyError = guardLevelVerdictError(
    guardLevel,
    verdict,
    hasReconciledRerun,
    hasCrossFamily,
    rerunEvidenceIds.length > 0
  );
  if (consistencyError) {
    throw new Error(`Inconsistent receipt: ${consistencyError}.`);
  }

  const receipts = readLedger(ledgerDir, "receipts");
  // Status comes from the shared receiptStatusFor rule, keyed on the owned
  // regular evidence with ownerAccepted=false (a pass_with_risk then still needs
  // `receipt accept --owner`).
  const status = receiptStatusFor(verdict, ownedEvidenceIds(evidenceIds, taskId, evidence), false);

  // Field order is deliberate (the original notes it matches the seed generator).
  // rerunEvidenceIds / familyUnverified are present only when non-empty / true.
  // The id is provisional; it is re-stamped inside the lock at append (B6a-2).
  const record = {
    id: nextId(receipts, "c"),
    taskId,
    verdict,
    guardLevel,
    reviewMode: assessment.reviewMode,
    evidenceIds,
    ...(rerunEvidenceIds.length > 0 ? { rerunEvidenceIds } : {}),
    ...(assessment.familyUnverified ? { familyUnverified: true } : {}),
    status,
    createdAt: now()
  };
  // B6a-2: re-stamp id + append UNDER the lock (atomic vs concurrent receipt creates).
  appendWithNextId(ledgerDir, "receipts", "c", (id) => { record.id = id; return record; });

  const honesty = familyHonestyMarker(assessment.familyUnverified);
  // "Why this level": built from the same owned-evidence flags the level was
  // computed from. It is attached AFTER the append, so it reaches the emit
  // payload (--json) without being part of the row handed to the ledger.
  const levelWhy = guardLevelExplanation({
    level: guardLevel,
    hasCrossFamilyGuardEvidence: hasCrossFamily,
    hasReconciledRerunEvidence: hasReconciledRerun,
    hasRerunOutputEvidence: hasReconciledRerun,
    hasAuthorRunEvidence: hasOwnedRunEvidence(evidenceIds, taskId, evidence),
    hasAnyEvidence: ownedEvidenceIds(evidenceIds, taskId, evidence).length > 0
  });
  record.levelExplanation = levelWhy;

  const l4ExecutionNote = guardLevel === "L4" && hasReconciledRerun
    ? "L4 local execution evidence: the cited rerun reconciles to a recorded run exec output (command, exit code, and output hash match)"
    : null;

  // Optional lines appended after the evidence line, in a fixed order.
  const trailers = [];
  if (rerunEvidenceIds.length > 0) trailers.push(`\n rerun: ${rerunEvidenceIds.join(", ")}`);
  if (claimDowngradeNote) trailers.push(`\n note: ${claimDowngradeNote}`);
  if (honesty) {
    trailers.push(`\n note: the cross-family family is ${honesty}; local rerun evidence verifies execution/output only, not model-family identity`);
  }
  if (l4ExecutionNote) trailers.push(`\n note: ${l4ExecutionNote}`);
  if (status === "pending" && verdict === "pass_with_risk") {
    trailers.push(`\n (pass_with_risk: pending owner acceptance — run: node bin/ai-collab.js receipt accept --id ${record.id} --owner)`);
  }

  const honestyTag = honesty ? ` [${honesty}]` : "";
  const evidenceList = evidenceIds.length > 0 ? evidenceIds.join(", ") : "(none)";
  const message = [
    "Receipt created.",
    `id: ${record.id}`,
    `task: ${record.taskId}`,
    `verdict: ${record.verdict}`,
    `guardLevel: ${record.guardLevel} (computed)${honestyTag}`,
    `why: ${levelWhy}`,
    `reviewMode: ${record.reviewMode}`,
    `status: ${record.status}`,
    `evidence: ${evidenceList}${trailers.join("")}`,
    `Ledger: ${path.join(ledgerDir, "receipts.jsonl")}`,
    ""
  ].join("\n");
  emit(args, message, { command: "receipt create", ok: true, receipt: record, stateDir: ledgerDir });
}
1421
+
1422
// P2 owner-acceptance gate: a pass_with_risk receipt is created "pending" and
// only an explicit owner sign-off moves it to "accepted". This is the human
// accepting the named residual risk on the record. The accept is refused unless
// the verdict is pass_with_risk, the receipt is currently pending, and it has
// same-task evidence.
//
// IMPORTANT (this is a LOCAL collaboration audit record, not cryptographic
// proof): the owner acceptance is a human sign-off captured locally — actor name
// + timestamp on the record. It is NOT a signature and makes NO anti-forgery
// claim. What it DOES guarantee is consistency: an accepted risk receipt carries
// the marker, has own-task evidence, and its (verdict, level, evidence) passes
// the shared consistency predicate (F2).
//
// Inputs read from `args`: --id (required), --owner (args.ownerAccepted must be
// true; args.ownerName optionally names the actor), and the workspace.
// Throws when --owner is missing, the receipt does not exist, is not a pending
// pass_with_risk, has no own-task evidence, or fails the consistency gate.
//
// Concurrency: the find -> validate -> patch -> rewrite of receipts.jsonl runs
// through rewriteLedgerUnderLock (the same primitive `run finish` uses), so the
// rows patched are the ones read inside the lock and a concurrent `receipt
// create` append cannot be overwritten by a stale snapshot. As in `run finish`,
// a validation failure is signalled by throwing from inside the callback
// (presumably rewriteLedgerUnderLock then writes nothing — verify in its
// definition).
//
// NOTE(review): the leading whitespace of the message lines below could not be
// recovered from the diff rendering this block was taken from — confirm it
// against the published file.
function receiptAccept(args) {
  const id = requireOption(args, "id");
  if (args.ownerAccepted !== true) {
    throw new Error("receipt accept requires --owner (the human accepting the named residual risk on the record).");
  }
  const stateDir = resolveStateDir(args.workspace);
  // The other ledgers are read BEFORE entering the receipts critical section, so
  // only the receipts read-modify-write happens while the lock is held.
  const evidence = readLedger(stateDir, "evidence");
  const runs = readLedger(stateDir, "runs");

  let target = null;
  rewriteLedgerUnderLock(stateDir, "receipts", (receipts) => {
    const row = receipts.find((receipt) => receipt.id === id);
    if (!row) {
      throw new Error(`Receipt ${id} not found.`);
    }
    if (row.verdict !== "pass_with_risk") {
      throw new Error(`Receipt ${id} has verdict "${row.verdict}"; only pass_with_risk receipts need an owner acceptance.`);
    }
    if (row.status === "accepted") {
      throw new Error(`Receipt ${id} is already accepted.`);
    }
    if (row.status !== "pending") {
      throw new Error(`Receipt ${id} has status "${row.status}"; only a pending receipt can be owner-accepted.`);
    }
    const citedIds = Array.isArray(row.evidenceIds) ? row.evidenceIds : [];
    const owned = ownedEvidenceIds(citedIds, row.taskId, evidence);
    if (owned.length === 0) {
      throw new Error(`Receipt ${id} cites no evidence belonging to task ${row.taskId}; cannot accept a risk receipt with no own-task evidence.`);
    }
    // F2: RE-RUN the verdict x guardLevel consistency gate on the STORED row, with
    // the same shared predicate create uses, so a hand-written pending row that is
    // itself inconsistent cannot be laundered into "accepted" via the owner gate.
    const rerunIds = Array.isArray(row.rerunEvidenceIds) ? row.rerunEvidenceIds : [];
    const ownedRerun = ownedRerunEvidenceIds(rerunIds, row.taskId, evidence, runs);
    const ownedCrossFamily = ownedCrossFamilyGuardEvidenceIds(citedIds, row.taskId, evidence);
    const consistencyError = guardLevelVerdictError(
      row.guardLevel,
      row.verdict,
      ownedRerun.length > 0,
      ownedCrossFamily.length > 0,
      rerunIds.length > 0
    );
    if (consistencyError) {
      throw new Error(`Cannot accept inconsistent receipt ${id}: ${consistencyError}.`);
    }
    // The accepted status is RECOMPUTED through the shared rule with
    // ownerAccepted=true (never hand-set). F2 belt-and-braces: if that rule ever
    // returned anything but "accepted", refuse before touching the row.
    const acceptedStatus = receiptStatusFor(row.verdict, owned, true);
    if (acceptedStatus !== "accepted") {
      throw new Error(`Cannot accept receipt ${id}: the consistency rule computes status "${acceptedStatus}", not "accepted".`);
    }
    row.ownerAccepted = true;
    // F4: record WHO accepted (--owner <name>, defaulting to a generic "owner")
    // and WHEN, as a local audit trail — a sign-off record, not a signature.
    row.acceptedBy = typeof args.ownerName === "string" && args.ownerName.length > 0 ? args.ownerName : "owner";
    row.acceptedAt = now();
    row.status = acceptedStatus;
    target = row;
    return receipts;
  });

  emit(args, `Receipt accepted by owner (local sign-off recorded — not a cryptographic signature).
id: ${target.id}
task: ${target.taskId}
verdict: ${target.verdict}
guardLevel: ${target.guardLevel}
status: ${target.status}
ownerAccepted: true
acceptedBy: ${target.acceptedBy}
Ledger: ${path.join(stateDir, "receipts.jsonl")}
`, { command: "receipt accept", ok: true, receipt: target, stateDir });
}
1503
+
1504
+ // --- Learning ledger (P4) --------------------------------------------------
1505
+ //
1506
+ // The learning ledger turns "what we learned this task" into something the next
1507
+ // task can feel. A learning row is one captured lesson (type "harvest") or one
1508
+ // suggested standing preference (type "profile"), proposed by the AI and then
1509
+ // kept or discarded by the human through the SAME proposed/confirmed/edited/
1510
+ // dropped discipline the profile-candidate buffer and the harvest mechanism use —
1511
+ // nothing graduates on the AI's say-so alone. Only confirmed/edited rows count as
1512
+ // kept; the status command echoes back the latest kept profile preference so the
1513
+ // tool feels like it is learning how you work, without making you maintain a
1514
+ // system. All shape rules live in ledger.js (learningRecordError) so the writer
1515
+ // here can never emit a row the validator would reject.
1516
+ //
1517
+ // Discipline (deliberately a CONVENTION, not a hard limiter): each task close-out
1518
+ // should propose AT MOST one harvest lesson and one profile candidate (see the
1519
+ // walkthrough Step 4 and the harvest mechanism). The point of P4 is that the user
1520
+ // feels understood, not buried — so the cap is documented and the recall shows
1521
+ // ONE preference, but `learning add` does not silently refuse a third row: a hard
1522
+ // limit here would fight legitimate manual use and a corrupt-state edge (e.g.
1523
+ // re-adding after a drop). Keeping it advisory matches how the rest of the loop
1524
+ // trusts the human to dispose.
1525
/**
 * Dispatch a `learning <action>` invocation to the matching handler.
 *
 * The action is the second positional argument (`args._[1]`):
 * add -> learningAdd, confirm / drop -> learningSetStatus with the target
 * status, edit -> learningEdit.
 *
 * @param {object} args Parsed CLI arguments.
 * @returns {*} Whatever the selected handler returns.
 * @throws {Error} A usage error when the action is missing or unknown.
 */
function learningCommand(args) {
  const action = args._[1];
  switch (action) {
    case "add":
      return learningAdd(args);
    case "confirm":
      return learningSetStatus(args, "confirmed");
    case "edit":
      return learningEdit(args);
    case "drop":
      return learningSetStatus(args, "dropped");
    default:
      // Anything else (including no action at all) is a usage error.
      throw new Error(
        "Unknown learning command. Run: node bin/ai-collab.js learning add --type <harvest|profile> --content \"...\" [--task <id>] | node bin/ai-collab.js learning confirm --id <id> | node bin/ai-collab.js learning edit --id <id> --content \"...\" | node bin/ai-collab.js learning drop --id <id>"
      );
  }
}
1535
+
1536
/**
 * `learning add`: append a new learning candidate in the "proposed" state.
 *
 * Requires --type (must be one of LEARNING_TYPES) and --content. --task is
 * optional; when given (and non-empty) it must match the id of a row in the
 * "tasks" ledger. The record is checked with learningRecordError before the
 * append, and the final id is assigned inside the appendWithNextId callback.
 *
 * @param {object} args Parsed CLI arguments; reads `type`, `content`, `task`
 *   and `workspace`.
 * @throws {Error} When --type is not allowed, the named task does not exist,
 *   or learningRecordError reports a shape problem.
 */
+ function learningAdd(args) {
1537
+ const type = requireOption(args, "type");
1538
+ const content = requireOption(args, "content");
1539
+ if (!LEARNING_TYPES.includes(type)) {
1540
+ throw new Error(`--type must be one of: ${LEARNING_TYPES.join(", ")} (got "${type}").`);
1541
+ }
1542
+ const stateDir = resolveStateDir(args.workspace);
1543
+
1544
+ // --task is OPTIONAL (a lesson need not belong to one task), but when given it
1545
+ // must point at a real task — a learning row bound to a non-existent task is a
1546
+ // dangling reference, the same standard evidence/run/receipt rows hold to.
1547
+ let taskId;
1548
+ if (args.task !== undefined && args.task !== "") {
1549
+ const tasks = readLedger(stateDir, "tasks");
1550
+ if (!tasks.some((task) => task.id === args.task)) {
1551
+ throw new Error(tr("error.taskNotFound.learning", { id: args.task }));
1552
+ }
1553
+ taskId = args.task;
1554
+ }
1555
+
1556
+ const learning = readLedger(stateDir, "learning");
1557
+ // Field order mirrors the generated seed (id, taskId?, type, content, status,
1558
+ // createdAt) so a hand read of the ledger stays predictable. New rows are always
1559
+ // "proposed": a freshly captured lesson/preference is an un-reviewed guess until
1560
+ // the human confirms/edits/drops it.
1561
+ // Provisional id; re-stamped authoritatively inside the lock at append (B6a-2).
1562
+ const record = { id: nextId(learning, "l") };
1563
+ if (taskId !== undefined) record.taskId = taskId;
1564
+ record.type = type;
1565
+ record.content = content;
1566
+ record.status = "proposed";
1567
+ record.createdAt = now();
1568
+
1569
+ // Reject a malformed row at WRITE time using the SAME shared predicate the
1570
+ // validator applies (so the CLI can never emit a row the validator rejects).
1571
+ const shapeError = learningRecordError(record);
1572
+ if (shapeError) {
1573
+ throw new Error(`Cannot add learning row: ${shapeError}.`);
1574
+ }
1575
+
1576
+ // B6a-2: re-stamp id + append UNDER the lock (atomic vs concurrent learning adds).
// The callback overwrites the provisional id, so the id echoed below is the one
// that was handed to the callback by appendWithNextId.
1577
+ appendWithNextId(stateDir, "learning", "l", (id) => { record.id = id; return record; });
1578
+ emit(args, `Learning candidate added (proposed).
1579
+ id: ${record.id}
1580
+ type: ${record.type}${record.taskId ? `\n task: ${record.taskId}` : ""}
1581
+ content: ${record.content}
1582
+ status: ${record.status}
1583
+ Review it to keep it: node bin/ai-collab.js learning confirm --id ${record.id} (keep) | learning edit --id ${record.id} --content "..." (reword) | learning drop --id ${record.id} (discard).
1584
+ Only confirmed/edited candidates graduate into your long-term profile.
1585
+ Ledger: ${path.join(stateDir, "learning-ledger.jsonl")}
1586
+ `, { command: "learning add", ok: true, learning: record, stateDir });
1587
+ }
1588
+
1589
+ // Shared state-flip for confirm/drop: read all -> patch the matching row's status
1590
+ // -> rewrite, the same deterministic, order-preserving pattern run finish and
1591
+ // task update use. Of the four legal states, this helper writes only two (confirm ->
1591
+ // confirmed, drop -> dropped); edit has its own entry because it also rewrites content.
1593
/**
 * Flip the status of one learning row (used by `learning confirm` and
 * `learning drop`).
 *
 * Finds the row whose id equals --id, sets `status` and `updatedAt`, checks the
 * patched row with learningRecordError, then rewrites the whole "learning"
 * ledger. Nothing is written when the row is missing or the check fails.
 *
 * @param {object} args Parsed CLI arguments; reads `id` and `workspace`.
 * @param {string} status Status to store; the output treats "confirmed" as
 *   kept and any other value as dropped.
 * @throws {Error} When no row has the given id, or the patched row is rejected
 *   by learningRecordError.
 */
+ function learningSetStatus(args, status) {
1594
+ const id = requireOption(args, "id");
1595
+ const stateDir = resolveStateDir(args.workspace);
1596
+ const learning = readLedger(stateDir, "learning");
1597
+ const target = learning.find((row) => row.id === id);
1598
+ if (!target) {
1599
+ throw new Error(`Learning row ${id} not found. Add one first: node bin/ai-collab.js learning add --type <harvest|profile> --content "...".`);
1600
+ }
// `target` is the row object inside `learning`, so patching it here is what
// changes the array that writeLedger persists below.
1601
+ target.status = status;
1602
+ target.updatedAt = now();
1603
+ // Re-validate the patched row through the shared predicate before writing, so a
1604
+ // status flip can never leave the ledger in a shape the validator would reject.
1605
+ const shapeError = learningRecordError(target);
1606
+ if (shapeError) {
1607
+ throw new Error(`Cannot update learning row: ${shapeError}.`);
1608
+ }
1609
+ writeLedger(stateDir, "learning", learning);
1610
+ const kept = status === "confirmed";
1611
+ emit(args, `Learning candidate ${kept ? "confirmed (kept)" : "dropped (discarded)"}.
1612
+ id: ${target.id}
1613
+ type: ${target.type}
1614
+ status: ${target.status}${kept && target.type === "profile" ? "\n (this preference will now be echoed back next time via `node bin/ai-collab.js status`)" : ""}
1615
+ Ledger: ${path.join(stateDir, "learning-ledger.jsonl")}
1616
+ `, { command: `learning ${kept ? "confirm" : "drop"}`, ok: true, learning: target, stateDir });
1617
+ }
1618
+
1619
+ // edit = reword the candidate AND keep it: the edited line is what graduates, so
1620
+ // the row moves to status "edited" with its new content. Same read-all -> patch ->
1621
+ // rewrite pattern; content is required (an edit with no new wording is a no-op the
1622
+ // user almost certainly did not mean).
1623
/**
 * `learning edit`: replace a learning row's content and mark it "edited".
 *
 * Requires --id and --content. Finds the matching row, sets `content`,
 * `status = "edited"` and `updatedAt`, checks the patched row with
 * learningRecordError, then rewrites the whole "learning" ledger.
 *
 * @param {object} args Parsed CLI arguments; reads `id`, `content` and
 *   `workspace`.
 * @throws {Error} When no row has the given id, or the patched row is rejected
 *   by learningRecordError.
 */
+ function learningEdit(args) {
1624
+ const id = requireOption(args, "id");
1625
+ const content = requireOption(args, "content");
1626
+ const stateDir = resolveStateDir(args.workspace);
1627
+ const learning = readLedger(stateDir, "learning");
1628
+ const target = learning.find((row) => row.id === id);
1629
+ if (!target) {
1630
+ throw new Error(`Learning row ${id} not found. Add one first: node bin/ai-collab.js learning add --type <harvest|profile> --content "...".`);
1631
+ }
1632
+ target.content = content;
1633
+ target.status = "edited";
1634
+ target.updatedAt = now();
// Validate the patched row before persisting; a failure throws and skips the write.
1635
+ const shapeError = learningRecordError(target);
1636
+ if (shapeError) {
1637
+ throw new Error(`Cannot edit learning row: ${shapeError}.`);
1638
+ }
1639
+ writeLedger(stateDir, "learning", learning);
1640
+ emit(args, `Learning candidate edited (kept, reworded).
1641
+ id: ${target.id}
1642
+ type: ${target.type}
1643
+ content: ${target.content}
1644
+ status: ${target.status}${target.type === "profile" ? "\n (this preference will now be echoed back next time via `node bin/ai-collab.js status`)" : ""}
1645
+ Ledger: ${path.join(stateDir, "learning-ledger.jsonl")}
1646
+ `, { command: "learning edit", ok: true, learning: target, stateDir });
1647
+ }
1648
+
1649
/**
 * Count how many records carry each value of `key`.
 *
 * Values are tallied under their string form (so a missing field is counted
 * under "undefined", exactly as a plain-object key would be). The tally is
 * built in a Map and converted with Object.fromEntries, so values that happen
 * to match inherited Object.prototype members ("constructor", "toString",
 * "__proto__", ...) are counted like any other value instead of colliding with
 * the inherited property.
 *
 * @param {Iterable<object>} records Rows to tally.
 * @param {string} key Property name to group by.
 * @returns {Object<string, number>} Plain object mapping each value to its count.
 */
function countBy(records, key) {
  const tally = new Map();
  for (const record of records) {
    const value = String(record[key]);
    tally.set(value, (tally.get(value) ?? 0) + 1);
  }
  // fromEntries defines own data properties, so even "__proto__" is kept as a key.
  return Object.fromEntries(tally);
}
1657
+
1658
/**
 * Render a counts object as "key=count" pairs, sorted by key and joined with
 * ", ". An object with no keys renders as "(none)".
 *
 * @param {Object<string, number>} counts Mapping of label to count.
 * @returns {string} The one-line summary.
 */
function formatCounts(counts) {
  const parts = [];
  for (const name of Object.keys(counts).sort()) {
    parts.push(`${name}=${counts[name]}`);
  }
  return parts.length > 0 ? parts.join(", ") : "(none)";
}
1663
+
1664
+ // --- A2 capability detect -------------------------------------------------
1665
+ //
1666
+ // "capability detect" answers a different question from the run loop: not "what did
1667
+ // this task earn?" (that is the receipt's guard level, A1) but "given the TOOLS you
1668
+ // have, what is the highest guard level you could EVER reach?" — your CEILING. It
1669
+ // combines a PROJECT-SIGNAL PROBE (look for tool marker files in the project) with
1670
+ // GUIDED SELF-REPORT (flags), because the CLI is non-interactive and cannot see
1671
+ // which AI you actually installed. Signals are surfaced as "inferred — confirm",
1672
+ // never asserted (a marker like AGENTS.md is read by many tools and pins no family).
1673
+
1674
+ // Split a comma/space-separated list flag (e.g. --tools "claude,codex") into a
1675
+ // clean lowercased array. Returns [] for an absent/empty flag.
1676
/**
 * Turn a comma- and/or whitespace-separated flag value into a list of
 * lowercased, non-empty items.
 *
 * @param {*} value Raw flag value; anything that is not a string yields [].
 * @returns {string[]} The cleaned items in their original order.
 */
function splitListArg(value) {
  if (typeof value !== "string") return [];
  const items = [];
  for (const piece of value.split(/[,\s]+/)) {
    const cleaned = piece.trim().toLowerCase();
    // Leading/trailing separators produce empty pieces; drop them.
    if (cleaned.length > 0) items.push(cleaned);
  }
  return items;
}
1683
+
1684
+ // Probe a project directory for tool-marker files (.claude/, .codex/, AGENTS.md, …)
1685
+ // and return the signals found. Each hit echoes the TOOL_SIGNALS row plus whether
1686
+ // the marker exists. This only says a tool MIGHT be configured here; it never
1687
+ // proves which model family (design rule 4) — low-confidence markers are flagged so
1688
+ // the caller can print "inferred — please confirm". Pure read-only existence checks.
1689
/**
 * Check a project directory for each TOOL_SIGNALS marker and report the hits.
 *
 * @param {string} [projectDir] Directory to scan; defaults to "." when null or
 *   undefined. Resolved to an absolute path.
 * @returns {{root: string, found: object[]}} The resolved root and, for every
 *   marker that exists under it, a copy of the TOOL_SIGNALS row with
 *   `present: true` added (in TOOL_SIGNALS order).
 */
function probeProjectSignals(projectDir) {
  const root = path.resolve(projectDir ?? ".");
  const found = [...TOOL_SIGNALS]
    .filter((signal) => existsSync(path.join(root, signal.marker)))
    .map((signal) => ({ ...signal, present: true }));
  return { root, found };
}
1699
+
1700
/**
 * `capability` / `capability detect`: report the capability ceiling for the
 * described setup.
 *
 * Steps: probe the project directory for marker files (probeProjectSignals),
 * combine the hits with the self-reported --tools / --families lists and the
 * boolean flags into a `setup` object, pass it to computeCapability, then emit
 * a human-readable report plus a structured payload. Nothing is written to disk
 * by this function itself.
 *
 * @param {object} args Parsed CLI arguments; reads `_[1]`, `project`, `tools`,
 *   `families`, `canSwitchModel`, `subagents`, `noNewConversation`, `canRerun`.
 * @throws {Error} A usage error when a subcommand other than "detect" is given.
 */
+ function capabilityCommand(args) {
1701
+ const action = args._[1];
1702
+ // "capability" and "capability detect" both run the detector; any other
1703
+ // subcommand is a usage error.
1704
+ if (action !== undefined && action !== "detect") {
1705
+ throw new Error("Unknown capability command. Run: node bin/ai-collab.js capability detect [--project <dir>] [--tools <list>] [--families <list>] [--subagents] [--can-switch-model] [--can-rerun] [--no-new-conversation] [--json]");
1706
+ }
1707
+
1708
+ // 1) Probe the project for tool-marker files (default: current dir, or --project).
1709
+ const { root, found } = probeProjectSignals(args.project);
1710
+
1711
+ // 2) Self-report flags OVERRIDE / augment the probe. --tools / --families let the
1712
+ // user state their real setup (the honest answer to "the CLI can't see what you
1713
+ // installed"); the booleans describe what the setup can DO.
1714
+ const declaredTools = splitListArg(args.tools);
1715
+ const declaredFamilies = splitListArg(args.families);
1716
+
1717
+ // Tools = declared tools if given, else the confident signal hits (a generic
1718
+ // marker like AGENTS.md, which pins no tool, is NOT counted as a tool).
// NOTE(review): the filter below keys on `signal.tool` being truthy, not on
// `signal.confident` — confirm that every tool-bearing signal is meant to count.
1719
+ const signalTools = found.filter((signal) => signal.tool).map((signal) => signal.tool);
1720
+ const tools = declaredTools.length > 0 ? declaredTools : [...new Set(signalTools)];
1721
+
1722
+ // Families: prefer an explicit --families; else derive from the confident signals'
1723
+ // families (NOT from generic markers — those carry family null). This is the
1724
+ // load-bearing input: only NAMED, distinct families clear the cross-family gate.
1725
+ const signalFamilies = found
1726
+ .filter((signal) => signal.confident && signal.family)
1727
+ .map((signal) => signal.family);
1728
+ // When the user named tools but not families, derive families from those tools so
1729
+ // a "--tools claude,codex" alone still resolves two families.
1730
+ const derivedFromTools = tools.map((tool) => TOOL_FAMILY[tool] ?? "unknown");
// NOTE(review): precedence is --families, then signal-derived families, then
// derivedFromTools. An explicit --tools list therefore only determines the
// families when no confident signal carried a family — confirm this is intended,
// since the comment above suggests --tools alone should resolve the families.
1731
+ const families =
1732
+ declaredFamilies.length > 0
1733
+ ? declaredFamilies
1734
+ : signalFamilies.length > 0
1735
+ ? [...new Set(signalFamilies)]
1736
+ : derivedFromTools;
1737
+
1738
+ // Booleans. canOpenNewConversation defaults TRUE (almost every chat tool can open
1739
+ // a fresh chat); --no-new-conversation models the strict "one locked conversation"
1740
+ // tier. The others default FALSE and are opt-in via flags.
1741
+ const setup = {
1742
+ families,
1743
+ tools,
1744
+ canSwitchModelFamily: args.canSwitchModel === true,
1745
+ hasSubAgents: args.subagents === true,
1746
+ canOpenNewConversation: args.noNewConversation !== true,
1747
+ canRerun: args.canRerun === true
1748
+ };
1749
+
1750
+ const cap = computeCapability(setup);
1751
+
1752
+ // Whether ANY part of the answer rests on an inference the user has not confirmed:
1753
+ // a low-confidence marker was used, OR families were derived from tools/signals
1754
+ // rather than stated explicitly. Drives the "inferred — please confirm" note.
1755
+ const usedSignalsForFamilies = declaredFamilies.length === 0;
1756
+ const lowConfidenceHits = found.filter((signal) => !signal.confident);
1757
+ const inferred = (found.length > 0 || tools.length > 0) && (usedSignalsForFamilies || lowConfidenceHits.length > 0);
1758
+
1759
+ // Human text. Lead with the ceiling and the tier, then the upgrade path, then the
1760
+ // hard line between CEILING (this) and ACHIEVED (the receipt's guard level), then
1761
+ // the honesty note. Kept to a single readable screen, like `status`.
1762
+ const signalLines = found.length > 0
1763
+ ? found.map((signal) => {
1764
+ const what = signal.tool ? `${signal.marker} -> ${signal.tool} (${signal.family ?? "family unknown"})` : `${signal.marker} -> some agent tool (family unknown)`;
1765
+ return ` - ${what}${signal.confident ? "" : " [low-confidence: many tools use this — confirm]"}`;
1766
+ }).join("\n")
1767
+ : " (no tool-marker files found in the project)";
1768
+
1769
+ const familiesLine = cap.families.length > 0 ? cap.families.join(", ") : "(none named yet)";
1770
+ const recLine = cap.recommendation
1771
+ ? `Path up (to ${cap.recommendation.nextCeiling}): ${cap.recommendation.action}`
1772
+ : `You are already at the strongest local ceiling (L4). Per task, you still have to EARN it with the evidence.`;
1773
+ const inferredNote = inferred
1774
+ ? `\nNote: parts of this are INFERRED from project signals and may be wrong — confirm your real setup with --tools / --families (a marker file does not prove which model family you run).`
1775
+ : "";
1776
+
1777
+ emit(args, `Capability: how high your setup can EVER score
1778
+ Project scanned: ${root}
1779
+
1780
+ Tool signals (inference, not proof):
1781
+ ${signalLines}
1782
+
1783
+ Distinct model families you can bring: ${cap.distinctFamilies} [${familiesLine}]
1784
+ Your capability CEILING: ${cap.ceiling} (${cap.tier.label})
1785
+ ${cap.tier.experience}
1786
+ Why: ${cap.reason}
1787
+
1788
+ ${recLine}
1789
+
1790
+ Ceiling vs achieved — keep them apart:
1791
+ - This CEILING (${cap.ceiling}) is the most your TOOLS could ever support.
1792
+ - What a task actually EARNS is its receipt's guard level (node bin/ai-collab.js receipt create),
1793
+ computed from the evidence you cite THAT task. Same setup, no real evidence -> a
1794
+ lower achieved level. The ceiling is the roof; each task still has to reach it.
1795
+
1796
+ Note: the families/tools shown above are SELF-REPORTED setup describing your CEILING
1797
+ (how high you could ever score) — not evidence any task achieved it. Declaring a second
1798
+ family does not make a task cross-family-passed; only a real cross-family guard with
1799
+ rerun evidence earns L3/L4.${inferredNote}
1800
+ `, {
1801
+ command: "capability detect",
1802
+ ok: true,
1803
+ project: root,
1804
+ signals: found.map((signal) => ({ marker: signal.marker, tool: signal.tool, family: signal.family, confident: signal.confident })),
1805
+ tools,
1806
+ families: cap.families,
1807
+ distinctFamilies: cap.distinctFamilies,
1808
+ ceiling: cap.ceiling,
1809
+ tier: { id: cap.tier.id, label: cap.tier.label, experience: cap.tier.experience },
1810
+ reason: cap.reason,
1811
+ recommendation: cap.recommendation,
1812
+ inferred,
1813
+ // Make the ceiling-vs-achieved distinction explicit in the machine payload too,
1814
+ // so an integrating tool cannot mistake the ceiling for a per-task verdict.
1815
+ note: "ceiling is the maximum the tools support; a task's achieved guard level comes from `receipt create` and the evidence cited that task"
1816
+ });
1817
+ }
1818
+
1819
+ // Render ONE per-task achievement block for the human `status` view (the
1820
+ // "what did I earn on this task" line). Joins the task to its strongest receipt
1821
+ // (verdict + computed guardLevel + who accepted it) and its evidence/run counts,
1822
+ // so a user who ran a chain sees the concrete thing they earned instead of a bare
1823
+ // counter. A seed example row is clearly flagged so a brand-new workspace's
1824
+ // numbers are not mistaken for the user's own progress. The guardLevel/family
1825
+ // warning shown here is recomputed in summarizeTasks via ledger.js, so `status`
1826
+ // cannot be fooled by hand-edited stored receipt fields.
1827
+ // One plain-language phrase per guard level, so a newcomer reading "L1" / "L3"
1828
+ // on a status line knows what it MEANS without first reading the L0-L4 ladder.
1829
+ // Keyed by the bare level token (the stored guardLevel may carry a "(computed)"
1830
+ // suffix in some surfaces, so callers normalize before lookup). Kept to a short
1831
+ // clause that reads naturally appended after the level. The wording mirrors the
1832
+ // real semantics in `--help levels` (single-tool tops out at L2; a plain pass
1833
+ // needs cross-family L3; L4 adds a reconciled rerun) so the two never drift.
1834
+ // The bare level token -> its i18n message key. L2.5's dot is not key-safe, so it
1835
+ // maps to level.L2_5. The plain-language phrasing itself now lives in the catalog
1836
+ // (en canonical + zh faithful), so the honesty gloss reads in the active language.
1837
const GUARD_LEVEL_MESSAGE_KEY = {
  L0: "level.L0",
  L1: "level.L1",
  L2: "level.L2",
  "L2.5": "level.L2_5",
  L3: "level.L3",
  L4: "level.L4"
};

/**
 * Return the plain-language phrase for a guard level in the active locale.
 *
 * The value is normalized to its bare token first: a trailing " (computed)"
 * suffix (any letter case) and surrounding whitespace are stripped. Falsy or
 * unknown levels yield "" — the gloss is additive, so this never throws on an
 * unexpected value.
 *
 * @param {*} guardLevel Stored guard level, e.g. "L2" or "L2.5 (computed)".
 * @returns {string} The translated phrase, or "" when the level is unknown.
 */
function guardLevelPlainLanguage(guardLevel) {
  if (!guardLevel) return "";
  const token = String(guardLevel).replace(/\s*\(computed\)\s*$/i, "").trim();
  // Own-property check: a token such as "constructor" or "toString" must not
  // resolve to an inherited Object.prototype member and be handed to tr().
  if (!Object.prototype.hasOwnProperty.call(GUARD_LEVEL_MESSAGE_KEY, token)) return "";
  return tr(GUARD_LEVEL_MESSAGE_KEY[token]);
}
1856
+
1857
+ // Translate the honesty-bearing task status DISPLAY label at render time. The ledger
1858
+ // model emits the canonical English ("done — author-marked, unverified") as a stable
1859
+ // data field; here we localize only that one annotated label, leaving plain statuses
1860
+ // (open / done / blocked / …) untouched. Faithful — never softened to "verified".
1861
/**
 * Localize the one annotated task-status label at render time.
 *
 * Only the exact canonical label "done — author-marked, unverified" is
 * translated (via the "status.display.authorMarkedDone" message key); every
 * other value is returned unchanged.
 *
 * @param {*} statusDisplay Status label to render.
 * @returns {*} The translated label, or the input as-is.
 */
function localizeStatusDisplay(statusDisplay) {
  const needsTranslation = statusDisplay === "done — author-marked, unverified";
  return needsTranslation ? tr("status.display.authorMarkedDone") : statusDisplay;
}
1867
+
1868
/**
 * Build the three-line `status` block for one task: a head line, a receipt
 * line, and an evidence/run counts line, joined with newlines.
 *
 * All wording comes from the message catalog via tr(). An empty or missing
 * title falls back to the "common.untitled" message; a seed row gets the seed
 * tag; the status label is `statusDisplay` (or `status`) passed through
 * localizeStatusDisplay.
 *
 * @param {object} entry Task summary; reads `id`, `title`, `isSeed`,
 *   `statusDisplay`, `status`, `receipt`, `evidenceCount`, `runCount`.
 * @returns {string} The rendered block.
 */
function formatTaskSummaryLine(entry) {
  let titleText;
  if (entry.title && entry.title.length > 0) {
    titleText = entry.title;
  } else {
    titleText = tr("common.untitled");
  }
  const seedTag = entry.isSeed ? tr("status.taskLine.seedTag") : "";
  const statusText = localizeStatusDisplay(entry.statusDisplay || entry.status);
  const head = tr("status.taskLine.head", { id: entry.id, title: titleText, status: statusText, seedTag });

  const receipt = entry.receipt;
  let receiptLine;
  if (!receipt) {
    receiptLine = tr("status.taskLine.receipt.none");
  } else {
    // "accepted by" is only shown for an accepted receipt that names its acceptor.
    const namedAcceptor = receipt.status === "accepted" && receipt.acceptedBy;
    const acceptedBy = namedAcceptor
      ? tr("status.taskLine.receipt.acceptedBy", { who: receipt.acceptedBy })
      : "";
    // The self-declared cross-family caveat, rendered in the active language.
    const unverified = receipt.familyUnverified ? tr("status.taskLine.receipt.unverified") : "";
    // Plain-language gloss of the level, added only when one exists.
    const plain = guardLevelPlainLanguage(receipt.guardLevel);
    const plainNote = plain
      ? tr("status.taskLine.receipt.plainNote", { level: receipt.guardLevel, plain })
      : "";
    receiptLine = tr("status.taskLine.receipt", {
      id: receipt.id,
      verdict: receipt.verdict,
      level: receipt.guardLevel,
      status: receipt.status,
      acceptedBy,
      unverified,
      plainNote
    });
  }

  const countsLine = tr("status.taskLine.counts", { evidence: entry.evidenceCount, runs: entry.runCount });
  return `${head}\n${receiptLine}\n${countsLine}`;
}
1893
+
1894
+ // --- Handoff draft (resume across tools without re-explaining) --------------
1895
+ //
1896
+ // `handoff create` reads the ledger and writes a DRAFT handoff note into the
1897
+ // workspace's handoff layer (.aict/handoff/) so the next session/tool can resume
1898
+ // without replaying the conversation. It is the runtime tie-in for the handoff
1899
+ // layer's existing format (README/TEMPLATE/EXAMPLE under .aict/handoff/): the
1900
+ // SAME done / pending / blocked / unverified separation, auto-filled from the
1901
+ // rows the run loop already recorded. buildHandoffModel (ledger.js) does the
1902
+ // honest classification (DONE only for an ACCEPTED receipt; pass_with_risk /
1903
+ // pending / unverified-family receipts land in Unverified); this layer only
1904
+ // renders that model and writes the file. It NEVER marks unverified work "done".
1905
+
1906
+ // The banner every draft leads with, so a reader can never mistake an
1907
+ // auto-generated starting point for a finished, trustworthy handoff.
1908
// Markdown blockquote placed at the top of every generated draft, marking it as
// an auto-generated starting point that still needs human review.
const HANDOFF_DRAFT_BANNER = [
  "> **This is an auto-generated draft from the ledger",
  "review and complete it before handing off.**"
].join(" — ");
1910
+
1911
+ // Render one task entry's evidence/run/receipt references as indented bullet
1912
+ // lines under a section. Kept compact: a draft is a starting point a human
1913
+ // finishes, not an exhaustive dump.
1914
/**
 * Render one task entry of a handoff model as Markdown bullet lines.
 *
 * Output order: a head bullet (id, title or "(untitled)", task status), then —
 * when present — one sub-bullet per receipt (only if `showReceipts`), one
 * evidence sub-bullet listing every evidence row, one sub-bullet for the first
 * run in `entry.runs` with a count of the remaining runs, and one warning
 * sub-bullet per risk note.
 *
 * @param {object} entry Reads `id`, `title`, `taskStatusDisplay`, `taskStatus`,
 *   `receipts`, `evidence`, `runs`, `riskNotes`.
 * @param {{showReceipts: boolean}} options Whether receipt sub-bullets are rendered.
 * @returns {string} The bullet lines joined with "\n".
 */
+ function renderHandoffEntry(entry, { showReceipts }) {
1915
+ const lines = [];
1916
+ const titleText = entry.title && entry.title.length > 0 ? entry.title : "(untitled)";
1917
+ const statusText = entry.taskStatusDisplay || entry.taskStatus;
1918
+ lines.push(`- **${entry.id} — ${titleText}** _(task status: ${statusText})_`);
1919
+
1920
+ if (showReceipts && entry.receipts.length > 0) {
1921
+ for (const r of entry.receipts) {
1922
+ const acceptedBy = r.status === "accepted" && r.acceptedBy ? ` · accepted by ${r.acceptedBy}` : "";
1923
+ const unverified = r.familyUnverified ? " · self-declared cross-family (unverified)" : "";
1924
+ lines.push(
1925
+ ` - receipt ${r.id}: ${r.verdict} · ${r.guardLevel} · ${r.status}${acceptedBy}${unverified}`
1926
+ );
1927
+ }
1928
+ }
1929
+
1930
+ if (entry.evidence.length > 0) {
1931
+ const ev = entry.evidence
1932
+ .map((e) => `${e.id} (${e.kind})${e.summary ? `: ${e.summary}` : ""}`)
1933
+ .join("; ");
1934
+ lines.push(` - evidence: ${ev}`);
1935
+ }
1936
+
1937
+ if (entry.runs.length > 0) {
1938
+ const latest = entry.runs[0]; // already sorted most-recent-first by buildHandoffModel
1939
+ const cmd = latest.command ? `\`${latest.command}\`` : "(no command recorded)";
// NOTE(review): only a strict `null` exitCode falls back to the run status; an
// undefined exitCode would render as "exit undefined" — confirm the model
// always normalizes a missing exit code to null.
1940
+ const exit = latest.exitCode === null ? `status ${latest.status}` : `exit ${latest.exitCode}`;
1941
+ const more = entry.runs.length > 1 ? ` (+${entry.runs.length - 1} earlier run${entry.runs.length - 1 === 1 ? "" : "s"})` : "";
1942
+ lines.push(` - last run ${latest.id}: ${cmd} → ${exit}${more}`);
1943
+ }
1944
+
1945
+ if (entry.riskNotes.length > 0) {
1946
+ for (const note of entry.riskNotes) {
1947
+ lines.push(` - ⚠️ ${note}`);
1948
+ }
1949
+ }
1950
+
1951
+ return lines.join("\n");
1952
+ }
1953
+
1954
+ // Render the full Markdown draft from a handoff model. Follows the handoff
1955
+ // layer's section vocabulary (Done / Pending / Blocked / Unverified) so the
1956
+ // draft slots into the existing TEMPLATE shape the docs teach.
1957
/**
 * Render the complete Markdown handoff draft from a handoff model.
 *
 * Layout: title line (with the focus task id and title when
 * `model.focusTaskId` is set), the draft banner, an "Overall" counts line, a
 * "Current status" explanation, the Done / Pending / Blocked / Unverified
 * sections, the confirmed-learnings block, a "Next action" placeholder, and a
 * source footer naming `stateDir`. An empty section renders as "_None._".
 *
 * @param {object} model Reads `focusTaskId`, `counts`, `learnings`, `done`,
 *   `pending`, `blocked`, `unverified`.
 * @param {{stateDir: string, focusTitle: string}} options Ledger location for
 *   the footer and the focused task's title for the heading.
 * @returns {string} The Markdown document.
 */
+ function renderHandoffDraft(model, { stateDir, focusTitle }) {
1958
// Local helper: one "## heading" section, one rendered entry per task.
+ const section = (heading, entries, opts) => {
1959
+ if (entries.length === 0) {
1960
+ return `## ${heading}\n\n_None._\n`;
1961
+ }
1962
+ return `## ${heading}\n\n${entries.map((e) => renderHandoffEntry(e, opts)).join("\n")}\n`;
1963
+ };
1964
+
1965
+ const titleLine = model.focusTaskId
1966
+ ? `# Handoff draft — ${model.focusTaskId}${focusTitle ? `: ${focusTitle}` : ""}`
1967
+ : "# Handoff draft";
1968
+
1969
+ const c = model.counts;
1970
+ const overall =
1971
+ `Overall: ${c.done} done · ${c.unverified} unverified · ${c.pending} pending · ${c.blocked} blocked ` +
1972
+ `(across ${c.tasksConsidered} task${c.tasksConsidered === 1 ? "" : "s"}).`;
1973
+
1974
+ const learningsBlock =
1975
+ model.learnings.length > 0
1976
+ ? `## Confirmed learnings to carry forward\n\n${model.learnings
1977
+ .map((l) => `- (${l.type}) ${l.content}`)
1978
+ .join("\n")}\n`
1979
+ : "## Confirmed learnings to carry forward\n\n_None kept yet._\n";
1980
+
1981
+ return [
1982
+ titleLine,
1983
+ "",
1984
+ HANDOFF_DRAFT_BANNER,
1985
+ "",
1986
+ overall,
1987
+ "",
1988
+ "## Current status",
1989
+ "",
1990
+ "Auto-derived from the ledger. Only the **Done** items carry an accepted receipt;",
1991
+ "everything under **Unverified** was reviewed but not accepted (or is self-declared",
1992
+ "/ pending) and must be re-checked before you rely on it.",
1993
+ "",
1994
+ // Done leads, then the honest "not trustworthy yet" sections, then learnings.
1995
+ section("Done (has an accepted receipt)", model.done, { showReceipts: true }),
1996
+ section("Pending / in progress (no receipt yet)", model.pending, { showReceipts: true }),
1997
+ section("Blocked", model.blocked, { showReceipts: true }),
1998
+ section("Unverified (reviewed but not accepted — re-check before trusting)", model.unverified, { showReceipts: true }),
1999
+ learningsBlock,
2000
+ "## Next action",
2001
+ "",
2002
+ "_Fill in the exact next step for the receiver (the draft cannot know your intent)._",
2003
+ "",
2004
+ "---",
2005
+ "",
2006
+ `_Source: ${stateDir} · generated by \`ai-collab handoff create\`. Review every section before handing off._`,
2007
+ ""
2008
+ ].join("\n");
2009
+ }
2010
+
2011
+ // A filesystem-safe timestamp for the draft filename (ISO with colons/dots
2012
+ // swapped for dashes, e.g. 2026-06-22T10-15-30-123Z). Deterministic format; the
2013
+ // caller passes the wall-clock value so this stays pure.
2014
/**
 * Make a timestamp string filesystem-safe by swapping every ":" and "." for
 * "-" (e.g. "2026-06-22T10:15:30.123Z" -> "2026-06-22T10-15-30-123Z").
 *
 * @param {string} isoString Timestamp text supplied by the caller.
 * @returns {string} The same text with colons and dots replaced by dashes.
 */
function handoffStamp(isoString) {
  const pieces = isoString.split(/[:.]/);
  return pieces.join("-");
}
2017
+
2018
+ // List the handoff DRAFT files this command has written into a workspace (the
2019
+ // generated `handoff-*.md`, NOT the shipped layer docs README/TEMPLATE/etc.), so
2020
+ // `status` can show a user that drafts exist. Returns names sorted newest-first by
2021
+ // the timestamp embedded in the filename (lexical sort works on the ISO stamp).
2022
+ // Pure read; returns [] when the dir is absent or unreadable.
2023
/**
 * List the generated handoff drafts (`handoff-*.md`) in `<workspaceRoot>/handoff`,
 * newest first.
 *
 * Draft names have the shape `handoff-<taskid|all>-<stamp>.md`, so ordering the
 * whole name would sort by the task id before the time. The trailing stamp is
 * therefore extracted and used as the primary sort key (the dashed ISO stamp
 * sorts chronologically as plain text); names without a recognizable stamp go
 * last, and ties fall back to descending name order.
 *
 * Read-only. Returns [] when the directory is absent or cannot be read.
 *
 * @param {string} workspaceRoot Workspace root that contains the handoff dir.
 * @returns {string[]} Draft file names, newest first.
 */
function listHandoffDrafts(workspaceRoot) {
  const dir = path.join(workspaceRoot, "handoff");
  if (!existsSync(dir)) return [];
  let names;
  try {
    names = readdirSync(dir);
  } catch {
    // Deliberately best-effort: an unreadable dir is reported as "no drafts".
    return [];
  }
  const stampOf = (name) => /(\d{4}-\d{2}-\d{2}T[\d-]+Z)\.md$/.exec(name)?.[1] ?? "";
  return names
    .filter((name) => /^handoff-.*\.md$/.test(name))
    .map((name) => ({ name, stamp: stampOf(name) }))
    .sort((a, b) => {
      if (a.stamp !== b.stamp) return a.stamp < b.stamp ? 1 : -1;
      return b.name.localeCompare(a.name);
    })
    .map((item) => item.name);
}
2036
+
2037
/**
 * `handoff create`: build a handoff model from the ledgers and write it as a
 * Markdown draft into the workspace's handoff directory.
 *
 * Reads the tasks, evidence, runs, receipts and learning ledgers, optionally
 * scopes the model to --task (which must match an existing task id), renders
 * the draft with renderHandoffDraft, and writes it to
 * `<workspace root>/handoff/handoff-<taskid|all>-<stamp>.md`, creating the
 * directory if needed. Finally emits a summary and a structured payload.
 *
 * @param {object} args Parsed CLI arguments; reads `_[1]`, `workspace`, `task`.
 * @throws {Error} A usage error when the action is not "create", or a
 *   task-not-found error when --task names no existing task.
 */
+ function handoffCommand(args) {
2038
+ const action = args._[1];
2039
+ if (action !== "create") {
2040
+ throw new Error(
2041
+ "Unknown handoff command. Run: node bin/ai-collab.js handoff create [--task <id>] [--workspace <dir>] [--json]"
2042
+ );
2043
+ }
2044
+
2045
+ const stateDir = resolveStateDir(args.workspace);
2046
+ // The handoff layer dir lives next to state/ inside the workspace root
2047
+ // (<root>/handoff), the SAME .aict/handoff the docs teach. resolveStateDir
2048
+ // returns <root>/state, so the handoff dir is its sibling.
2049
+ const workspaceRoot = path.dirname(stateDir);
2050
+ const handoffDir = path.join(workspaceRoot, "handoff");
2051
+
2052
+ const tasks = readLedger(stateDir, "tasks");
2053
+ const evidence = readLedger(stateDir, "evidence");
2054
+ const runs = readLedger(stateDir, "runs");
2055
+ const receipts = readLedger(stateDir, "receipts");
2056
+ const learning = readLedger(stateDir, "learning");
2057
+
// An absent or empty --task means "whole workspace" (taskId stays undefined).
2058
+ const taskId = args.task !== undefined && args.task !== "" ? args.task : undefined;
2059
+ if (taskId !== undefined && !tasks.some((task) => task.id === taskId)) {
2060
+ throw new Error(
2061
+ tr("error.taskNotFound.handoff", { id: taskId })
2062
+ );
2063
+ }
2064
+
2065
+ const model = buildHandoffModel(
2066
+ { tasks, evidence, runs, receipts, learning },
2067
+ { taskId }
2068
+ );
2069
+
2070
+ // The focused task's title (for the draft heading), if a single task was named.
2071
+ const focusTitle = taskId
2072
+ ? (tasks.find((task) => task.id === taskId)?.title ?? "")
2073
+ : "";
2074
+
2075
+ const markdown = renderHandoffDraft(model, { stateDir, focusTitle });
2076
+
2077
+ // Filename: handoff-<taskid|all>-<timestamp>.md, mirroring the run-layer's
2078
+ // "draft you then complete" convention. The timestamp keeps successive drafts
2079
+ // from overwriting each other (a user can regenerate without losing the last).
2080
+ const stamp = handoffStamp(now());
2081
+ const fileName = `handoff-${taskId ?? "all"}-${stamp}.md`;
2082
+ const filePath = path.join(handoffDir, fileName);
2083
+
2084
+ mkdirSync(handoffDir, { recursive: true });
2085
+ writeFileSync(filePath, markdown, "utf8");
2086
+
2087
+ const c = model.counts;
2088
+ emit(
2089
+ args,
2090
+ `Handoff draft written.
2091
+ file: ${filePath}
2092
+ scope: ${taskId ? `task ${taskId}` : "whole workspace"}
2093
+ summary: ${c.done} done · ${c.unverified} unverified · ${c.pending} pending · ${c.blocked} blocked
2094
+ This is a DRAFT auto-filled from the ledger — open it, review every section (especially Unverified), and complete the Next action before handing off.
2095
+ Next: review ${filePath}, then share it (or paste it into the next tool with the handoff PROMPT in ${path.join(handoffDir, "PROMPT.md")}).
2096
+ `,
2097
+ {
2098
+ command: "handoff create",
2099
+ ok: true,
2100
+ stateDir,
2101
+ file: filePath,
2102
+ taskId: taskId ?? null,
2103
+ draft: true,
2104
+ counts: c,
2105
+ model
2106
+ }
2107
+ );
2108
+ }
2109
+
2110
+ // --- bootstrap (first-experience value report) ------------------------------
2111
+ //
2112
+ // bootstrap is the first-experience entry point: it reads the user's OWN recent
2113
+ // work (repo structure, read-only git, the .aict ledger, AI instruction files) and
2114
+ // prints a plain "AI collaboration baseline" — five cards (PROFILE CLUES /
2115
+ // VERIFY / RESUME / ROLES / HARVEST). The honest core (which "done"s cannot be
2116
+ // trusted, what the bucketing
2117
+ // is) lives in bootstrap.js + ledger.js; this command only gathers the inputs
2118
+ // (resolve the workspace, capture git output, read the ledgers) and prints.
2119
+ //
2120
+ // v1 is REPORT-ONLY: it writes NOTHING (no profile, no long-term state). --yes
2121
+ // confirms the local-scan consent gate; without it bootstrap prints the scan scope
2122
+ // and stops (the CLI is non-interactive, so consent is an explicit re-run).
2123
+
2124
// captureGitSignals (defined further below): capture read-only git signals for the
// repo the user is in. Best-effort: git may be absent or this may not be a repo — in
// either case it returns { available:false } and the report degrades gracefully (no
// git section), never throwing. These are the ONLY git calls bootstrap makes
// (rev-parse to detect a work tree, then log / diff --stat), and all of them are
// pure read-only; bootstrap never writes to the repo.
2129
/**
 * Split a --dialogue / --logs flag value into a list of paths.
 *
 * The flag accepts a single path or a comma-separated list ("a.txt,b.json").
 * A repeated flag that the argument parser delivers as an array of strings is
 * accepted too: every string element is comma-split the same way, so a second
 * occurrence of the flag is not silently discarded. Each entry is trimmed and
 * empty entries are dropped.
 *
 * Pure string parsing — nothing is read from disk here.
 *
 * @param {unknown} value Raw flag value (string, array of strings, or anything else).
 * @returns {string[]} The named paths; [] for an absent, blank, or non-string
 *   value, so the connector stays OFF unless the user actually named a file.
 */
function splitPathList(value) {
  const rawValues = Array.isArray(value) ? value : [value];
  return rawValues
    .filter((entry) => typeof entry === "string")
    .flatMap((entry) => entry.split(","))
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);
}
2141
+
2142
+ // --- bootstrap --send-to-model: the consent + send presentation -------------
2143
+ //
2144
+ // These helpers render the EXTERNAL-model path's user-facing surface. They never send
2145
+ // anything themselves (sendmodel.js does the assemble/assert/call); they only show the
2146
+ // preview, ask for confirmation, and render returned candidates with the honesty caveats.
2147
+
2148
/**
 * Build the "what will be sent" preview text shown before any send/confirm.
 *
 * Lines, in order: heading, target model, snippet count, the all-redacted
 * statement, the source files (only when at least one is listed), and the
 * closing promise. Every line comes from the `t` translator.
 *
 * @param {{ model: string, snippetCount: number, sources: string[] }} preview
 * @param {string} [locale="en"] Locale passed through to `t`.
 * @returns {string} The preview lines joined with "\n".
 */
function renderSendPreview(preview, locale = "en") {
  const { model, snippetCount, sources } = preview;
  const plural = snippetCount === 1 ? "" : "s";

  return [
    t("send.preview.head", {}, locale),
    t("send.preview.model", { model }, locale),
    t("send.preview.count", { count: snippetCount, plural }, locale),
    t("send.preview.redacted", {}, locale),
    // The sources line is omitted entirely when no file is listed.
    ...(sources.length > 0
      ? [t("send.preview.sources", { files: sources.join(", ") }, locale)]
      : []),
    t("send.preview.promise", {}, locale)
  ].join("\n");
}
2164
+
2165
/**
 * Ask the user to confirm the send on stdin/stdout.
 *
 * Resolves true ONLY on an explicit y / yes (case-insensitive, surrounding
 * whitespace allowed). ANY other input — including a bare Enter — is false
 * (default-DENY).
 *
 * If the input closes before a line is entered (end of input, Ctrl+D), the
 * question callback never fires; the readline "close" event is therefore
 * treated as an empty answer so the promise always settles and the result is
 * a denial, instead of leaving the caller awaiting forever.
 *
 * @param {string} [locale="en"] Locale passed through to `t` for the prompt.
 * @returns {Promise<boolean>} Whether the user explicitly confirmed.
 */
async function confirmSend(locale = "en") {
  const { createInterface } = await import("node:readline");
  const rl = createInterface({ input: process.stdin, output: process.stdout });
  try {
    const answer = await new Promise((resolve) => {
      // Input ended without an answer: settle with "" (=> default-deny). When an
      // answer did arrive, the later close from the finally block is a no-op
      // because the promise is already resolved.
      rl.once("close", () => resolve(""));
      rl.question(t("send.confirm.prompt", {}, locale), (reply) => resolve(reply));
    });
    return /^\s*y(es)?\s*$/i.test(answer);
  } finally {
    rl.close();
  }
}
2181
+
2182
/**
 * Render the model-sourced candidate section as a list of output lines.
 *
 * The section opens with a heading (carrying the candidate count) and a caveat
 * line, followed by one item line per candidate (its localized kind label and
 * summary) and, when the candidate has a non-empty `basis`, a basis line.
 *
 * @param {Array<{ kind: string, summary: string, basis?: string }>} candidates
 * @param {string} [locale="en"] Locale passed through to `t`.
 * @returns {string[]} The section lines; [] when `candidates` is not an array
 *   or is empty, so an empty pass adds no section.
 */
function renderModelCandidates(candidates, locale = "en") {
  if (!Array.isArray(candidates) || candidates.length === 0) return [];

  const total = candidates.length;
  const header = [
    t("send.candidates.head", { count: total, plural: total === 1 ? "" : "s" }, locale),
    t("send.candidates.caveat", {}, locale)
  ];

  const body = candidates.flatMap((candidate) => {
    const kindLabel = t(`send.candidates.kind.${candidate.kind}`, {}, locale);
    const entry = [
      t("send.candidates.item", { kind: kindLabel, summary: candidate.summary }, locale)
    ];
    if (candidate.basis && candidate.basis.length > 0) {
      entry.push(t("send.candidates.basis", { basis: candidate.basis }, locale));
    }
    return entry;
  });

  return [...header, ...body];
}
2199
+
2200
/**
 * Capture git output for the given directory, best-effort.
 *
 * Runs `git rev-parse --is-inside-work-tree` first; unless that exits 0 and
 * prints "true", the directory is treated as "no git available". Otherwise it
 * collects `git log --name-only -n 20` and `git diff --stat`. A command that
 * does not exit 0 contributes an empty string rather than an error.
 *
 * @param {string} repoRoot Directory used as the working directory for git.
 * @returns {{ available: boolean, logText: string, diffStatText: string }}
 */
function captureGitSignals(repoRoot) {
  // Every call shares the same cwd / encoding / 15 s timeout / 8 MiB buffer cap.
  const runGit = (gitArgs) =>
    spawnSync("git", gitArgs, {
      cwd: repoRoot,
      encoding: "utf8",
      timeout: 15000,
      maxBuffer: 8 * 1024 * 1024
    });
  const stdoutOrEmpty = (result) => (result.status === 0 ? String(result.stdout) : "");

  const probe = runGit(["rev-parse", "--is-inside-work-tree"]);
  const insideWorkTree = probe.status === 0 && String(probe.stdout).trim() === "true";
  if (!insideWorkTree) {
    return { available: false, logText: "", diffStatText: "" };
  }

  // File names touched by the last 20 commits, then the diff stat of the
  // working tree.
  const recentLog = runGit(["log", "--name-only", "-n", "20"]);
  const workingDiff = runGit(["diff", "--stat"]);

  return {
    available: true,
    logText: stdoutOrEmpty(recentLog),
    diffStatText: stdoutOrEmpty(workingDiff)
  };
}
2216
+
2217
/**
 * `bootstrap`: print the first-experience baseline report for an existing
 * workspace, optionally followed by an external-model section.
 *
 * Flow:
 *  1. Resolve the workspace and the project root to scan.
 *  2. Consent gate: without `--yes`, print the scan scope (text or JSON) and stop.
 *  3. Gather inputs: git signals, local structure scan, the five ledgers, the
 *     handoff-draft count, and (only when --dialogue / --logs named files) the
 *     dialogue/log scan.
 *  4. Build and render the local report.
 *  5. Only when --send-to-model / --dry-run-send was passed: assemble the
 *     external-model record (degraded, dry-run, or an actual pass).
 *  6. Emit the local report text plus any model section, and the JSON payload.
 *
 * NOTE(review): step 2 returns unless `args.yes === true`, so by the time step 5
 * runs `args.yes` is always true. As written, the interactive y/N prompt and the
 * non-TTY refusal in the send path cannot be reached; `--yes` confirms both the
 * local scan and the send. The branches are kept unchanged here.
 *
 * @param {object} args Parsed CLI arguments. Reads `workspace`, `dialogue`,
 *   `logs`, `yes`, `json`, `sendToModel`, `dryRunSend`, `model` and the optional
 *   `_invokeModel` function override; `args` is also handed to `emit`.
 * @returns {Promise<void>}
 */
async function bootstrapCommand(args) {
  const stateDir = resolveStateDir(args.workspace);
  const workspaceRoot = path.dirname(stateDir);
  // With --workspace the project root is the parent of the workspace dir;
  // otherwise it is the current directory.
  const repoRoot = path.resolve(args.workspace ? path.dirname(workspaceRoot) : ".");

  // Opt-in connector inputs: empty unless the user named files.
  const dialoguePaths = splitPathList(args.dialogue);
  const logPaths = splitPathList(args.logs);
  const dialogueScopeForConsent = [...dialoguePaths, ...logPaths];

  // --- consent gate: show the scope, require --yes to go further -------------
  if (args.yes !== true) {
    if (!args.json) {
      console.log(renderConsentPreview(repoRoot, CURRENT_LOCALE, dialogueScopeForConsent));
      return;
    }
    const willScan = {
      repoStructure: repoRoot,
      gitHistory: "git log / git diff --stat (read-only)",
      ledger: path.join(workspaceRoot, "state"),
      aiInstructionFiles: ["CLAUDE.md", "AGENTS.md", ".cursorrules", "…"]
    };
    // Listed only when the user opted in by naming files.
    if (dialogueScopeForConsent.length > 0) {
      willScan.dialogueExports = dialogueScopeForConsent;
    }
    console.log(JSON.stringify({
      command: "bootstrap",
      ok: false,
      consentRequired: true,
      reportOnly: true,
      willScan,
      network: "not used",
      hint: "re-run with --yes to confirm"
    }, null, 2));
    return;
  }

  // --- gather inputs ---------------------------------------------------------
  const git = captureGitSignals(repoRoot);
  const scan = scanLocalStructure({ workspaceRoot, repoRoot, git });

  const [tasks, evidence, runs, receipts, learning] = [
    "tasks",
    "evidence",
    "runs",
    "receipts",
    "learning"
  ].map((ledgerName) => readLedger(stateDir, ledgerName));

  const handoffDraftCount = listHandoffDrafts(workspaceRoot).length;

  // The dialogue scan runs only when at least one file was named; otherwise it
  // stays null.
  let dialogue = null;
  if (dialoguePaths.length > 0 || logPaths.length > 0) {
    dialogue = scanDialogueAndLogs({
      dialoguePaths,
      logPaths,
      perTask: summarizeTasks(tasks, receipts, evidence, runs)
    });
  }

  const model = buildBootstrapModel({
    ledgers: { tasks, evidence, runs, receipts, learning },
    scan,
    handoffDraftCount,
    dialogue
  });

  // The local report is always rendered first; the model section below is only
  // ever appended to it.
  const localReportText = renderBootstrapReport(model, CURRENT_LOCALE);

  // --- external-model half: entered only on an explicit flag ------------------
  let sendModel = null;
  if (args.sendToModel === true || args.dryRunSend === true) {
    const modelOverride = typeof args.model === "string" ? args.model.trim() : "";
    const modelCmd = modelOverride.length > 0 ? modelOverride : DEFAULT_MODEL_CMD;

    // Shape shared by every "requested but nothing was sent" outcome.
    const notSent = (reason) => ({
      requested: true,
      sent: false,
      degraded: true,
      reason,
      model: modelCmd,
      candidates: []
    });
    const logUnlessJson = (text) => {
      if (!args.json) console.log(text);
    };

    const snippets = collectRedactedSnippets(dialogue);

    if (snippets.length === 0) {
      // No snippets to send: degrade to the local report.
      sendModel = notSent("no_redacted_snippets");
    } else if (args.dryRunSend === true) {
      // Dry run: build the payload, assert it is redaction-clean, carry it for
      // printing — no model is called.
      const payload = buildModelPrompt(snippets);
      assertPayloadRedacted(payload);
      sendModel = {
        requested: true,
        sent: false,
        dryRun: true,
        degraded: false,
        reason: null,
        model: modelCmd,
        candidates: [],
        payload
      };
    } else {
      const preview = buildSendPreview({ dialogue, snippets, modelCmd });
      const stdinIsTty = process.stdin.isTTY;
      const preConfirmed = args.yes === true;

      if (!preConfirmed && !stdinIsTty) {
        // Non-interactive and no --yes: show what would have been sent, refuse.
        if (!args.json) {
          console.log(renderSendPreview(preview, CURRENT_LOCALE));
          console.log(t("send.refusedNonTty", {}, CURRENT_LOCALE));
        }
        sendModel = notSent("consent_required_non_tty");
      } else {
        // Either pre-confirmed via --yes, or asked on a TTY. The preview is
        // printed first in both cases.
        logUnlessJson(renderSendPreview(preview, CURRENT_LOCALE));
        const confirmed = preConfirmed ? true : await confirmSend(CURRENT_LOCALE);

        if (!confirmed) {
          logUnlessJson(t("send.declined", {}, CURRENT_LOCALE));
          sendModel = notSent("declined");
        } else {
          // Confirmed: run the pass. args._invokeModel, when it is a function,
          // replaces the default model invocation.
          const invokeOverride =
            typeof args._invokeModel === "function" ? { invoke: args._invokeModel } : {};
          const pass = runExternalModelPass({ snippets, modelCmd, ...invokeOverride });
          sendModel = {
            requested: true,
            sent: pass.sent === true,
            degraded: pass.degraded === true,
            reason: pass.reason,
            model: modelCmd,
            candidates: Array.isArray(pass.candidates) ? pass.candidates : [],
            dropped: pass.dropped ?? 0
          };
        }
      }
    }
  }

  // --- compose the text: local report, then the model section if requested ---
  let outText = localReportText;
  if (sendModel) {
    const extra = [""];
    if (sendModel.dryRun) {
      extra.push(
        t("send.dryRun.head", {}, CURRENT_LOCALE),
        t("send.dryRun.note", {}, CURRENT_LOCALE),
        "",
        sendModel.payload
      );
    } else if (sendModel.candidates.length > 0) {
      extra.push(...renderModelCandidates(sendModel.candidates, CURRENT_LOCALE));
    } else {
      // Nothing to show: state the reason. Known machine codes have their own
      // localized line; anything else goes through the generic line with the
      // raw reason interpolated.
      const KNOWN_DEGRADE = new Set(["no_redacted_snippets", "consent_required_non_tty", "declined"]);
      const degradedLine = KNOWN_DEGRADE.has(sendModel.reason)
        ? t(`send.degraded.${sendModel.reason}`, { model: sendModel.model }, CURRENT_LOCALE)
        : t("send.degraded.generic", { model: sendModel.model, reason: sendModel.reason ?? "unknown" }, CURRENT_LOCALE);
      extra.push(degradedLine);
    }
    outText = `${localReportText}\n${extra.join("\n")}`;
  }

  // The JSON record of the model pass: null unless a send / dry run was requested.
  const sendToModelRecord = sendModel
    ? {
        requested: true,
        sent: sendModel.sent === true,
        degraded: sendModel.degraded === true,
        dryRun: sendModel.dryRun === true,
        reason: sendModel.reason ?? null,
        model: sendModel.model,
        candidates: sendModel.candidates,
        // The payload is included only for a dry run.
        ...(sendModel.dryRun ? { payload: sendModel.payload } : {})
      }
    : null;

  // "external model contacted" is reported only for a pass that was sent AND
  // not degraded; every other outcome reports "not used".
  const modelContacted = sendModel && sendModel.sent === true && sendModel.degraded !== true;
  const network = modelContacted
    ? "external model contacted (redacted payload)"
    : "not used";

  emit(args, outText, {
    command: "bootstrap",
    ok: true,
    reportOnly: true,
    hasOwnData: model.hasOwnData,
    seedOnly: model.seedOnly,
    dialogueUsed: model.dialogueUsed,
    dialogue: model.dialogue,
    workspaceRoot,
    repoRoot,
    counts: model.counts,
    scan: model.scan,
    cards: model.cards,
    sendToModel: sendToModelRecord,
    network
  });
}
2499
+
2500
/**
 * Derive ONE actionable "next step" from the already-loaded ledger view.
 *
 * The first matching rule wins, in this order:
 *  1. no non-seed task            -> run_bootstrap
 *  2. open task, zero evidence    -> add_evidence
 *  3. open task, evidence, no receipt -> create_receipt
 *  4. task whose receipt is "pending" -> accept_receipt
 *  5. no handoff draft            -> create_handoff
 *  6. a non-seed "proposed" learning row -> confirm_learning
 *  7. otherwise                   -> none
 *
 * The `--workspace` value appended to each suggested command is wrapped in
 * double quotes when it contains characters outside a conservative safe set
 * (for example spaces), so the printed line can still be pasted into a shell.
 * Values made only of safe characters are appended unchanged.
 *
 * @param {object} input
 * @param {Array<object>} input.perTask Per-task summaries; reads `isSeed`,
 *   `status`, `evidenceCount`, `receipt` and `id`.
 * @param {Array<object>|*} input.learning Learning rows (ignored unless an array).
 * @param {Array<string>} input.handoffDrafts Existing handoff draft names.
 * @param {string} [input.workspaceArg] The --workspace value the user passed, if any.
 * @returns {{ code: string, text: string, command: (string|null) }} `code` is a
 *   fixed machine enum, `text` the localized line from `tr`, `command` a CLI
 *   line or null when nothing is actionable.
 */
function deriveStatusNextStep({ perTask, learning, handoffDrafts, workspaceArg }) {
  const CLI = "node bin/ai-collab.js";

  // Quote only when needed, so ordinary paths render exactly as the user typed.
  const quoteIfNeeded = (value) => {
    const text = String(value);
    return /^[\w@%+=:,.\/\\~-]+$/.test(text)
      ? text
      : `"${text.replace(/"/g, '\\"')}"`;
  };
  // Omitted entirely when status ran against the default workspace.
  const wsSuffix = workspaceArg ? ` --workspace ${quoteIfNeeded(workspaceArg)}` : "";

  const ownTasks = perTask.filter((task) => !task.isSeed);

  // (1) No task of the user's own yet.
  if (ownTasks.length === 0) {
    return {
      code: "run_bootstrap",
      text: tr("status.next.noOwnWork.text"),
      command: `${CLI} bootstrap --yes${wsSuffix} # or: ${CLI} task create --title "..."${wsSuffix}`
    };
  }

  // (2) An open task with no evidence at all.
  const noEvidence = ownTasks.find(
    (task) => task.status !== "done" && task.evidenceCount === 0
  );
  if (noEvidence) {
    return {
      code: "add_evidence",
      text: tr("status.next.noEvidence.text", { id: noEvidence.id }),
      command: `${CLI} run exec --task ${noEvidence.id} --command "..."${wsSuffix}`
    };
  }

  // (3) An open task with evidence but no receipt.
  const noReceipt = ownTasks.find(
    (task) => task.status !== "done" && task.evidenceCount > 0 && !task.receipt
  );
  if (noReceipt) {
    return {
      code: "create_receipt",
      text: tr("status.next.noReceipt.text", { id: noReceipt.id }),
      command: `${CLI} receipt create --task ${noReceipt.id} --verdict pass_with_risk --review-mode self --evidence <id>${wsSuffix}`
    };
  }

  // (4) A receipt still in "pending".
  const pending = ownTasks.find(
    (task) => task.receipt && task.receipt.status === "pending"
  );
  if (pending) {
    return {
      code: "accept_receipt",
      text: tr("status.next.pending.text", { receiptId: pending.receipt.id, taskId: pending.id }),
      command: `${CLI} receipt accept --id ${pending.receipt.id} --owner you${wsSuffix}`
    };
  }

  // (5) No handoff draft exists yet.
  if (handoffDrafts.length === 0) {
    return {
      code: "create_handoff",
      text: tr("status.next.missingHandoff.text"),
      command: `${CLI} handoff create${wsSuffix}`
    };
  }

  // (6) A proposed learning row that is not a shipped seed row.
  const keepableLearning = Array.isArray(learning)
    ? learning.find(
        (row) => row && row.status === "proposed" && !isSeedRow(row, "learning")
      )
    : null;
  if (keepableLearning) {
    return {
      code: "confirm_learning",
      text: tr("status.next.keepLesson.text", { id: keepableLearning.id }),
      command: `${CLI} learning confirm --id ${keepableLearning.id}${wsSuffix}`
    };
  }

  // (7) Nothing outstanding.
  return {
    code: "none",
    text: tr("status.next.allClear.text"),
    command: null
  };
}
2599
+
2600
+ function statusCommand(args) {
2601
+ const stateDir = resolveStateDir(args.workspace);
2602
+ const tasks = readLedger(stateDir, "tasks");
2603
+ const evidence = readLedger(stateDir, "evidence");
2604
+ const runs = readLedger(stateDir, "runs");
2605
+ const receipts = readLedger(stateDir, "receipts");
2606
+ const learning = readLedger(stateDir, "learning");
2607
+
2608
+ const taskStatus = countBy(tasks, "status");
2609
+ const runStatus = countBy(runs, "status");
2610
+ const receiptStatus = countBy(receipts, "status");
2611
+
2612
+ // Per-task achievement summary: join each task to its receipts/evidence/runs so
2613
+ // status shows WHAT was earned on WHICH task, not just totals (the getitback
2614
+ // goal — a user who ran one chain should see the receipt they earned). Pure
2615
+ // aggregation over already-validated rows; it computes no guard levels itself.
2616
+ const perTask = summarizeTasks(tasks, receipts, evidence, runs);
2617
+
2618
+ // Seed-honesty notes on the TOP counters. A brand-new workspace ships one
2619
+ // example task/receipt/run plus two evidence rows; without these notes the
2620
+ // counters read like real progress (esp. "Receipts: 1 [accepted=1]", which
2621
+ // looks like an already-verified result). Each note names how many of that
2622
+ // counter's rows are the shipped seed, symmetric to the Tasks note, so the
2623
+ // total stays honest (real counts unchanged — we only annotate). countSeedRows
2624
+ // reuses isSeedRow, so "what is a seed" stays defined in exactly one place.
2625
+ const seedTaskCount = countSeedRows(tasks, "tasks");
2626
+ const seedReceiptCount = countSeedRows(receipts, "receipts");
2627
+ const seedEvidenceCount = countSeedRows(evidence, "evidence");
2628
+ const seedRunCount = countSeedRows(runs, "runs");
2629
+ const seedCountNote = (count) =>
2630
+ count > 0
2631
+ ? tr("status.seedNote.generic", { count, plural: count === 1 ? "" : "s" })
2632
+ : "";
2633
+ // The Tasks note keeps the extra call-to-action (the task is the row the user
2634
+ // deletes to clear the whole seed set); the other counters share the plain note.
2635
+ const seedNote =
2636
+ seedTaskCount > 0
2637
+ ? tr("status.seedNote.tasks", { count: seedTaskCount, plural: seedTaskCount === 1 ? "" : "s" })
2638
+ : "";
2639
+ const seedReceiptNote = seedCountNote(seedReceiptCount);
2640
+ const seedEvidenceNote = seedCountNote(seedEvidenceCount);
2641
+ const seedRunNote = seedCountNote(seedRunCount);
2642
+ const taskSummaryBlock =
2643
+ perTask.length > 0
2644
+ ? `\n${tr("status.yourTasks")}\n${perTask.map(formatTaskSummaryLine).join("\n")}\n`
2645
+ : "";
2646
+
2647
+ // Handoff drafts already generated for this workspace (handoff create output),
2648
+ // so status nudges a user toward an existing resume note instead of starting
2649
+ // from zero. One line naming the most recent draft + the total count; omitted
2650
+ // entirely when none exist (no noise on a workspace that never ran the command).
2651
+ const handoffDrafts = listHandoffDrafts(path.dirname(stateDir));
2652
+ const handoffLine =
2653
+ handoffDrafts.length > 0
2654
+ ? `\n${tr("status.handoffLine", { count: handoffDrafts.length, latest: handoffDrafts[0] })}\n`
2655
+ : "";
2656
+
2657
+ // "Most recent activity" = the highest createdAt/startedAt across the ledgers,
2658
+ // shown as a single line so status is a quick human glance, not a dashboard.
2659
+ const stamps = [
2660
+ ...tasks.map((t) => t.createdAt),
2661
+ ...evidence.map((e) => e.createdAt),
2662
+ ...runs.map((r) => r.finishedAt ?? r.startedAt),
2663
+ ...receipts.map((c) => c.createdAt)
2664
+ ].filter(Boolean).sort();
2665
+ const latest = stamps.length > 0 ? stamps[stamps.length - 1] : tr("status.noActivity");
2666
+
2667
+ // P4 recall: echo back the ONE standing preference the user most recently kept
2668
+ // (a confirmed/edited profile-type learning row), so the tool feels like it is
2669
+ // carrying forward how you work. Deliberately one line, not a dump — if nothing
2670
+ // has been confirmed yet, the line is omitted entirely (no noise on an empty or
2671
+ // brand-new workspace). The seed ships a single "proposed" row, which does NOT
2672
+ // qualify, so a fresh workspace shows no carry-forward line until the user
2673
+ // actually confirms a preference.
2674
+ const carriedPreference = latestConfirmedProfileLearning(learning);
2675
+ const carryLine = carriedPreference
2676
+ ? `\n${tr("status.carryLine", { content: carriedPreference.content })}\n`
2677
+ : "";
2678
+
2679
+ // P4 recall (harvest twin): the SAME echo for the most recently kept HARVEST
2680
+ // lesson. Before this, a confirmed harvest row had nowhere to surface (only
2681
+ // profile rows were recalled), so a lesson the user kept silently disappeared.
2682
+ // Symmetric to the preference line — one line, omitted when none is kept, and a
2683
+ // proposed seed harvest row (l0) never qualifies. The two recalls are separate
2684
+ // lines so a harvest lesson never poses as a standing preference (and vice versa).
2685
+ const carriedHarvestLesson = latestConfirmedHarvestLearning(learning);
2686
+ const harvestLine = carriedHarvestLesson
2687
+ ? `\n${tr("status.harvestLine", { content: carriedHarvestLesson.content })}\n`
2688
+ : "";
2689
+
2690
+ // One plain-language "what to do next" line, derived from the current state, so a
2691
+ // newcomer who runs status knows their next move (not just the counters). The
2692
+ // suggested command is always an existing audited CLI line; it is printed on its
2693
+ // own line so it is copy-pasteable, and carried in --json as { text, command }.
2694
+ const nextStep = deriveStatusNextStep({
2695
+ perTask,
2696
+ learning,
2697
+ handoffDrafts,
2698
+ workspaceArg: args.workspace
2699
+ });
2700
+ const nextStepBlock = nextStep.command
2701
+ ? `\n${tr("status.nextStep.withCommand", { text: nextStep.text, command: nextStep.command })}\n`
2702
+ : `\n${tr("status.nextStep.textOnly", { text: nextStep.text })}\n`;
2703
+
2704
+ emit(args, `${tr("status.title")}
2705
+ ${tr("status.state", { stateDir })}
2706
+
2707
+ ${tr("status.tasks", { count: tasks.length, breakdown: formatCounts(taskStatus), seedNote })}
2708
+ ${tr("status.evidence", { count: evidence.length, seedNote: seedEvidenceNote })}
2709
+ ${tr("status.runs", { count: runs.length, breakdown: formatCounts(runStatus), seedNote: seedRunNote })}
2710
+ ${tr("status.receipts", { count: receipts.length, breakdown: formatCounts(receiptStatus), seedNote: seedReceiptNote })}
2711
+ ${tr("status.learning", { count: learning.length })}
2712
+ ${taskSummaryBlock}${carryLine}${harvestLine}${handoffLine}
2713
+ ${tr("status.mostRecentActivity", { latest })}
2714
+ ${nextStepBlock}`, {
2715
+ command: "status",
2716
+ ok: true,
2717
+ stateDir,
2718
+ // Handoff draft files already generated for this workspace (newest-first), so
2719
+ // a --json integration sees the same resume notes the text line surfaces.
2720
+ handoffDrafts,
2721
+ counts: {
2722
+ tasks: tasks.length,
2723
+ evidence: evidence.length,
2724
+ runs: runs.length,
2725
+ receipts: receipts.length,
2726
+ learning: learning.length
2727
+ },
2728
+ taskStatus,
2729
+ runStatus,
2730
+ receiptStatus,
2731
+ // Structured per-task achievement data so a tool integrating via --json gets
2732
+ // the same join (title + strongest receipt + evidence/run counts + seed flag).
2733
+ perTask,
2734
+ // How many of each ledger's rows are the shipped example seed (so an
2735
+ // integration can subtract them from "real" progress the same way the human
2736
+ // notes on the top counters do). seedTaskCount kept for back-compat; the
2737
+ // receipts/evidence/runs counts are symmetric additions so --json carries the
2738
+ // same seed honesty the text counters now show.
2739
+ seedTaskCount,
2740
+ seedReceiptCount,
2741
+ seedEvidenceCount,
2742
+ seedRunCount,
2743
+ // The kept preference echoed back this run (null when none confirmed yet), so
2744
+ // a tool integrating via --json can surface the same single carry-forward.
2745
+ carriedPreference: carriedPreference
2746
+ ? { id: carriedPreference.id, content: carriedPreference.content, status: carriedPreference.status }
2747
+ : null,
2748
+ // The kept harvest lesson echoed back this run (null when none), symmetric to
2749
+ // carriedPreference so both recalls are available to an integrating tool.
2750
+ carriedHarvestLesson: carriedHarvestLesson
2751
+ ? { id: carriedHarvestLesson.id, content: carriedHarvestLesson.content, status: carriedHarvestLesson.status }
2752
+ : null,
2753
+ // The single derived "what to do next", so a --json consumer gets the same
2754
+ // guidance the text surface prints on its Next step line:
2755
+ // - code: a locale-STABLE machine enum (identical under --lang en / zh);
2756
+ // branch on this, not the localized text.
2757
+ // - text: the localized human line (varies by language).
2758
+ // - command: the copy-pasteable CLI line (locale-stable), null when nothing
2759
+ // is outstanding.
2760
+ nextStep: { code: nextStep.code, text: nextStep.text, command: nextStep.command },
2761
+ mostRecentActivity: latest
2762
+ });
2763
+ }
2764
+
2765
/**
 * Print the package version on stdout.
 *
 * The version is read from the package.json one directory above this module.
 * Any failure while locating, reading or parsing that file leaves the
 * "unknown" placeholder in place: the version is informational only, so this
 * never throws.
 */
function printVersion() {
  let version = "unknown";
  try {
    const manifestUrl = new URL("../package.json", import.meta.url);
    const manifest = JSON.parse(readFileSync(manifestUrl, "utf8"));
    version = manifest.version;
  } catch {
    /* keep "unknown" — a missing or unreadable manifest must not fail the CLI */
  }
  console.log(version);
}
2774
+
2775
+ // Every top-level command word the CLI dispatches. Used both to validate an
2776
+ // unknown command and to suggest the closest match on a typo.
2777
const TOP_LEVEL_COMMANDS = [
  "init",
  "welcome",
  "guide",
  "demo",
  "check",
  "adapters",
  "task",
  "evidence",
  "run",
  "receipt",
  "learning",
  "status",
  "capability",
  "handoff",
  "bootstrap",
  "help",
  "version"
];
2781
+
2782
+ // Classic Levenshtein edit distance — small, dependency-free, only ever run on two
2783
+ // short command words, so the simple O(m*n) table is fine. Used to turn a typo into
2784
+ // a "Did you mean 'x'?" hint instead of dumping the whole reference.
2785
/**
 * Levenshtein edit distance between two words.
 *
 * Only the previous row of the classic table is needed to fill in the next one,
 * so two rolling rows are kept instead of the whole grid.
 *
 * @param {string} a - First word.
 * @param {string} b - Second word.
 * @returns {number} Minimum number of single-character insertions, deletions
 *   and substitutions that turn `a` into `b`.
 */
function editDistance(a, b) {
  // Row 0: turning the empty prefix of `a` into the first j chars of `b` costs j insertions.
  let previous = Array.from({ length: b.length + 1 }, (_, j) => j);

  for (let i = 1; i <= a.length; i += 1) {
    // Column 0: turning the first i chars of `a` into the empty string costs i deletions.
    const current = [i];
    for (let j = 1; j <= b.length; j += 1) {
      const substitution = previous[j - 1] + (a[i - 1] === b[j - 1] ? 0 : 1);
      const deletion = previous[j] + 1;
      const insertion = current[j - 1] + 1;
      current.push(Math.min(deletion, insertion, substitution));
    }
    previous = current;
  }

  return previous[b.length];
}
2803
+
2804
+ // Suggest the closest known command for a typo. A prefix match wins outright (e.g.
2805
+ // "stat" -> "status"); otherwise the nearest by edit distance, but only when it is
2806
+ // close enough to be a plausible typo (<= 3 edits AND under half the longer word's length plus one),
2807
+ // so a wild word like "bogus" suggests nothing rather than a misleading guess.
2808
/**
 * Suggest the known top-level command a mistyped word most plausibly meant.
 *
 * @param {string} input - The unrecognized command word.
 * @returns {string|null} An entry of TOP_LEVEL_COMMANDS, or null when no entry
 *   is a plausible match (including an empty or non-string word).
 */
function closestCommand(input) {
  // Every string starts with "", so an empty word would "prefix-match" whatever
  // command is listed first and produce a meaningless suggestion. A non-string
  // word has no prefix or distance to compare at all.
  if (typeof input !== "string" || input.length === 0) return null;

  // Prefix rule, in either direction: the word is the start of a command, or a
  // command is the start of the word. The first listed command that fits wins.
  const prefix = TOP_LEVEL_COMMANDS.find((cmd) => cmd.startsWith(input) || input.startsWith(cmd));
  if (prefix) return prefix;

  // Otherwise take the nearest command by edit distance; the strict `<` keeps the
  // earliest-listed command when several are equally close.
  let best = null;
  let bestDistance = Infinity;
  for (const cmd of TOP_LEVEL_COMMANDS) {
    const distance = editDistance(input, cmd);
    if (distance < bestDistance) {
      best = cmd;
      bestDistance = distance;
    }
  }

  // Only offer it when it is close enough to be a typo: at most 3 edits, and fewer
  // than half the longer word's length plus one.
  if (best && bestDistance <= 3 && bestDistance < Math.max(input.length, best.length) / 2 + 1) {
    return best;
  }
  return null;
}
2825
+
2826
/**
 * CLI entry point: resolve the locale, parse argv, then dispatch one command.
 *
 * Any error thrown while parsing or dispatching is reported on stderr by its
 * message and sets the process exit code to 1. An unrecognized command word
 * does the same, with a short hint instead of the full reference.
 */
async function main() {
  // The locale is settled before parseArgs runs, so that a parse or dispatch
  // error can already be rendered in the user's language. --lang is therefore
  // read directly from raw argv (the token following a bare "--lang") and
  // handed to resolveLocale together with the environment.
  const rawArgv = process.argv.slice(2);
  const langIdx = rawArgv.indexOf("--lang");
  let langFlag;
  if (langIdx >= 0) langFlag = rawArgv[langIdx + 1];
  CURRENT_LOCALE = resolveLocale({ langFlag, env: process.env });

  // "levels" as the help topic prints only the L0-L4 reference; anything else
  // prints the main reference.
  const showHelp = (topic) => {
    if (topic === "levels") printLevelsHelp();
    else printHelp();
  };

  try {
    const args = parseArgs(rawArgv);

    // Version and help requests are answered before any dispatch, so asking for
    // either one never runs a command (and never triggers its side effects).
    if (args.version) {
      printVersion();
      return;
    }
    if (args.help) {
      showHelp(args._[0]);
      return;
    }

    // No command word at all is a newcomer's first contact: show the short
    // quickstart rather than the full reference.
    const command = args._[0];
    if (command === undefined) {
      printQuickstart();
      return;
    }

    switch (command) {
      case "init":
        init(args);
        break;
      case "welcome":
        welcome(args);
        break;
      case "guide":
        guide(args);
        break;
      case "demo":
        demo(args);
        break;
      case "check":
        check(args);
        break;
      case "adapters":
        adapters(args);
        break;
      case "task":
        taskCommand(args);
        break;
      case "evidence":
        evidenceCommand(args);
        break;
      case "run":
        await runCommand(args);
        break;
      case "receipt":
        receiptCommand(args);
        break;
      case "learning":
        learningCommand(args);
        break;
      case "status":
        statusCommand(args);
        break;
      case "capability":
        capabilityCommand(args);
        break;
      case "handoff":
        handoffCommand(args);
        break;
      case "bootstrap":
        await bootstrapCommand(args);
        break;
      // The bare word `version` is an alias for --version.
      case "version":
        printVersion();
        break;
      // `help levels` is the bare-subcommand twin of `--help levels`.
      case "help":
      case "--help":
      case "-h":
        showHelp(args._[1]);
        break;
      default: {
        // A typo gets a few lines, not the whole reference: the unknown word,
        // the closest known command when there is a plausible one, and where
        // to find the full list.
        const suggestion = closestCommand(command);
        console.error(`Unknown command: ${command}`);
        if (suggestion) console.error(`Did you mean '${suggestion}'?`);
        console.error("Run 'node bin/ai-collab.js --help' for all commands.");
        process.exitCode = 1;
      }
    }
  } catch (error) {
    console.error(error.message);
    process.exitCode = 1;
  }
}
2897
+
2898
// main() reports errors from parsing and dispatch itself, but the locale is
// resolved before its try block. Attach a handler so a failure there is reported
// the same way (message on stderr, exit code 1) instead of leaving the promise
// floating and surfacing as an unhandled rejection.
main().catch((error) => {
  console.error(error instanceof Error ? error.message : error);
  process.exitCode = 1;
});
2899
+