ai-collab-open-system 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. package/.aict/START_HERE.md +127 -0
  2. package/.aict/WORKSPACE_MANIFEST.json +91 -0
  3. package/.aict/acceptance/EXAMPLE.synthetic.md +49 -0
  4. package/.aict/acceptance/FAILURE_MODES.md +40 -0
  5. package/.aict/acceptance/PROMPT.md +47 -0
  6. package/.aict/acceptance/README.md +44 -0
  7. package/.aict/acceptance/TEMPLATE.md +57 -0
  8. package/.aict/adapters/SHARED_CORE_CONTRACT.md +106 -0
  9. package/.aict/adapters/claude-code/ADAPTER.md +28 -0
  10. package/.aict/adapters/cline/ADAPTER.md +28 -0
  11. package/.aict/adapters/codex/ADAPTER.md +28 -0
  12. package/.aict/adapters/copilot/ADAPTER.md +28 -0
  13. package/.aict/adapters/cursor/ADAPTER.md +28 -0
  14. package/.aict/adapters/windsurf/ADAPTER.md +28 -0
  15. package/.aict/context/EXAMPLE.synthetic.md +53 -0
  16. package/.aict/context/FAILURE_MODES.md +40 -0
  17. package/.aict/context/PROMPT.md +47 -0
  18. package/.aict/context/README.md +44 -0
  19. package/.aict/context/TEMPLATE.md +63 -0
  20. package/.aict/cookbook/README.md +8 -0
  21. package/.aict/cookbook/bridge-to-a-second-family.md +103 -0
  22. package/.aict/cookbook/connect-a-tool.md +67 -0
  23. package/.aict/cookbook/review-a-half-product.md +79 -0
  24. package/.aict/cookbook/run-a-first-loop.md +81 -0
  25. package/.aict/examples/README.md +21 -0
  26. package/.aict/examples/ai-coding-long-task/CASE.md +161 -0
  27. package/.aict/examples/ai-coding-long-task/artifacts/acceptance-card.md +36 -0
  28. package/.aict/examples/ai-coding-long-task/artifacts/context-package.md +30 -0
  29. package/.aict/examples/ai-coding-long-task/artifacts/execution-prompt.md +30 -0
  30. package/.aict/examples/ai-coding-long-task/artifacts/first-ai-output.md +109 -0
  31. package/.aict/examples/ai-coding-long-task/artifacts/guard-review.md +40 -0
  32. package/.aict/examples/ai-coding-long-task/artifacts/handoff-note.md +28 -0
  33. package/.aict/examples/ai-coding-long-task/artifacts/harvest-seed.md +28 -0
  34. package/.aict/examples/ai-coding-long-task/artifacts/revised-output.md +62 -0
  35. package/.aict/examples/content-production-harvest/CASE.md +87 -0
  36. package/.aict/examples/content-production-harvest/artifacts/acceptance-card.md +28 -0
  37. package/.aict/examples/content-production-harvest/artifacts/context-package.md +28 -0
  38. package/.aict/examples/content-production-harvest/artifacts/execution-prompt.md +30 -0
  39. package/.aict/examples/content-production-harvest/artifacts/guard-review.md +28 -0
  40. package/.aict/examples/content-production-harvest/artifacts/handoff-note.md +28 -0
  41. package/.aict/examples/content-production-harvest/artifacts/harvest-seed.md +28 -0
  42. package/.aict/examples/multi-tool-collaboration/CASE.md +87 -0
  43. package/.aict/examples/multi-tool-collaboration/artifacts/acceptance-card.md +28 -0
  44. package/.aict/examples/multi-tool-collaboration/artifacts/context-package.md +28 -0
  45. package/.aict/examples/multi-tool-collaboration/artifacts/execution-prompt.md +30 -0
  46. package/.aict/examples/multi-tool-collaboration/artifacts/guard-review.md +28 -0
  47. package/.aict/examples/multi-tool-collaboration/artifacts/handoff-note.md +28 -0
  48. package/.aict/examples/multi-tool-collaboration/artifacts/harvest-seed.md +28 -0
  49. package/.aict/examples/personal-judgment-growth-assistant/CASE.md +87 -0
  50. package/.aict/examples/personal-judgment-growth-assistant/artifacts/acceptance-card.md +28 -0
  51. package/.aict/examples/personal-judgment-growth-assistant/artifacts/context-package.md +28 -0
  52. package/.aict/examples/personal-judgment-growth-assistant/artifacts/execution-prompt.md +30 -0
  53. package/.aict/examples/personal-judgment-growth-assistant/artifacts/guard-review.md +28 -0
  54. package/.aict/examples/personal-judgment-growth-assistant/artifacts/handoff-note.md +28 -0
  55. package/.aict/examples/personal-judgment-growth-assistant/artifacts/harvest-seed.md +28 -0
  56. package/.aict/examples/research-knowledge-synthesis/CASE.md +87 -0
  57. package/.aict/examples/research-knowledge-synthesis/artifacts/acceptance-card.md +28 -0
  58. package/.aict/examples/research-knowledge-synthesis/artifacts/context-package.md +28 -0
  59. package/.aict/examples/research-knowledge-synthesis/artifacts/execution-prompt.md +30 -0
  60. package/.aict/examples/research-knowledge-synthesis/artifacts/guard-review.md +28 -0
  61. package/.aict/examples/research-knowledge-synthesis/artifacts/handoff-note.md +28 -0
  62. package/.aict/examples/research-knowledge-synthesis/artifacts/harvest-seed.md +28 -0
  63. package/.aict/guard/EXAMPLE.synthetic.md +51 -0
  64. package/.aict/guard/FAILURE_MODES.md +40 -0
  65. package/.aict/guard/PROMPT.md +47 -0
  66. package/.aict/guard/README.md +44 -0
  67. package/.aict/guard/TEMPLATE.md +60 -0
  68. package/.aict/handoff/EXAMPLE.synthetic.md +51 -0
  69. package/.aict/handoff/FAILURE_MODES.md +40 -0
  70. package/.aict/handoff/PROMPT.md +47 -0
  71. package/.aict/handoff/README.md +44 -0
  72. package/.aict/handoff/TEMPLATE.md +60 -0
  73. package/.aict/harvest/EXAMPLE.synthetic.md +51 -0
  74. package/.aict/harvest/FAILURE_MODES.md +40 -0
  75. package/.aict/harvest/PROMPT.md +47 -0
  76. package/.aict/harvest/README.md +44 -0
  77. package/.aict/harvest/TEMPLATE.md +60 -0
  78. package/.aict/mechanisms/README.md +34 -0
  79. package/.aict/mechanisms/anti-drift-partner/EXAMPLE.synthetic.md +46 -0
  80. package/.aict/mechanisms/anti-drift-partner/FAILURE_MODES.md +25 -0
  81. package/.aict/mechanisms/anti-drift-partner/PROMPT.md +75 -0
  82. package/.aict/mechanisms/anti-drift-partner/README.md +82 -0
  83. package/.aict/mechanisms/anti-drift-partner/TEMPLATE.md +74 -0
  84. package/.aict/mechanisms/blind-spot-scan/EXAMPLE.synthetic.md +39 -0
  85. package/.aict/mechanisms/blind-spot-scan/FAILURE_MODES.md +25 -0
  86. package/.aict/mechanisms/blind-spot-scan/PROMPT.md +72 -0
  87. package/.aict/mechanisms/blind-spot-scan/README.md +79 -0
  88. package/.aict/mechanisms/blind-spot-scan/TEMPLATE.md +70 -0
  89. package/.aict/mechanisms/collaboration-coach/EXAMPLE.synthetic.md +40 -0
  90. package/.aict/mechanisms/collaboration-coach/FAILURE_MODES.md +25 -0
  91. package/.aict/mechanisms/collaboration-coach/PROMPT.md +72 -0
  92. package/.aict/mechanisms/collaboration-coach/README.md +79 -0
  93. package/.aict/mechanisms/collaboration-coach/TEMPLATE.md +61 -0
  94. package/.aict/mechanisms/do-not-handle-yet/EXAMPLE.synthetic.md +15 -0
  95. package/.aict/mechanisms/do-not-handle-yet/FAILURE_MODES.md +16 -0
  96. package/.aict/mechanisms/do-not-handle-yet/PROMPT.md +41 -0
  97. package/.aict/mechanisms/do-not-handle-yet/README.md +30 -0
  98. package/.aict/mechanisms/do-not-handle-yet/TEMPLATE.md +38 -0
  99. package/.aict/mechanisms/dual-guard/EXAMPLE.synthetic.md +54 -0
  100. package/.aict/mechanisms/dual-guard/FAILURE_MODES.md +25 -0
  101. package/.aict/mechanisms/dual-guard/PROMPT.md +76 -0
  102. package/.aict/mechanisms/dual-guard/README.md +81 -0
  103. package/.aict/mechanisms/dual-guard/TEMPLATE.md +73 -0
  104. package/.aict/mechanisms/feedback-absorption-ledger/EXAMPLE.synthetic.md +49 -0
  105. package/.aict/mechanisms/feedback-absorption-ledger/FAILURE_MODES.md +25 -0
  106. package/.aict/mechanisms/feedback-absorption-ledger/PROMPT.md +74 -0
  107. package/.aict/mechanisms/feedback-absorption-ledger/README.md +81 -0
  108. package/.aict/mechanisms/feedback-absorption-ledger/TEMPLATE.md +69 -0
  109. package/.aict/mechanisms/half-product-review/EXAMPLE.synthetic.md +15 -0
  110. package/.aict/mechanisms/half-product-review/FAILURE_MODES.md +16 -0
  111. package/.aict/mechanisms/half-product-review/PROMPT.md +41 -0
  112. package/.aict/mechanisms/half-product-review/README.md +30 -0
  113. package/.aict/mechanisms/half-product-review/TEMPLATE.md +38 -0
  114. package/.aict/mechanisms/handoff-abc/EXAMPLE.synthetic.md +47 -0
  115. package/.aict/mechanisms/handoff-abc/FAILURE_MODES.md +25 -0
  116. package/.aict/mechanisms/handoff-abc/PROMPT.md +75 -0
  117. package/.aict/mechanisms/handoff-abc/README.md +82 -0
  118. package/.aict/mechanisms/handoff-abc/TEMPLATE.md +60 -0
  119. package/.aict/mechanisms/harvest-and-erc/EXAMPLE.synthetic.md +43 -0
  120. package/.aict/mechanisms/harvest-and-erc/FAILURE_MODES.md +25 -0
  121. package/.aict/mechanisms/harvest-and-erc/PROMPT.md +74 -0
  122. package/.aict/mechanisms/harvest-and-erc/README.md +81 -0
  123. package/.aict/mechanisms/harvest-and-erc/TEMPLATE.md +60 -0
  124. package/.aict/mechanisms/honest-calibration/EXAMPLE.synthetic.md +43 -0
  125. package/.aict/mechanisms/honest-calibration/FAILURE_MODES.md +25 -0
  126. package/.aict/mechanisms/honest-calibration/PROMPT.md +74 -0
  127. package/.aict/mechanisms/honest-calibration/README.md +81 -0
  128. package/.aict/mechanisms/honest-calibration/TEMPLATE.md +66 -0
  129. package/.aict/mechanisms/one-click-dispatch/EXAMPLE.synthetic.md +15 -0
  130. package/.aict/mechanisms/one-click-dispatch/FAILURE_MODES.md +16 -0
  131. package/.aict/mechanisms/one-click-dispatch/PROMPT.md +41 -0
  132. package/.aict/mechanisms/one-click-dispatch/README.md +30 -0
  133. package/.aict/mechanisms/one-click-dispatch/TEMPLATE.md +38 -0
  134. package/.aict/mechanisms/plain-language-first-screen/EXAMPLE.synthetic.md +15 -0
  135. package/.aict/mechanisms/plain-language-first-screen/FAILURE_MODES.md +16 -0
  136. package/.aict/mechanisms/plain-language-first-screen/PROMPT.md +41 -0
  137. package/.aict/mechanisms/plain-language-first-screen/README.md +30 -0
  138. package/.aict/mechanisms/plain-language-first-screen/TEMPLATE.md +38 -0
  139. package/.aict/mechanisms/root-cause-brake/EXAMPLE.synthetic.md +55 -0
  140. package/.aict/mechanisms/root-cause-brake/FAILURE_MODES.md +25 -0
  141. package/.aict/mechanisms/root-cause-brake/PROMPT.md +73 -0
  142. package/.aict/mechanisms/root-cause-brake/README.md +79 -0
  143. package/.aict/mechanisms/root-cause-brake/TEMPLATE.md +74 -0
  144. package/.aict/mechanisms/scout-review-controller/EXAMPLE.synthetic.md +15 -0
  145. package/.aict/mechanisms/scout-review-controller/FAILURE_MODES.md +16 -0
  146. package/.aict/mechanisms/scout-review-controller/PROMPT.md +41 -0
  147. package/.aict/mechanisms/scout-review-controller/README.md +30 -0
  148. package/.aict/mechanisms/scout-review-controller/TEMPLATE.md +38 -0
  149. package/.aict/mechanisms/single-tool-guard/EXAMPLE.synthetic.md +54 -0
  150. package/.aict/mechanisms/single-tool-guard/FAILURE_MODES.md +25 -0
  151. package/.aict/mechanisms/single-tool-guard/PROMPT.md +76 -0
  152. package/.aict/mechanisms/single-tool-guard/README.md +83 -0
  153. package/.aict/mechanisms/single-tool-guard/TEMPLATE.md +75 -0
  154. package/.aict/mechanisms/task-splitting/EXAMPLE.synthetic.md +53 -0
  155. package/.aict/mechanisms/task-splitting/FAILURE_MODES.md +25 -0
  156. package/.aict/mechanisms/task-splitting/PROMPT.md +72 -0
  157. package/.aict/mechanisms/task-splitting/README.md +79 -0
  158. package/.aict/mechanisms/task-splitting/TEMPLATE.md +76 -0
  159. package/.aict/modes/README.md +11 -0
  160. package/.aict/modes/execute.md +31 -0
  161. package/.aict/modes/handoff.md +29 -0
  162. package/.aict/modes/harvest.md +30 -0
  163. package/.aict/modes/review.md +28 -0
  164. package/.aict/modes/shape.md +34 -0
  165. package/.aict/privacy/COMMERCIAL_BOUNDARY.md +34 -0
  166. package/.aict/privacy/PRIVACY.md +36 -0
  167. package/.aict/privacy/REDACTION_CHECKLIST.md +12 -0
  168. package/.aict/profile/CANDIDATES.md +44 -0
  169. package/.aict/profile/EXAMPLE.synthetic.md +49 -0
  170. package/.aict/profile/FAILURE_MODES.md +40 -0
  171. package/.aict/profile/PROMPT.md +47 -0
  172. package/.aict/profile/README.md +44 -0
  173. package/.aict/profile/TEMPLATE.md +57 -0
  174. package/.aict/prompts/acceptance-definition.md +109 -0
  175. package/.aict/prompts/guard-review.md +116 -0
  176. package/.aict/prompts/handoff-generation.md +110 -0
  177. package/.aict/prompts/harvest-extraction.md +110 -0
  178. package/.aict/prompts/mode-switching.md +66 -0
  179. package/.aict/prompts/profile-creation.md +66 -0
  180. package/.aict/prompts/profile-refinement.md +66 -0
  181. package/.aict/prompts/project-context-packaging.md +113 -0
  182. package/.aict/prompts/red-team-challenge.md +106 -0
  183. package/.aict/prompts/rule-update-proposal.md +114 -0
  184. package/.aict/prompts/workflow-reset.md +109 -0
  185. package/.aict/roles/README.md +18 -0
  186. package/.aict/roles/executor.md +34 -0
  187. package/.aict/roles/harvester.md +33 -0
  188. package/.aict/roles/owner-controller.md +38 -0
  189. package/.aict/roles/scout.md +33 -0
  190. package/.aict/roles/supervisor.md +34 -0
  191. package/.aict/roles/system-guardian.md +34 -0
  192. package/.aict/skills/acceptance/SKILL.md +43 -0
  193. package/.aict/skills/context/SKILL.md +44 -0
  194. package/.aict/skills/evidence-pack/SKILL.md +42 -0
  195. package/.aict/skills/guard/SKILL.md +46 -0
  196. package/.aict/skills/handoff/SKILL.md +44 -0
  197. package/.aict/skills/harvest/SKILL.md +44 -0
  198. package/.aict/skills/mode-switch/SKILL.md +42 -0
  199. package/.aict/skills/profile/SKILL.md +42 -0
  200. package/.aict/skills/red-team/SKILL.md +42 -0
  201. package/.aict/skills/single-tool-guard/SKILL.md +42 -0
  202. package/.aict/state/CURRENT_STATE.md +13 -0
  203. package/.aict/state/DECISIONS.md +7 -0
  204. package/.aict/state/TASK_LOG.md +7 -0
  205. package/.aict/state/evidence.jsonl +2 -0
  206. package/.aict/state/learning-ledger.jsonl +1 -0
  207. package/.aict/state/receipts.jsonl +1 -0
  208. package/.aict/state/runs.jsonl +1 -0
  209. package/.aict/state/tasks.jsonl +1 -0
  210. package/.aict/walkthroughs/10-minute-your-task.md +107 -0
  211. package/.aict/walkthroughs/10-minute.md +43 -0
  212. package/.aict/walkthroughs/30-minute.md +22 -0
  213. package/.aict/walkthroughs/60-minute.md +27 -0
  214. package/.aict/walkthroughs/synthetic-loop-transcript.md +43 -0
  215. package/CHANGELOG.md +23 -0
  216. package/CODE_OF_CONDUCT.md +20 -0
  217. package/CONTRIBUTING.md +30 -0
  218. package/KNOWN_LIMITATIONS.md +54 -0
  219. package/LICENSE +199 -0
  220. package/PRODUCT_CONTRACT.md +446 -0
  221. package/README.md +245 -0
  222. package/RELEASE_CHECKLIST.md +78 -0
  223. package/SECURITY.md +56 -0
  224. package/START_HERE.md +89 -0
  225. package/bin/ai-collab.js +2 -0
  226. package/docs/DOGFOOD.md +85 -0
  227. package/docs/FEEDBACK.md +61 -0
  228. package/docs/FIRST_EXPERIENCE_SPEC.md +32 -0
  229. package/docs/FREE_VS_PAID.md +53 -0
  230. package/docs/PUBLIC_BOUNDARY.md +36 -0
  231. package/docs/PUBLIC_MAPPING.md +178 -0
  232. package/docs/RELEASE_PRIORITY.md +23 -0
  233. package/docs/WHY_THIS_EXISTS.md +36 -0
  234. package/docs/open-system/00-start-here.md +60 -0
  235. package/docs/open-system/01-ai-collaboration-os.md +33 -0
  236. package/docs/open-system/02-six-layer-architecture.md +45 -0
  237. package/docs/open-system/03-role-system.md +33 -0
  238. package/docs/open-system/04-core-mechanisms.md +34 -0
  239. package/docs/open-system/05-failure-patterns.md +31 -0
  240. package/docs/open-system/06-how-to-adapt-to-your-workflow.md +31 -0
  241. package/package.json +69 -0
  242. package/privacy-manifest.json +78 -0
  243. package/privacy-scan.local.json.example +18 -0
  244. package/scripts/lib/forbidden-in-pack.js +55 -0
  245. package/scripts/pack-check.js +154 -0
  246. package/scripts/privacy-scan.js +487 -0
  247. package/scripts/validate-contract.js +160 -0
  248. package/src/adapters.js +590 -0
  249. package/src/bootstrap.js +1184 -0
  250. package/src/catalog.js +2723 -0
  251. package/src/cli.js +2899 -0
  252. package/src/dialogue.js +470 -0
  253. package/src/i18n.js +1034 -0
  254. package/src/ledger.js +2011 -0
  255. package/src/render.js +1381 -0
  256. package/src/sendmodel.js +452 -0
  257. package/src/validate.js +1307 -0
  258. package/src/workspace.js +1679 -0
  259. package/tests/contract.test.js +8514 -0
@@ -0,0 +1,1307 @@
1
+ import { existsSync, readFileSync, readdirSync, statSync } from "node:fs";
2
+ import path from "node:path";
3
+ import {
4
+ mechanismDefinitions,
5
+ requiredAdapterIds,
6
+ requiredCaseIds,
7
+ requiredMechanismIds,
8
+ requiredPromptFiles,
9
+ requiredSkillIds,
10
+ requiredWorkspaceDirs
11
+ } from "./catalog.js";
12
+ import {
13
+ parseLedgerFile,
14
+ ledgerPath,
15
+ TASK_STATUSES,
16
+ GUARD_LEVELS,
17
+ RECEIPT_VERDICTS,
18
+ REVIEW_MODES,
19
+ doneRequiresEvidence,
20
+ ownedEvidenceIds,
21
+ ownedRerunEvidenceIds,
22
+ ownedCrossFamilyGuardEvidenceIds,
23
+ guardLevelVerdictError,
24
+ guardLevelRank,
25
+ computeReceiptGuardLevel,
26
+ ownerAcceptanceError,
27
+ receiptStatusFor,
28
+ specialEvidenceStructureError,
29
+ rerunRunReconcileError,
30
+ learningRecordError,
31
+ EVIDENCE_KIND_RERUN,
32
+ RECEIPT_STATUSES
33
+ } from "./ledger.js";
34
+
35
// Reports whether `record` is a rerun evidence row that carries a usable runId.
// Only these rows are inspected by the L4 reconciliation read-check (2c). A
// rerun without a runId remains a valid generic rerun that cannot reach L4, so
// it is deliberately answered with `false` here.
//
// Returns true only when `record` is a non-null object, its `kind` equals
// EVIDENCE_KIND_RERUN, and its `runId` is a string with non-whitespace content.
function isRerunWithRunId(record) {
  if (record === null || record === undefined) return false;
  if (typeof record !== "object") return false;
  if (record.kind !== EVIDENCE_KIND_RERUN) return false;

  const { runId } = record;
  return typeof runId === "string" && runId.trim().length > 0;
}
47
+
48
// Synchronously loads `file` and returns its contents decoded as UTF-8 text.
// Any error raised by readFileSync (e.g. the file does not exist) propagates.
function read(file) {
  const text = readFileSync(file, { encoding: "utf8" });
  return text;
}
51
+
52
// Joins `parts` onto `root` and reports whether anything (file or directory)
// exists at the resulting path.
function exists(root, ...parts) {
  const target = path.join(root, ...parts);
  return existsSync(target);
}
55
+
56
// Returns the UTF-8 contents of the file at `root/...parts`.
// When the path is absent or is a directory, a "missing file <relative path>"
// message is appended to `errors` and the empty string is returned instead, so
// callers can keep running their content checks without a null guard.
function requireFile(errors, root, ...parts) {
  const file = path.join(root, ...parts);
  const isRegularEntry = existsSync(file) && !statSync(file).isDirectory();
  if (isRegularEntry) {
    return read(file);
  }

  errors.push(`missing file ${path.relative(root, file)}`);
  return "";
}
64
+
65
+ function requireDir(errors, root, ...parts) {
66
+ const dir = path.join(root, ...parts);
67
+ if (!existsSync(dir) || !statSync(dir).isDirectory()) {
68
+ errors.push(`missing directory ${path.relative(root, dir)}`);
69
+ }
70
+ }
71
+
72
// Appends "<label> missing <phrase>" to `errors` for every phrase that does not
// occur in `content`. Each phrase is compiled as a case-insensitive regular
// expression source (not a literal string), so regex metacharacters inside a
// phrase keep their regex meaning.
function includesAll(errors, label, content, phrases) {
  for (const phrase of phrases) {
    const pattern = new RegExp(phrase, "i");
    if (pattern.test(content)) continue;
    errors.push(`${label} missing ${phrase}`);
  }
}
79
+
80
+ // --- Deep-validation helpers ----------------------------------------------
81
+ //
82
+ // These power the structural depth checks (P2): they look past "the file
83
+ // exists and contains keyword X" into "this file actually carries the
84
+ // substance a real workspace would". Each helper is intentionally cheap and
85
+ // deterministic so a degraded workspace fails loudly with a pointable reason.
86
+
87
// Lookup table from mechanism id to its catalog definition, filled once at
// module load from the imported `mechanismDefinitions` list.
const MECHANISM_BY_ID = new Map();
for (const definition of mechanismDefinitions) {
  MECHANISM_BY_ID.set(definition.id, definition);
}

// README headings that a "deepened" mechanism must keep. A mechanism is
// deepened when its catalog entry carries the 9-element shape (antiTrigger +
// inputsDetailed + outputShape + passBar + rejectBar + misuse). Lighter
// mechanisms that only ship Purpose/When/Input/Process/Package files are not
// held to these anchors; for deepened ones a missing anchor is a regression.
const DEEP_MECHANISM_README_ANCHORS = [
  "## When not to use",
  "## Input materials",
  "## Output shape",
  "## Pass bar",
  "## Reject bar",
  "## Common misuse"
];
102
+
103
// Reports whether the catalog entry for `mechanismId` carries every deepened
// field. Unknown ids, and entries where any of the six fields is falsy, yield
// false.
function isDeepMechanism(mechanismId) {
  const entry = MECHANISM_BY_ID.get(mechanismId);
  if (!entry) return false;

  const deepFields = [
    "antiTrigger",
    "inputsDetailed",
    "outputShape",
    "passBar",
    "rejectBar",
    "misuse"
  ];
  return deepFields.every((field) => Boolean(entry[field]));
}
115
+
116
// Non-reporting counterpart of requireFile: returns the UTF-8 contents of
// `root/...parts`, or null when the path is absent or is a directory. Nothing
// is pushed to any error list.
function readIfPresent(root, ...parts) {
  const file = path.join(root, ...parts);
  const isReadableFile = existsSync(file) && !statSync(file).isDirectory();
  return isReadableFile ? read(file) : null;
}
121
+
122
// Counts the lines of `content` that begin (at column 0) with a ``` fence
// marker. Opening and closing markers are counted individually, so one fenced
// block contributes 2.
function countFences(content) {
  const fenceStarts = content.match(/^```/gm);
  return fenceStarts === null ? 0 : fenceStarts.length;
}
125
+
126
// Splits `content` on "\n" and returns, untrimmed and in order, the lines that
// contain at least one non-whitespace character.
function nonEmptyLines(content) {
  const kept = [];
  for (const line of content.split("\n")) {
    if (line.trim() !== "") kept.push(line);
  }
  return kept;
}
129
+
130
// Returns the "substance" lines of an artifact: every trimmed, non-empty line
// that is not part of the fixed scaffold. Dropped as scaffold are
//   - any line starting with "#" (the title and section headings),
//   - any line starting with a ``` fence marker,
//   - the boilerplate trailer beginning "This artifact makes the case runnable
//     and reviewable." (matched case-insensitively).
// What remains is what distinguishes a genuinely filled artifact from a
// hollowed-out template that still keeps its headings.
function substanceLines(content) {
  const scaffoldTrailer = /^This artifact makes the case runnable and reviewable\./i;
  const kept = [];
  for (const rawLine of content.split("\n")) {
    const line = rawLine.trim();
    if (line.length === 0) continue;
    if (line.startsWith("#") || line.startsWith("```")) continue;
    if (scaffoldTrailer.test(line)) continue;
    kept.push(line);
  }
  return kept;
}
143
+
144
// Returns the text of every level-2 ("## ") heading in `content`, trimmed and
// lower-cased, in document order. Used both to detect duplicated stacked
// sections inside one file and to compare heading sets across cases.
//
// The separator after "##" is limited to horizontal whitespace and the title
// must start with a non-space character. A plain `\s+` separator would run
// across the line break, so an empty "## " line would swallow the next
// non-blank body line and report it as a heading.
function level2Headings(content) {
  const headingPattern = /^##[^\S\r\n\u2028\u2029]+(\S.*?)\s*$/gm;
  return [...content.matchAll(headingPattern)].map((match) => match[1].trim().toLowerCase());
}
153
+
154
// Builds a normalized signature of a CASE.md body for catching "same
// boilerplate copied across N cases". Fenced code blocks (the per-case unique
// text) are removed first; the remainder is lower-cased and reduced to its
// runs of [a-z0-9], keeping only tokens longer than three characters, joined
// by single spaces. Genuinely different cases share headings but differ in
// prose; a copy-paste clone collapses to (nearly) the same signature.
function caseBodySignature(content) {
  const withoutFences = content.replace(/```[\s\S]*?```/g, " ");
  const words = withoutFences.toLowerCase().match(/[a-z0-9]+/g) ?? [];
  return words.filter((word) => word.length > 3).join(" ");
}
168
+
169
+ // ===========================================================================
170
+ // Deep structural validation (P2), refactored into one named sub-function per
171
+ // numbered block. Unlike the ledger checks (which return error arrays), the
172
+ // deep blocks interleave `tick()` calls inside their loops and write to BOTH
173
+ // `errors` and `warnings`, so each block keeps the original passthrough shape:
174
+ // it receives the shared `errors` / `warnings` / `tick` and performs its own
175
+ // ticks at the EXACT points the inline block did. The split is mechanical
176
+ // (extract-method) — every pushed string, every tick, and their order are
177
+ // byte-for-byte the pre-refactor behavior; the deepValidate orchestrator just
178
+ // calls the blocks in sequence and threads the values they share (the parsed
179
+ // manifest, the flagship artifact bodies, the case-dir list). Each is exported
180
+ // so a unit test can exercise one structural block in isolation too.
181
+ // ===========================================================================
182
+
183
// Asset lists a manifest may declare, in the order they are reported. For each:
// the manifest field, the noun used in the error text, the workspace directory
// the ids live under, and the marker file expected inside `<dir>/<id>/`
// (null when the listed id is itself the file name, as for prompts).
const MANIFEST_ASSET_KINDS = [
  { field: "mechanisms", noun: "mechanism", dir: "mechanisms", marker: "README.md" },
  { field: "prompts", noun: "prompt", dir: "prompts", marker: null },
  { field: "skills", noun: "skill", dir: "skills", marker: "SKILL.md" },
  { field: "adapters", noun: "adapter", dir: "adapters", marker: "ADAPTER.md" },
  { field: "syntheticCases", noun: "case", dir: "examples", marker: "CASE.md" }
];

// True when a manifest list entry can be used as a path segment: a string with
// at least one non-whitespace character.
function isUsableManifestEntry(entry) {
  return typeof entry === "string" && entry.trim().length > 0;
}

// (1) Manifest really exists, parses, and its declared files/dirs are real.
//
// Ticks once, reads WORKSPACE_MANIFEST.json from `workspace`, and pushes one
// message onto `errors` for each problem found: manifest missing, invalid JSON,
// a required top-level field undefined, a declared workspace directory absent,
// or a listed mechanism / prompt / skill / adapter / case asset absent.
//
// List entries that are not non-empty strings are reported as errors and
// skipped. Passing a non-string to path.join would throw a TypeError and abort
// the whole validation run, and an empty string would resolve to the parent
// directory and silently pass.
//
// Returns the parsed manifest so check (2) can reuse it without a second
// parse, or null when the manifest is missing or is not valid JSON.
export function deepCheckManifest(workspace, errors, tick) {
  tick();
  const manifestRaw = readIfPresent(workspace, "WORKSPACE_MANIFEST.json");
  let manifest = null;
  if (manifestRaw === null) {
    errors.push("manifest WORKSPACE_MANIFEST.json is missing");
  } else {
    try {
      manifest = JSON.parse(manifestRaw);
    } catch (parseError) {
      errors.push(`manifest WORKSPACE_MANIFEST.json is not valid JSON (${parseError.message})`);
    }
  }

  // Nothing further to inspect without a (truthy) parsed manifest.
  if (!manifest) return manifest;

  const requiredFields = ["name", "workspaceDirs", "layers", "mechanisms", "prompts", "skills", "adapters", "syntheticCases"];
  for (const field of requiredFields) {
    if (manifest[field] === undefined) errors.push(`manifest missing field "${field}"`);
  }

  // Every directory the manifest declares must actually exist on disk.
  if (Array.isArray(manifest.workspaceDirs)) {
    for (const dir of manifest.workspaceDirs) {
      if (!isUsableManifestEntry(dir)) {
        errors.push(`manifest.workspaceDirs contains an invalid entry ${JSON.stringify(dir)} (expected a non-empty string)`);
        continue;
      }
      if (!exists(workspace, dir) || !statSync(path.join(workspace, dir)).isDirectory()) {
        errors.push(`manifest declares directory "${dir}" but it is missing on disk`);
      }
    }
  }

  // Manifest-listed mechanism / prompt / skill / adapter / case assets exist.
  for (const { field, noun, dir, marker } of MANIFEST_ASSET_KINDS) {
    const listed = manifest[field];
    if (!Array.isArray(listed)) continue;
    for (const id of listed) {
      if (!isUsableManifestEntry(id)) {
        errors.push(`manifest.${field} contains an invalid entry ${JSON.stringify(id)} (expected a non-empty string)`);
        continue;
      }
      const segments = marker === null ? [id] : [id, marker];
      if (!exists(workspace, dir, ...segments)) {
        errors.push(`manifest lists ${noun} "${id}" but ${dir}/${segments.join("/")} is missing`);
      }
    }
  }

  return manifest;
}
254
+
255
// (2) Declared workspaceDirs == the actual governance dir set under .aict/.
//
// Ticks once. When `manifest` is missing or its `workspaceDirs` is not an
// array, nothing else happens. Otherwise three comparisons push onto `errors`,
// in this order:
//   a. directories found directly under `workspace` that the manifest does not
//      declare ("walkthroughs" is generated but intentionally not a manifest
//      dir, so it is exempt rather than reported as extra);
//   b. declared directories that are not present on disk;
//   c. canonical dirs from `requiredWorkspaceDirs` that the manifest omits,
//      which catches a manifest hand-edited away from the generator.
export function deepCheckWorkspaceDirs(workspace, manifest, errors, tick) {
  tick();
  if (!manifest || !Array.isArray(manifest.workspaceDirs)) return;

  const declared = new Set(manifest.workspaceDirs);
  const exempt = new Set(["walkthroughs"]);

  const onDisk = [];
  for (const entry of readdirSync(workspace, { withFileTypes: true })) {
    if (entry.isDirectory()) onDisk.push(entry.name);
  }
  const onDiskSet = new Set(onDisk);

  for (const dir of onDisk) {
    const accountedFor = declared.has(dir) || exempt.has(dir);
    if (!accountedFor) {
      errors.push(`directory "${dir}" exists under .aict/ but is not declared in manifest.workspaceDirs`);
    }
  }

  for (const dir of declared) {
    if (!onDiskSet.has(dir)) {
      errors.push(`manifest.workspaceDirs declares "${dir}" but no such directory exists`);
    }
  }

  for (const dir of requiredWorkspaceDirs) {
    if (!declared.has(dir)) {
      errors.push(`manifest.workspaceDirs is missing canonical dir "${dir}"`);
    }
  }
}
285
+
286
// (3) Mechanism schema completeness: deepened mechanisms must keep their
// README structure anchors.
//
// Ticks once per id in `requiredMechanismIds`. For each mechanism whose
// README.md is readable and whose catalog entry is deepened (isDeepMechanism),
// every heading in DEEP_MECHANISM_README_ANCHORS absent from the README is
// pushed to `errors`. A missing README is skipped silently here; per the
// original note, the base pass's requireFile already reports it.
export function deepCheckMechanismSchema(workspace, errors, tick) {
  for (const mechanismId of requiredMechanismIds) {
    tick();

    const readmeBody = readIfPresent(workspace, "mechanisms", mechanismId, "README.md");
    if (readmeBody === null) continue;
    if (!isDeepMechanism(mechanismId)) continue;

    const lostAnchors = DEEP_MECHANISM_README_ANCHORS.filter((anchor) => !readmeBody.includes(anchor));
    for (const anchor of lostAnchors) {
      errors.push(`mechanisms/${mechanismId}/README.md lost deepened structure anchor "${anchor}"`);
    }
  }
}
306
+
307
// (4) Flagship lab completeness for the "ai-coding-long-task" example.
//
// First tick — presence: CASE.md and each of the eight expected artifacts must
// exist; every absence is pushed to `errors`.
// Second tick — causal chain: when guard-review.md is readable it must
//   - mention first-ai-output.md (error otherwise),
//   - cite at least one "line N" / "lines N-M" reference (error otherwise), and
//   - cite "lines 27-30", the onKeyDown stub anchor (warning otherwise),
// which shows the review looked at the code rather than name-dropping the file.
//
// Returns { flagshipId, firstAi, guard, revised }: the id plus the bodies of
// first-ai-output.md, guard-review.md and revised-output.md (null when not
// readable), so the depth block (5) can reuse them without re-reading.
export function deepCheckFlagship(workspace, errors, warnings, tick) {
  tick();
  const flagshipId = "ai-coding-long-task";
  const caseSegments = ["examples", flagshipId];
  const flagshipArtifactsDir = path.join(workspace, ...caseSegments, "artifacts");
  const readArtifact = (fileName) => readIfPresent(workspace, ...caseSegments, "artifacts", fileName);

  // The original note describes 11 "rungs" of the loop (case file + these 8
  // artifacts + the case's two narrative proof surfaces); only the
  // load-bearing artifacts are asserted directly.
  const expectedArtifacts = [
    "context-package.md",
    "acceptance-card.md",
    "execution-prompt.md",
    "first-ai-output.md",
    "guard-review.md",
    "revised-output.md",
    "handoff-note.md",
    "harvest-seed.md"
  ];

  const hasCaseFile = exists(workspace, ...caseSegments, "CASE.md");
  if (!hasCaseFile) {
    errors.push(`flagship examples/${flagshipId}/CASE.md is missing`);
  }
  for (const artifact of expectedArtifacts) {
    const artifactPath = path.join(flagshipArtifactsDir, artifact);
    if (existsSync(artifactPath)) continue;
    errors.push(`flagship artifact examples/${flagshipId}/artifacts/${artifact} is missing`);
  }

  const firstAi = readArtifact("first-ai-output.md");
  const guard = readArtifact("guard-review.md");
  const revised = readArtifact("revised-output.md");

  tick();
  if (guard !== null) {
    const mentionsFirstOutput = /first-ai-output\.md/i.test(guard);
    if (!mentionsFirstOutput) {
      errors.push(`flagship guard-review.md does not reference first-ai-output.md (causal chain broken)`);
    }

    const citesAnyLine = /\blines?\s+\d+(?:\s*-\s*\d+)?/i.test(guard);
    if (!citesAnyLine) {
      errors.push(`flagship guard-review.md cites no line numbers from first-ai-output.md (cannot prove it reviewed the code)`);
    }

    const citesStubRange = /\blines?\s+27\s*-\s*30\b/i.test(guard);
    if (!citesStubRange) {
      warnings.push(`flagship guard-review.md no longer cites the onKeyDown stub at lines 27-30 (causal-chain anchor weakened)`);
    }
  }

  return { flagshipId, firstAi, guard, revised };
}
363
+
364
// (5) Minimum artifact depth. The three flagship causal-chain artifacts must
// carry real structural substance (code fences, a completion claim, a verdict,
// evidence) rather than a template header plus one sentence; after that, every
// ordinary case artifact must clear a minimum substance floor.
//
// Ticks: one per flagship artifact (always three — a null body still ticks but
// is not inspected), then one per `.md` file under examples/<case>/artifacts.
// Findings are appended to `errors`.
// Returns the case directory names found under examples/ (reused by check 9).
export function deepCheckArtifactDepth(workspace, firstAi, guard, revised, errors, tick) {
  const fail = (message) => {
    errors.push(message);
  };

  // Flagship: the first AI output that the guard later reviews.
  tick();
  if (firstAi !== null) {
    if (countFences(firstAi) < 2) {
      fail(`flagship first-ai-output.md has no fenced code block (boilerplate, not a runnable artifact)`);
    }
    if (!/completion claim/i.test(firstAi)) {
      fail(`flagship first-ai-output.md is missing the completion claim it is supposed to expose`);
    }
    const firstAiDepth = substanceLines(firstAi).length;
    if (firstAiDepth < 12) {
      fail(`flagship first-ai-output.md is too thin (${firstAiDepth} substance lines; looks like boilerplate)`);
    }
  }

  // Flagship: the guard review.
  tick();
  if (guard !== null) {
    if (!/verdict/i.test(guard)) {
      fail(`flagship guard-review.md has no verdict (a review without a verdict is boilerplate)`);
    }
    if (!/evidence/i.test(guard)) {
      fail(`flagship guard-review.md cites no evidence section`);
    }
    const guardDepth = substanceLines(guard).length;
    if (guardDepth < 8) {
      fail(`flagship guard-review.md is too thin (${guardDepth} substance lines; looks like boilerplate)`);
    }
  }

  // Flagship: the revised output must actually show the keyboard fix.
  tick();
  if (revised !== null) {
    if (countFences(revised) < 2) {
      fail(`flagship revised-output.md has no fenced code block (the fix is not actually shown)`);
    }
    const showsKeyboardFix = /Arrow(?:Up|Down)/.test(revised) && /moveTask/.test(revised);
    if (!showsKeyboardFix) {
      fail(`flagship revised-output.md does not show the keyboard reorder fix (ArrowUp/Down -> moveTask)`);
    }
  }

  // Ordinary cases: collect every directory directly under examples/.
  const caseDirs = [];
  if (exists(workspace, "examples")) {
    for (const entry of readdirSync(path.join(workspace, "examples"), { withFileTypes: true })) {
      if (entry.isDirectory()) {
        caseDirs.push(entry.name);
      }
    }
  }

  // Each markdown artifact needs at least 2 substance lines, so a case cannot
  // be hollowed out to headings only or a single stub sentence.
  for (const caseId of caseDirs) {
    const artifactsDir = path.join(workspace, "examples", caseId, "artifacts");
    if (!existsSync(artifactsDir)) continue;
    for (const artifact of readdirSync(artifactsDir)) {
      if (!artifact.endsWith(".md")) continue;
      tick();
      const depth = substanceLines(read(path.join(artifactsDir, artifact))).length;
      if (depth < 2) {
        fail(`examples/${caseId}/artifacts/${artifact} is boilerplate-only (${depth} substance lines)`);
      }
    }
  }

  return caseDirs;
}
432
+
433
// (6) Cookbook recipes must carry the 8-element shape AND a real copy-paste
// block (a fenced code block), not just a prose outline.
//
// Ticks once per recipe, including recipes whose file is absent. An absent
// recipe is skipped here without an error (the base pass reports the missing
// file). Each element name is matched case-insensitively anywhere in the text.
export function deepCheckCookbook(workspace, errors, tick) {
  const recipes = ["run-a-first-loop.md", "connect-a-tool.md", "review-a-half-product.md"];
  // Build each case-insensitive matcher once instead of once per recipe.
  const requiredElements = [
    "When to use",
    "Prerequisites",
    "Steps",
    "Copy-paste block",
    "Expected output",
    "Failure handling",
    "Privacy note",
    "Next step"
  ].map((element) => ({ element, pattern: new RegExp(element, "i") }));

  for (const recipe of recipes) {
    tick();
    const content = readIfPresent(workspace, "cookbook", recipe);
    if (content !== null) {
      for (const { element, pattern } of requiredElements) {
        if (!pattern.test(content)) {
          errors.push(`cookbook/${recipe} missing operational element "${element}"`);
        }
      }
      if (countFences(content) < 2) {
        errors.push(`cookbook/${recipe} has no copy-paste fenced block (empty outline, not a do-it recipe)`);
      }
    }
  }
}
462
+
463
// (8 entry) Entry path consistency: the generated START_HERE first screen must
// point at the real flagship loop surfaces the workspace ships, so a reader
// who follows it does not hit a dead link.
//
// Ticks exactly once. If START_HERE.md is absent nothing else is checked.
//   workspace  - workspace root directory
//   flagshipId - directory name of the flagship case under examples/
//   errors     - array that findings are appended to
//   tick       - check counter callback
export function deepCheckEntryPath(workspace, flagshipId, errors, tick) {
  tick();
  const startHere = readIfPresent(workspace, "START_HERE.md");
  if (startHere === null) return;

  // Each entry is [text looked for in START_HERE, file that must exist].
  // The flagship label is derived from flagshipId so the text searched for and
  // the path checked can never refer to two different cases.
  const previewTargets = [
    ["walkthroughs/10-minute.md", path.join(workspace, "walkthroughs", "10-minute.md")],
    [`examples/${flagshipId}/CASE.md`, path.join(workspace, "examples", flagshipId, "CASE.md")]
  ];
  for (const [label, target] of previewTargets) {
    // Escape the label so it is matched literally (case-insensitively).
    const mention = new RegExp(label.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "i");
    if (mention.test(startHere) && !existsSync(target)) {
      errors.push(`START_HERE points to "${label}" but that path does not exist in the workspace`);
    }
  }

  // The handoff preview line must reflect the accepted/post-revised state, not
  // a stale "keyboard test pending" that contradicts revised-output.md.
  const handoffLine = (startHere.match(/^Handoff:.*$/m) ?? [])[0] ?? "";
  if (handoffLine && /keyboard[^.\n]*\b(pending|missing)\b|\b(pending|missing)\b[^.\n]*keyboard/i.test(handoffLine)) {
    errors.push(`START_HERE "Handoff:" preview still describes keyboard work as pending/missing (contradicts the accepted revised output)`);
  }
}
487
+
488
// (9) Duplicate-boilerplate detection: no level-2 heading may repeat inside a
// single CASE.md, and no two cases may collapse to the same body signature
// (the "same boilerplate copied N times" failure).
//
// Ticks once per entry in caseDirs (even when that case has no CASE.md), then
// one final tick for the cross-case comparison. Findings go to `errors`.
export function deepCheckDuplicateBoilerplate(workspace, caseDirs, errors, tick) {
  const signatures = [];

  for (const caseId of caseDirs) {
    tick();
    const body = readIfPresent(workspace, "examples", caseId, "CASE.md");
    if (body === null) continue;

    // Count occurrences per heading; Map keeps first-seen order for reporting.
    const occurrences = new Map();
    for (const heading of level2Headings(body)) {
      occurrences.set(heading, (occurrences.get(heading) ?? 0) + 1);
    }
    occurrences.forEach((count, heading) => {
      if (count >= 2) {
        errors.push(`examples/${caseId}/CASE.md duplicates level-2 heading "## ${heading}" (${count}x; stacked boilerplate)`);
      }
    });

    signatures.push({ caseId, signature: caseBodySignature(body) });
  }

  // Cross-case comparison: every unordered pair is compared once. Identical
  // non-empty signatures mean the synthetic prose was copied wholesale instead
  // of being a distinct case. Empty signatures are skipped, not reported.
  tick();
  signatures.forEach((a, index) => {
    for (const b of signatures.slice(index + 1)) {
      if (a.signature.length > 0 && a.signature === b.signature) {
        errors.push(`examples/${a.caseId}/CASE.md and examples/${b.caseId}/CASE.md are identical boilerplate copies`);
      }
    }
  });
}
525
+
526
// Deep structural validation orchestrator (P2). A thin sequencer: it runs each
// numbered check in a fixed order and threads the values they share
// (parsed manifest -> dir check; flagship id and artifact bodies -> depth and
// entry-path checks; case-dir list -> duplicate check). Every check receives
// the same `tick`, which increments counters.deepChecks by one per call.
// Findings are appended to `errors` / `warnings`; nothing is returned.
function deepValidate(workspace, errors, warnings, counters) {
  function tick() {
    counters.deepChecks += 1;
  }

  // (1) manifest, then (2) workspace dirs, which reads the parsed manifest.
  const manifest = deepCheckManifest(workspace, errors, tick);
  deepCheckWorkspaceDirs(workspace, manifest, errors, tick);

  // (3) mechanism schema.
  deepCheckMechanismSchema(workspace, errors, tick);

  // (4) flagship causal chain — the only check here that can emit warnings.
  const flagship = deepCheckFlagship(workspace, errors, warnings, tick);
  const { flagshipId, firstAi, guard, revised } = flagship;

  // (5) artifact depth; its case-dir list is reused by (9).
  const caseDirs = deepCheckArtifactDepth(workspace, firstAi, guard, revised, errors, tick);

  // (6) cookbook, (8 entry) START_HERE entry path, (9) duplicate boilerplate.
  deepCheckCookbook(workspace, errors, tick);
  deepCheckEntryPath(workspace, flagshipId, errors, tick);
  deepCheckDuplicateBoilerplate(workspace, caseDirs, errors, tick);

  // (7) PUBLIC_MAPPING coverage — scope note. docs/PUBLIC_MAPPING.md lives in
  // the repo, NOT inside the .aict user workspace this validator inspects, so
  // it is intentionally out of scope here and no check is emitted for it.

  // (8) Run-layer ledger integrity (P1). The five JSONL ledgers under state/
  // are the live substance of the run loop, so a degraded ledger (corrupt line,
  // orphaned evidence, illegal status, broken reference, a "done" task with no
  // evidence, an accepted receipt with no evidence) must fail loudly with a
  // pointable reason rather than be silently accepted.
  validateLedgers(workspace, errors, tick);
}
564
+
565
+ // ===========================================================================
566
+ // Run-layer ledger validation — refactored into one named sub-function per
567
+ // numbered integrity check. Each `check*` function:
568
+ // - receives a parsed-ledger context `ctx` (the five record arrays plus the
569
+ // derived id Sets), reads only what it needs, and
570
+ // - RETURNS an array of error strings (never mutates shared state).
571
+ // The error strings are byte-for-byte identical to the pre-refactor inline
572
+ // blocks — this is a behavior-preserving split, NOT a wording change. The
573
+ // `validateLedgers` orchestrator calls them in the original order, ticks once
574
+ // per check exactly as before, and appends each returned array to `errors`.
575
+ // Exporting them lets a unit test feed one check a hand-built ledger directly
576
+ // (no whole-workspace round-trip), which is what closes the thin-coverage gaps
577
+ // the mutation tests surfaced.
578
+ // ===========================================================================
579
+
580
// Build the parsed-ledger context once, so no later check re-parses a ledger
// or re-derives an id Set. Checks (1) and (1b) are folded in here because both
// work per ledger file.
//
// For each of the five ledgers (tasks, evidence, runs, receipts, learning):
//   (1)  Bad JSONL — every parser-reported problem becomes one message with
//        file name + line number. A problem tagged kind "type" uses the
//        parser's own message verbatim; any other kind is reported as
//        "is not valid JSON (...)".
//   (1b) Per-ledger id integrity — every record needs a non-empty string id,
//        and ids must be unique within that ledger.
//
// Returns { LEDGER_KEYS, parseErrorsByKey, idErrorsByKey, tasks, evidence,
// receipts, runs, learning, taskIds, evidenceIds, tasksWithEvidence }. The two
// *ByKey maps hold one ordered message list per ledger key, so a caller can
// report file by file.
//
// Exported so a unit test can build the same context from a state dir it
// populated with hand-crafted .jsonl rows and feed one check directly.
export function buildLedgerContext(stateDir) {
  const LEDGER_KEYS = ["tasks", "evidence", "runs", "receipts", "learning"];
  const parsed = {};
  const parseErrorsByKey = {};
  const idErrorsByKey = {};

  for (const key of LEDGER_KEYS) {
    const file = ledgerPath(stateDir, key);
    const ledgerName = path.basename(file);
    const { records, errors: parseErrors } = parseLedgerFile(file);
    parsed[key] = records;

    // (1) Translate parser problems into pointable messages.
    const parseMessages = [];
    for (const parseError of parseErrors) {
      let reason = `is not valid JSON (${parseError.message})`;
      if (parseError.kind === "type") {
        reason = parseError.message;
      }
      parseMessages.push(`ledger ${ledgerName}:${parseError.line} ${reason}`);
    }
    parseErrorsByKey[key] = parseMessages;

    // (1b) A blank/non-string id is reported and never enters `seenIds`; a
    // repeated id is reported on each repeat.
    const seenIds = new Set();
    const idMessages = [];
    for (const record of records) {
      const id = record.id;
      const hasUsableId = typeof id === "string" && id.length > 0;
      if (!hasUsableId) {
        idMessages.push(`ledger ${ledgerName} has a record with a missing or non-string id`);
        continue;
      }
      if (seenIds.has(id)) {
        idMessages.push(`ledger ${ledgerName} has duplicate id "${id}"`);
      }
      seenIds.add(id);
    }
    idErrorsByKey[key] = idMessages;
  }

  // The runs ledger is handed on as well: per the original note, a rerun only
  // counts toward L4 if it reconciles against a recorded run (A1 L4).
  const { tasks, evidence, receipts, runs, learning } = parsed;

  // NOTE(review): the Sets below are built from raw `id` / `taskId` values, so
  // a record that lacks the field contributes `undefined` to the Set. Those
  // records are already reported by (1b) for `id`; confirm that is sufficient.
  return {
    LEDGER_KEYS,
    parseErrorsByKey,
    idErrorsByKey,
    tasks,
    evidence,
    receipts,
    runs,
    learning,
    taskIds: new Set(tasks.map((task) => task.id)),
    evidenceIds: new Set(evidence.map((item) => item.id)),
    // Task ids that have at least one evidence row pointing at them (check 5).
    tasksWithEvidence: new Set(evidence.map((item) => item.taskId))
  };
}
662
+
663
// (3) Illegal task status: every task's status must be one of TASK_STATUSES.
// Returns one message per offending task (empty array when all are legal);
// a task without an id is labelled "(no id)".
export function checkTaskStatusEnum(ctx) {
  return ctx.tasks
    .filter((task) => !TASK_STATUSES.includes(task.status))
    .map((task) => `ledger tasks.jsonl task ${task.id ?? "(no id)"} has illegal status "${task.status}" (allowed: ${TASK_STATUSES.join(", ")})`);
}
673
+
674
// (5) A done task must have evidence. A task marked done with no evidence row
// pointing at it is exactly the "thin done" the system exists to catch.
// Whether a status demands evidence is decided by the shared
// doneRequiresEvidence predicate (per the original note, the same one the CLI
// writer applies on task update, so write-time and read-time cannot drift).
//
// Returns one message per offending task. A task with no id is labelled
// "(no id)", matching every other ledger check, instead of printing
// "task undefined".
export function checkDoneRequiresEvidence(ctx) {
  const errors = [];
  for (const task of ctx.tasks) {
    if (!doneRequiresEvidence(task.status)) continue;
    if (ctx.tasksWithEvidence.has(task.id)) continue;
    errors.push(`ledger tasks.jsonl task ${task.id ?? "(no id)"} is "done" but has no evidence (only blocked/partial/unverified may have none)`);
  }
  return errors;
}
687
+
688
// (2) Orphan evidence: every evidence row's taskId must be a member of
// ctx.taskIds. Returns one message per orphaned row; a row without an id is
// labelled "(no id)".
export function checkOrphanEvidence(ctx) {
  return ctx.evidence
    .filter((item) => !ctx.taskIds.has(item.taskId))
    .map((item) => `ledger evidence.jsonl evidence ${item.id ?? "(no id)"} references unknown task "${item.taskId}" (orphan)`);
}
698
+
699
// (2b) Special-evidence structure (P2 structure gate): a load-bearing evidence
// kind must carry its required structured fields, not just the right label.
// The decision is delegated entirely to the shared specialEvidenceStructureError
// predicate (per the original note, the same one the CLI writer applies at
// evidence-add time), so a hand-planted empty-shell row is caught at read time
// even if no receipt cites it yet. Rows for which the predicate returns a
// falsy value produce no message.
export function checkSpecialEvidenceStructure(ctx) {
  return ctx.evidence.flatMap((item) => {
    const structureError = specialEvidenceStructureError(item);
    return structureError
      ? [`ledger evidence.jsonl evidence ${item.id ?? "(no id)"}: ${structureError}`]
      : [];
  });
}
717
+
718
// (2c) Rerun-run reconciliation (A1 L4 reconciliation, read side): a rerun row
// that carries a runId must reconcile against the recorded runs, as judged by
// the shared rerunRunReconcileError helper (per the original note, the same
// one the CLI applies at evidence-add time). This catches a hand-edited jsonl
// that bolts a runId onto a rerun which disagrees with the recorded run, even
// before any receipt cites it.
//
// Rows are examined only when BOTH hold:
//   - isRerunWithRunId(item) is true — a rerun with no runId is not an
//     integrity error here; and
//   - specialEvidenceStructureError(item) is null — structurally incomplete
//     rows are already reported by (2b), so they are not reported twice.
export function checkRerunRunReconcile(ctx) {
  const errors = [];
  for (const item of ctx.evidence) {
    const eligible = isRerunWithRunId(item) && specialEvidenceStructureError(item) === null;
    if (!eligible) continue;
    const reconcileError = rerunRunReconcileError(item, ctx.runs);
    if (reconcileError) {
      errors.push(`ledger evidence.jsonl evidence ${item.id ?? "(no id)"}: ${reconcileError}`);
    }
  }
  return errors;
}
742
+
743
// (4) Broken reference: every id listed in a receipt's evidenceIds must be a
// member of ctx.evidenceIds. A receipt whose evidenceIds is not an array is
// treated as citing nothing. Returns one message per unknown id, in receipt
// order and then citation order.
export function checkReceiptEvidenceRefs(ctx) {
  return ctx.receipts.flatMap((receipt) => {
    const cited = Array.isArray(receipt.evidenceIds) ? receipt.evidenceIds : [];
    return cited
      .filter((id) => !ctx.evidenceIds.has(id))
      .map((id) => `ledger receipts.jsonl receipt ${receipt.id ?? "(no id)"} references unknown evidence "${id}" (broken reference)`);
  });
}
756
+
757
// (4b) Receipt task reference: a receipt's taskId must be a member of
// ctx.taskIds. A receipt for a task that does not exist is a dangling
// receipt, and the cross-task check (4c) is meaningless for it, since
// evidence cannot "belong" to an unknown task. Returns one message per
// dangling receipt.
export function checkReceiptTaskRef(ctx) {
  return ctx.receipts
    .filter((receipt) => !ctx.taskIds.has(receipt.taskId))
    .map((receipt) => `ledger receipts.jsonl receipt ${receipt.id ?? "(no id)"} references unknown task "${receipt.taskId}"`);
}
771
+
772
// (4c) Cross-task evidence: every evidence id a receipt cites must belong to
// the receipt's OWN task. Citing another task's evidence is the back door that
// lets a task with no evidence of its own be written "accepted" on borrowed
// proof. Ownership comes from the shared ownedEvidenceIds filter (per the
// original note, the same one the CLI writer applies).
//
// Only ids that exist in ctx.evidenceIds but are not owned by the receipt's
// task are flagged here; unknown ids are left to check (4).
export function checkReceiptCrossTaskEvidence(ctx) {
  return ctx.receipts.flatMap((receipt) => {
    const cited = Array.isArray(receipt.evidenceIds) ? receipt.evidenceIds : [];
    const ownedByTask = new Set(ownedEvidenceIds(cited, receipt.taskId, ctx.evidence));
    return cited
      .filter((id) => ctx.evidenceIds.has(id) && !ownedByTask.has(id))
      .map((id) => `ledger receipts.jsonl receipt ${receipt.id ?? "(no id)"} cites evidence "${id}" that belongs to another task (not task "${receipt.taskId}")`);
  });
}
792
+
793
// (4d) rerunEvidenceIds reference integrity (G2): rerunEvidenceIds is a real
// cited-evidence list, so it gets the same reference checks as evidenceIds,
// for ALL receipts — not only those that claim an L4 pass. This mirrors
// (4) + (4c):
//   - an id absent from ctx.evidenceIds is a broken reference;
//   - a known id not owned by the receipt's task (per the shared
//     ownedEvidenceIds predicate) is a cross-task citation.
// Receipts with no rerunEvidenceIds (missing, non-array, or empty) are skipped
// before ownership is computed. The stronger L4 kind/structure requirement is
// handled separately by check 8.
export function checkRerunEvidenceIdRefs(ctx) {
  const errors = [];
  for (const receipt of ctx.receipts) {
    const rerunIds = Array.isArray(receipt.rerunEvidenceIds) ? receipt.rerunEvidenceIds : [];
    if (rerunIds.length > 0) {
      const ownedByTask = new Set(ownedEvidenceIds(rerunIds, receipt.taskId, ctx.evidence));
      const receiptLabel = receipt.id ?? "(no id)";
      for (const id of rerunIds) {
        const known = ctx.evidenceIds.has(id);
        if (known && ownedByTask.has(id)) continue;
        errors.push(
          known
            ? `ledger receipts.jsonl receipt ${receiptLabel} cites rerun evidence "${id}" that belongs to another task (not task "${receipt.taskId}")`
            : `ledger receipts.jsonl receipt ${receiptLabel} references unknown rerun evidence "${id}" (broken reference)`
        );
      }
    }
  }
  return errors;
}
821
+
822
// (6) Accepted receipt must cite SAME-TASK evidence: an accepted status with
// no evidence belonging to its own task is an unsupported acceptance, whether
// evidenceIds is empty (or not an array) or only cites another task's rows.
// Counting through ownedEvidenceIds rather than raw list length closes the
// cross-task back door at the status level too. Receipts whose status is not
// exactly "accepted" are ignored.
export function checkAcceptedReceiptHasEvidence(ctx) {
  return ctx.receipts
    .filter((receipt) => receipt.status === "accepted")
    .filter((receipt) => {
      const cited = Array.isArray(receipt.evidenceIds) ? receipt.evidenceIds : [];
      return ownedEvidenceIds(cited, receipt.taskId, ctx.evidence).length === 0;
    })
    .map((receipt) => `ledger receipts.jsonl receipt ${receipt.id ?? "(no id)"} is "accepted" but cites no evidence`);
}
842
+
843
// (7) Guard level present + valid (P2): every receipt must carry a guardLevel
// that is a member of GUARD_LEVELS. The verdict-consistency check (8) bounds
// the verdict against this level, so a missing or bogus level is rejected here
// first. Returns one message per offending receipt.
export function checkGuardLevelEnum(ctx) {
  return ctx.receipts
    .filter((receipt) => !GUARD_LEVELS.includes(receipt.guardLevel))
    .map((receipt) => `ledger receipts.jsonl receipt ${receipt.id ?? "(no id)"} has missing/illegal guardLevel "${receipt.guardLevel}" (allowed: ${GUARD_LEVELS.join(", ")})`);
}
856
+
857
// (8) Verdict x guardLevel consistency (P2 core): the verdict a receipt carries
// must be one its guard level can back. The judgement itself is delegated to
// the shared guardLevelVerdictError predicate (per the original note, the same
// one the CLI writer applies), so a hand-planted row is caught at read time
// exactly as the writer refuses it at write time.
//
// Inputs handed to the predicate, in order:
//   1. receipt.guardLevel
//   2. receipt.verdict
//   3. whether ownedRerunEvidenceIds yields at least one id — only reruns that
//      belong to this task and reconcile against ctx.runs count (A1 L4)
//   4. whether ownedCrossFamilyGuardEvidenceIds yields at least one id — an L3
//      pass must cite a cross_family_guard row owned by this task
//   5. whether the receipt lists any rerunEvidenceIds at all
// Receipts whose guardLevel is not in GUARD_LEVELS are skipped; check 7
// reports those.
export function checkVerdictGuardLevelConsistency(ctx) {
  const listOf = (value) => (Array.isArray(value) ? value : []);
  const errors = [];

  for (const receipt of ctx.receipts) {
    if (GUARD_LEVELS.includes(receipt.guardLevel)) {
      const rerunIds = listOf(receipt.rerunEvidenceIds);
      const hasOwnedRerun = ownedRerunEvidenceIds(rerunIds, receipt.taskId, ctx.evidence, ctx.runs).length > 0;

      const citedIds = listOf(receipt.evidenceIds);
      const hasOwnedCrossFamily = ownedCrossFamilyGuardEvidenceIds(citedIds, receipt.taskId, ctx.evidence).length > 0;

      const consistencyError = guardLevelVerdictError(
        receipt.guardLevel,
        receipt.verdict,
        hasOwnedRerun,
        hasOwnedCrossFamily,
        rerunIds.length > 0
      );
      if (consistencyError) {
        errors.push(`ledger receipts.jsonl receipt ${receipt.id ?? "(no id)"}: ${consistencyError}`);
      }
    }
  }
  return errors;
}
893
+
894
// (8b) Review-mode validity (A1): a receipt's reviewMode, when present, must
// be a member of REVIEW_MODES. The mode feeds the level computation, so a
// bogus value (a typo, or a made-up mode) must be caught before check 8c
// trusts it. reviewMode is OPTIONAL: a receipt where it is undefined is fine;
// only a present-but-illegal value is flagged.
export function checkReviewModeEnum(ctx) {
  return ctx.receipts
    .filter((receipt) => receipt.reviewMode !== undefined)
    .filter((receipt) => !REVIEW_MODES.includes(receipt.reviewMode))
    .map((receipt) => `ledger receipts.jsonl receipt ${receipt.id ?? "(no id)"} has illegal reviewMode "${receipt.reviewMode}" (allowed: ${REVIEW_MODES.join(", ")})`);
}
909
+
910
// (8c) Computed-level integrity (A1 CORE, read side): the guard level is
// COMPUTED from the review method + the evidence, never self-asserted. A
// hand-edited receipts.jsonl could store a guardLevel higher than the method
// and evidence support, so the level is re-computed with the shared
// computeReceiptGuardLevel helper and compared by guardLevelRank.
//
// Only OVER-claiming is an error: a stored level ranked at or below the
// computed one is allowed (a row may under-claim). Receipts with a guardLevel
// outside GUARD_LEVELS, or a present reviewMode outside REVIEW_MODES, are
// skipped because checks 7 and 8b already report them.
export function checkReceiptComputedLevel(ctx) {
  const errors = [];
  for (const receipt of ctx.receipts) {
    const levelIsLegal = GUARD_LEVELS.includes(receipt.guardLevel);
    const modeIsLegal = receipt.reviewMode === undefined || REVIEW_MODES.includes(receipt.reviewMode);
    if (!levelIsLegal || !modeIsLegal) continue;

    const computed = computeReceiptGuardLevel(receipt, ctx.evidence, ctx.runs);
    const overClaims = guardLevelRank(receipt.guardLevel) > guardLevelRank(computed.level);
    if (overClaims) {
      errors.push(`ledger receipts.jsonl receipt ${receipt.id ?? "(no id)"} claims guard level "${receipt.guardLevel}" but the review method + evidence only support "${computed.level}" (${computed.reason}); the level is computed, not self-asserted`);
    }
  }
  return errors;
}
934
+
935
// (8d) Family-honesty marker integrity (A1/C1): the stored familyUnverified
// marker must agree with what the shared computeReceiptGuardLevel computation
// reports.
//   - Computation flags the level as resting on a self-declared cross-family
//     claim, but the row lacks `familyUnverified: true` -> error, so a
//     hand-edit cannot strip the "unverified" mark to read like a hard pass.
//   - Row carries `familyUnverified: true`, but the computation does not flag
//     it -> error, so a row cannot advertise a caveat it has not earned.
// Only the literal `true` counts as a stored marker.
//
// Receipts are examined only when guardLevel and (if present) reviewMode are
// legal AND the stored level equals the computed level. An over-claimed level
// is already reported by 8c; an under-claimed level is permitted there, and in
// both cases the marker is not checked here.
export function checkFamilyUnverifiedMarker(ctx) {
  const errors = [];
  for (const receipt of ctx.receipts) {
    if (!GUARD_LEVELS.includes(receipt.guardLevel)) continue;
    if (receipt.reviewMode !== undefined && !REVIEW_MODES.includes(receipt.reviewMode)) continue;

    const computed = computeReceiptGuardLevel(receipt, ctx.evidence, ctx.runs);
    if (receipt.guardLevel !== computed.level) continue;

    const markerExpected = Boolean(computed.familyUnverified);
    const markerStored = receipt.familyUnverified === true;
    if (markerExpected === markerStored) continue;

    const receiptLabel = receipt.id ?? "(no id)";
    errors.push(
      markerExpected
        ? `ledger receipts.jsonl receipt ${receiptLabel} is a self-declared cross-family level (${computed.level}) but is missing the familyUnverified: true marker (the cross-family family is unverified and must be marked so)`
        : `ledger receipts.jsonl receipt ${receiptLabel} carries familyUnverified: true but its computed level (${computed.level}) is not an unverified cross-family level (the marker is unwarranted)`
    );
  }
  return errors;
}
962
+
963
+ // (9) Owner acceptance integrity (P2): an "accepted" pass_with_risk receipt
964
+ // MUST carry the owner-acceptance marker (ownerAccepted: true). A risk
965
+ // receipt exists precisely because a human accepted the named residual
966
+ // risk; an accepted risk receipt with no owner mark is an unsupported
967
+ // acceptance. Same ownerAcceptanceError predicate the CLI accept path uses.
968
/**
 * Check (9): run the shared `ownerAcceptanceError` predicate over every
 * receipt and report each non-empty result.
 *
 * @param {object} ctx - parsed-ledger context; reads `receipts`.
 * @returns {string[]} predicate messages, each prefixed with the ledger file name.
 */
export function checkOwnerAcceptanceMarker(ctx) {
  return Array.from(ctx.receipts, (receipt) => ownerAcceptanceError(receipt))
    .filter((problem) => Boolean(problem)) // a falsy result means the receipt is fine
    .map((problem) => `ledger receipts.jsonl ${problem}`);
}
978
+
979
+ // (10) Receipt status reverse-consistency (P2 evidence-gate, REJECT follow-up):
980
+ // a receipt's status is not just a free-form label — it is DERIVED from
981
+ // (verdict, owned-evidence, ownerAccepted) by the SAME receiptStatusFor
982
+ // rule the writer applies. Before this check, a hand-planted row could
983
+ // carry a status that contradicts its own verdict (e.g. verdict "reject"
984
+ // with status "accepted", or "pass_with_risk" written "accepted" with no
985
+ // owner sign-off) and slip past as long as it cited some evidence. Here we
986
+ // (a) require status to be a legal enum value, then (b) RE-COMPUTE the
987
+ // expected status and flag any receipt whose stored status differs — so
988
+ // the status can never claim more (or less) than the rule grants.
989
/**
 * Check (10): a receipt's stored status must be a legal value and must equal
 * the status that `receiptStatusFor` derives from the receipt's verdict, its
 * own-task evidence, and its owner-acceptance flag.
 *
 * @param {object} ctx - parsed-ledger context; reads `receipts` and `evidence`.
 * @returns {string[]} one message per receipt with an illegal or contradictory status.
 */
export function checkReceiptStatusReverse(ctx) {
  const findings = [];
  for (const row of ctx.receipts) {
    const { status, verdict } = row;

    // (a) The status has to be one of the legal enum values; an illegal one
    // is reported and not reverse-computed.
    if (!RECEIPT_STATUSES.includes(status)) {
      findings.push(`ledger receipts.jsonl receipt ${row.id ?? "(no id)"} has missing/illegal status "${status}" (allowed: ${RECEIPT_STATUSES.join(", ")})`);
      continue;
    }

    // (b) Only receipts with a known verdict are reverse-checked; an unknown
    // verdict is left to the verdict check so it is not reported twice.
    if (!RECEIPT_VERDICTS.includes(verdict)) continue;

    // A missing or non-array evidenceIds field is treated as "cites nothing".
    const citedIds = Array.isArray(row.evidenceIds) ? row.evidenceIds : [];
    const ownTaskEvidence = ownedEvidenceIds(citedIds, row.taskId, ctx.evidence);
    const ownerSigned = row.ownerAccepted === true;
    const derived = receiptStatusFor(verdict, ownTaskEvidence, ownerSigned);
    if (derived === status) continue;

    findings.push(`ledger receipts.jsonl receipt ${row.id ?? "(no id)"} has status "${status}" but verdict "${verdict}" with ${ownTaskEvidence.length} own-task evidence and ownerAccepted=${ownerSigned} computes to "${derived}" (status contradicts the rule)`);
  }
  return findings;
}
1015
+
1016
+ // (11) Learning-ledger record shape (P4): each learning row must carry a legal
1017
+ // type (harvest/profile), a non-empty content, and a legal status
1018
+ // (proposed/confirmed/edited/dropped). Uses the SAME learningRecordError
1019
+ // predicate the CLI writer (learning add / confirm / edit / drop) applies,
1020
+ // so a row the writer would refuse is flagged read-time too — and a
1021
+ // hand-edited ledger that drifts off the enum (a bogus type, a typo'd
1022
+ // status, an emptied content) is caught instead of silently feeding the
1023
+ // status recall a malformed preference. (P1 had id-integrity only; this is
1024
+ // the P4 type/status/content contract.)
1025
/**
 * Check (11): run the shared `learningRecordError` predicate over every
 * learning-ledger row and report each row it rejects.
 *
 * @param {object} ctx - parsed-ledger context; reads `learning`.
 * @returns {string[]} one message per malformed learning row, in ledger order.
 */
export function checkLearningRecordShape(ctx) {
  return Array.from(ctx.learning).flatMap((entry) => {
    const problem = learningRecordError(entry);
    // An empty array contributes nothing to the flattened result.
    return problem
      ? [`ledger learning-ledger.jsonl learning ${entry.id ?? "(no id)"}: ${problem}`]
      : [];
  });
}
1035
+
1036
+ // (12) Orphan learning row: a learning row MAY be unbound (no taskId), but a
1037
+ // taskId that is present must name a real task — a learning row pointing at
1038
+ // a non-existent task is a dangling binding, the same standard the evidence
1039
+ // orphan check (check 2) holds. Rows with no taskId are skipped (legitimate
1040
+ // cross-task lessons).
1041
/**
 * Check (12): a learning row may be unbound, but a taskId that is present
 * must name a task in `ctx.taskIds`.
 *
 * A row counts as unbound when its taskId is `undefined` OR `null`. JSON has
 * no `undefined`, so `"taskId": null` is how a JSONL row spells "no task";
 * such a row was previously misreported as an orphan of task "null".
 *
 * @param {object} ctx - parsed-ledger context; reads `learning` (rows) and
 *   `taskIds` (anything exposing `.has(id)`).
 * @returns {string[]} one message per learning row bound to an unknown task.
 */
export function checkOrphanLearning(ctx) {
  const errors = [];
  for (const row of ctx.learning) {
    // `== null` deliberately matches both undefined and null (unbound row).
    if (row.taskId == null) continue;
    if (ctx.taskIds.has(row.taskId)) continue;
    errors.push(`ledger learning-ledger.jsonl learning ${row.id ?? "(no id)"} references unknown task "${row.taskId}" (orphan)`);
  }
  return errors;
}
1050
+
1051
+ // Run-layer ledger validation. Split out for readability; called from
1052
+ // deepValidate so its checks land in the same `errors` list (CLI check + the
1053
+ // contract validator both keep failing on a degraded ledger with no interface
1054
+ // change). Each `tick()` records one performed check.
1055
+ //
1056
+ // This is now a thin ORCHESTRATOR: it builds the parsed-ledger context once,
1057
+ // then runs each numbered check (above) IN THE ORIGINAL ORDER, ticking once per
1058
+ // check and appending the check's returned errors. The parse pass (check 1) and
1059
+ // the per-ledger id pass (check 1b) each tick once per ledger file, exactly as
1060
+ // the original loops did, so the tick count and error order are unchanged.
1061
/**
 * Orchestrate the run-layer ledger checks for one workspace.
 *
 * Builds the ledger context once from `<workspace>/state`, then runs every
 * check in a fixed order. `tick()` is called once per ledger file for the
 * parse pass, once per ledger file for the id pass, and once per numbered
 * check; all resulting messages are appended to the caller's `errors` array.
 *
 * @param {string} workspace - workspace root directory.
 * @param {string[]} errors - caller-owned list that receives every message.
 * @param {Function} tick - callback invoked once per performed check.
 * @returns {void}
 */
function validateLedgers(workspace, errors, tick) {
  const ctx = buildLedgerContext(path.join(workspace, "state"));

  // One tick per ledger file, then that file's pre-computed messages.
  const drainPerLedger = (bucketName) => {
    for (const ledgerKey of ctx.LEDGER_KEYS) {
      tick();
      for (const message of ctx[bucketName][ledgerKey]) errors.push(message);
    }
  };

  drainPerLedger("parseErrorsByKey"); // (1) bad JSONL
  drainPerLedger("idErrorsByKey"); // (1b) per-ledger id integrity

  // The remaining numbered checks. The sequence is significant: it fixes the
  // order in which messages land in `errors`.
  const orderedChecks = [
    checkTaskStatusEnum, // (3)
    checkDoneRequiresEvidence, // (5)
    checkOrphanEvidence, // (2)
    checkSpecialEvidenceStructure, // (2b)
    checkRerunRunReconcile, // (2c)
    checkReceiptEvidenceRefs, // (4)
    checkReceiptTaskRef, // (4b)
    checkReceiptCrossTaskEvidence, // (4c)
    checkRerunEvidenceIdRefs, // (4d)
    checkAcceptedReceiptHasEvidence, // (6)
    checkGuardLevelEnum, // (7)
    checkVerdictGuardLevelConsistency, // (8)
    checkReviewModeEnum, // (8b)
    checkReceiptComputedLevel, // (8c)
    checkFamilyUnverifiedMarker, // (8d)
    checkOwnerAcceptanceMarker, // (9)
    checkReceiptStatusReverse, // (10)
    checkLearningRecordShape, // (11)
    checkOrphanLearning // (12)
  ];
  for (const runCheck of orderedChecks) {
    tick();
    for (const message of runCheck(ctx)) errors.push(message);
  }
}
1104
+
1105
/**
 * Validate the layout and required content of a workspace directory.
 *
 * Runs presence checks (directories and files), keyword checks on the
 * content of selected files, and finally the deep structural pass
 * (`deepValidate`). Hard failures accumulate in `errors`; `warnings` is
 * handed to `deepValidate`.
 *
 * @param {string} workspace - workspace root directory.
 * @returns {{ok: boolean, errors: string[], warnings: string[], checks: number, deepChecks: number}}
 *   `ok` is true only when no error was recorded; `checks` is the number of
 *   counted checks including the deep ones.
 */
export function validateWorkspace(workspace) {
  const errors = [];
  const warnings = [];
  let checks = 0;

  // The five documents every layer and every mechanism is expected to carry.
  const layerDocFiles = ["README.md", "PROMPT.md", "TEMPLATE.md", "EXAMPLE.synthetic.md", "FAILURE_MODES.md"];
  // No `g` flag, so `.test()` keeps no state between calls.
  const placeholderPattern = /TBD|TODO|placeholder/i;

  // Require a file and count it as one check; returns whatever requireFile
  // returns (used below as the file's text).
  const countedFile = (...segments) => {
    const content = requireFile(errors, workspace, ...segments);
    checks += 1;
    return content;
  };

  requireDir(errors, workspace);
  const startHere = countedFile("START_HERE.md");
  includesAll(errors, "START_HERE.md", startHere, ["10-minute path", "30-minute path", "60-minute path", "guard", "handoff", "harvest"]);
  const firstScreen = startHere.slice(0, 1200);
  if (/doctor/i.test(firstScreen)) {
    errors.push("START_HERE first screen must not lead with doctor");
  }

  for (const dir of requiredWorkspaceDirs) {
    requireDir(errors, workspace, dir);
    checks += 1;
  }

  for (const layer of ["profile", "context", "acceptance", "guard", "handoff", "harvest"]) {
    for (const file of layerDocFiles) {
      countedFile(layer, file);
    }
    // The keyword check runs over the layer's documents joined together, so a
    // keyword may live in any one of them; missing files contribute "".
    const combined = layerDocFiles
      .map((file) => (exists(workspace, layer, file) ? read(path.join(workspace, layer, file)) : ""))
      .join("\n");
    includesAll(errors, layer, combined, [
      "Purpose",
      "When to use",
      "Input shape",
      "Output shape",
      "Copy-paste prompt",
      "Blank template",
      "Filled synthetic example",
      "Common failure modes",
      "Claude Code",
      "Codex",
      "Cursor",
      "Windsurf",
      "Copilot",
      "Cline"
    ]);
  }

  // Profile candidate buffer (P0-5): profile/CANDIDATES.md and its four-state
  // machine must be present, so a workspace that drops the buffer fails loudly.
  const candidates = countedFile("profile", "CANDIDATES.md");
  includesAll(errors, "profile/CANDIDATES.md", candidates, [
    "State machine",
    "proposed",
    "confirmed",
    "edited",
    "dropped"
  ]);

  for (const mechanism of requiredMechanismIds) {
    for (const file of layerDocFiles) {
      const content = countedFile("mechanisms", mechanism, file);
      includesAll(errors, `${mechanism}/${file}`, content, ["AI Collaboration Open System", "local-first", "public-safe"]);
      if (placeholderPattern.test(content)) errors.push(`${mechanism}/${file} contains placeholder text`);
    }
  }

  // Presence-only groups, in order: roles, modes, cookbook, state documents,
  // then the P1 run-layer ledgers (their content is checked in the deep pass).
  const presenceOnlyGroups = [
    ["roles", ["README.md", "owner-controller.md", "executor.md", "system-guardian.md", "scout.md", "harvester.md"]],
    ["modes", ["README.md", "execute.md", "review.md", "handoff.md", "harvest.md"]],
    ["cookbook", ["README.md", "run-a-first-loop.md", "connect-a-tool.md", "review-a-half-product.md"]],
    ["state", ["CURRENT_STATE.md", "TASK_LOG.md", "DECISIONS.md"]],
    ["state", ["tasks.jsonl", "evidence.jsonl", "runs.jsonl", "receipts.jsonl", "learning-ledger.jsonl"]]
  ];
  for (const [dir, files] of presenceOnlyGroups) {
    for (const file of files) {
      countedFile(dir, file);
    }
  }

  for (const file of requiredPromptFiles) {
    const content = countedFile("prompts", file);
    includesAll(errors, file, content, ["Copy-paste prompt", "Expected output"]);
    if (placeholderPattern.test(content)) errors.push(`${file} contains placeholder text`);
  }

  for (const skill of requiredSkillIds) {
    const content = countedFile("skills", skill, "SKILL.md");
    includesAll(errors, `${skill} skill`, content, ["name:", "When to use", "Output", "Safety"]);
  }

  // The shared core contract is required but, unlike the adapters below, is
  // not added to the `checks` tally.
  const shared = requireFile(errors, workspace, "adapters", "SHARED_CORE_CONTRACT.md");
  includesAll(errors, "shared core", shared, ["Profile", "Context", "Acceptance", "Guard", "Handoff", "Harvest"]);
  for (const adapter of requiredAdapterIds) {
    const content = countedFile("adapters", adapter, "ADAPTER.md");
    includesAll(errors, `${adapter} adapter`, content, ["SHARED_CORE_CONTRACT.md", "profile", "context", "acceptance", "guard", "handoff", "harvest"]);
  }

  // Names of the sub-directories of examples/ (empty when examples/ is absent).
  const exampleEntries = exists(workspace, "examples")
    ? readdirSync(path.join(workspace, "examples"), { withFileTypes: true })
    : [];
  const caseDirs = exampleEntries
    .filter((entry) => entry.isDirectory())
    .map((entry) => entry.name);
  for (const caseId of requiredCaseIds) {
    if (!caseDirs.includes(caseId)) errors.push(`missing synthetic case ${caseId}`);
    const content = countedFile("examples", caseId, "CASE.md");
    includesAll(errors, caseId, content, [
      "Confusing raw input",
      "Likely single-agent failure",
      "AI Collaboration OS process",
      "Context package",
      "Acceptance card",
      "Handoff note",
      "Harvest seed",
      "Before/after comparison",
      "Messy starting point",
      "Workspace setup",
      "Profile/context",
      "Acceptance",
      "Execution prompt",
      "Guard review",
      "Handoff",
      "Harvest",
      "What changes compared with a single raw AI chat"
    ]);
    for (const artifact of ["context-package.md", "acceptance-card.md", "execution-prompt.md", "guard-review.md", "handoff-note.md", "harvest-seed.md"]) {
      countedFile("examples", caseId, "artifacts", artifact);
    }
  }

  for (const file of ["PRIVACY.md", "COMMERCIAL_BOUNDARY.md", "REDACTION_CHECKLIST.md"]) {
    countedFile("privacy", file);
  }

  for (const file of ["10-minute-your-task.md", "10-minute.md", "30-minute.md", "60-minute.md", "synthetic-loop-transcript.md"]) {
    const content = countedFile("walkthroughs", file);
    includesAll(errors, file, content, ["Goal", "Expected"]);
  }

  // The real first-run walkthrough is held to a higher bar than the generic
  // "Goal/Expected" floor: it must carry the hardened evidence chain (P0-4)
  // and the profile-candidate buffer (P0-5). It is required (and counted) a
  // second time here, after the loop above.
  const yourTask = countedFile("walkthroughs", "10-minute-your-task.md");
  includesAll(errors, "10-minute-your-task.md", yourTask, [
    "Evidence Pack", // Step 2 must produce a structured evidence pack
    // The six concrete Evidence Pack segments; dropping one re-softens the loop.
    "Changed files / diff", // segment 1: what changed
    "Commands run", // segment 2: how it was verified
    "Command output summary", // segment 3: real output, not paraphrase
    "exit code", // segment 4: exit codes (0 = passed)
    "Acceptance mapping", // segment 5: AC -> PASS/FAIL/NOT-VERIFIED
    "Not verified", // segment 6: what could not be proven
    "INSUFFICIENT_EVIDENCE", // Step 3 reviewer verdict when evidence is absent/thin
    "REJECT", // Step 3 verdict when an evidence-grounded hard defect exists
    "acceptance", // re-check maps evidence to acceptance criteria
    "CANDIDATES\\.md", // Step 4 buffers profile candidates instead of dropping them in
    "proposed", // ...via the proposed/confirmed/edited/dropped state machine
    "confirmed",
    "dropped"
  ]);
  // Guard the specific regression P0-5 fixed: candidates must not be dropped
  // straight into the long-term profile dir.
  const dropsStraightIntoProfile = /drop it into\s+`?\.\.\/profile\/`?[^C]/i.test(yourTask);
  if (dropsStraightIntoProfile) {
    errors.push("10-minute-your-task.md still drops profile candidates straight into ../profile/ (must buffer in CANDIDATES.md first)");
  }

  // Deep structural validation (P2): failures go to the same `errors` list,
  // advisory findings to `warnings`; its check count is reported separately
  // and also folded into the total.
  const counters = { deepChecks: 0 };
  deepValidate(workspace, errors, warnings, counters);
  checks += counters.deepChecks;

  return { ok: errors.length === 0, errors, warnings, checks, deepChecks: counters.deepChecks };
}