ai-collab-open-system 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. package/.aict/START_HERE.md +127 -0
  2. package/.aict/WORKSPACE_MANIFEST.json +91 -0
  3. package/.aict/acceptance/EXAMPLE.synthetic.md +49 -0
  4. package/.aict/acceptance/FAILURE_MODES.md +40 -0
  5. package/.aict/acceptance/PROMPT.md +47 -0
  6. package/.aict/acceptance/README.md +44 -0
  7. package/.aict/acceptance/TEMPLATE.md +57 -0
  8. package/.aict/adapters/SHARED_CORE_CONTRACT.md +106 -0
  9. package/.aict/adapters/claude-code/ADAPTER.md +28 -0
  10. package/.aict/adapters/cline/ADAPTER.md +28 -0
  11. package/.aict/adapters/codex/ADAPTER.md +28 -0
  12. package/.aict/adapters/copilot/ADAPTER.md +28 -0
  13. package/.aict/adapters/cursor/ADAPTER.md +28 -0
  14. package/.aict/adapters/windsurf/ADAPTER.md +28 -0
  15. package/.aict/context/EXAMPLE.synthetic.md +53 -0
  16. package/.aict/context/FAILURE_MODES.md +40 -0
  17. package/.aict/context/PROMPT.md +47 -0
  18. package/.aict/context/README.md +44 -0
  19. package/.aict/context/TEMPLATE.md +63 -0
  20. package/.aict/cookbook/README.md +8 -0
  21. package/.aict/cookbook/bridge-to-a-second-family.md +103 -0
  22. package/.aict/cookbook/connect-a-tool.md +67 -0
  23. package/.aict/cookbook/review-a-half-product.md +79 -0
  24. package/.aict/cookbook/run-a-first-loop.md +81 -0
  25. package/.aict/examples/README.md +21 -0
  26. package/.aict/examples/ai-coding-long-task/CASE.md +161 -0
  27. package/.aict/examples/ai-coding-long-task/artifacts/acceptance-card.md +36 -0
  28. package/.aict/examples/ai-coding-long-task/artifacts/context-package.md +30 -0
  29. package/.aict/examples/ai-coding-long-task/artifacts/execution-prompt.md +30 -0
  30. package/.aict/examples/ai-coding-long-task/artifacts/first-ai-output.md +109 -0
  31. package/.aict/examples/ai-coding-long-task/artifacts/guard-review.md +40 -0
  32. package/.aict/examples/ai-coding-long-task/artifacts/handoff-note.md +28 -0
  33. package/.aict/examples/ai-coding-long-task/artifacts/harvest-seed.md +28 -0
  34. package/.aict/examples/ai-coding-long-task/artifacts/revised-output.md +62 -0
  35. package/.aict/examples/content-production-harvest/CASE.md +87 -0
  36. package/.aict/examples/content-production-harvest/artifacts/acceptance-card.md +28 -0
  37. package/.aict/examples/content-production-harvest/artifacts/context-package.md +28 -0
  38. package/.aict/examples/content-production-harvest/artifacts/execution-prompt.md +30 -0
  39. package/.aict/examples/content-production-harvest/artifacts/guard-review.md +28 -0
  40. package/.aict/examples/content-production-harvest/artifacts/handoff-note.md +28 -0
  41. package/.aict/examples/content-production-harvest/artifacts/harvest-seed.md +28 -0
  42. package/.aict/examples/multi-tool-collaboration/CASE.md +87 -0
  43. package/.aict/examples/multi-tool-collaboration/artifacts/acceptance-card.md +28 -0
  44. package/.aict/examples/multi-tool-collaboration/artifacts/context-package.md +28 -0
  45. package/.aict/examples/multi-tool-collaboration/artifacts/execution-prompt.md +30 -0
  46. package/.aict/examples/multi-tool-collaboration/artifacts/guard-review.md +28 -0
  47. package/.aict/examples/multi-tool-collaboration/artifacts/handoff-note.md +28 -0
  48. package/.aict/examples/multi-tool-collaboration/artifacts/harvest-seed.md +28 -0
  49. package/.aict/examples/personal-judgment-growth-assistant/CASE.md +87 -0
  50. package/.aict/examples/personal-judgment-growth-assistant/artifacts/acceptance-card.md +28 -0
  51. package/.aict/examples/personal-judgment-growth-assistant/artifacts/context-package.md +28 -0
  52. package/.aict/examples/personal-judgment-growth-assistant/artifacts/execution-prompt.md +30 -0
  53. package/.aict/examples/personal-judgment-growth-assistant/artifacts/guard-review.md +28 -0
  54. package/.aict/examples/personal-judgment-growth-assistant/artifacts/handoff-note.md +28 -0
  55. package/.aict/examples/personal-judgment-growth-assistant/artifacts/harvest-seed.md +28 -0
  56. package/.aict/examples/research-knowledge-synthesis/CASE.md +87 -0
  57. package/.aict/examples/research-knowledge-synthesis/artifacts/acceptance-card.md +28 -0
  58. package/.aict/examples/research-knowledge-synthesis/artifacts/context-package.md +28 -0
  59. package/.aict/examples/research-knowledge-synthesis/artifacts/execution-prompt.md +30 -0
  60. package/.aict/examples/research-knowledge-synthesis/artifacts/guard-review.md +28 -0
  61. package/.aict/examples/research-knowledge-synthesis/artifacts/handoff-note.md +28 -0
  62. package/.aict/examples/research-knowledge-synthesis/artifacts/harvest-seed.md +28 -0
  63. package/.aict/guard/EXAMPLE.synthetic.md +51 -0
  64. package/.aict/guard/FAILURE_MODES.md +40 -0
  65. package/.aict/guard/PROMPT.md +47 -0
  66. package/.aict/guard/README.md +44 -0
  67. package/.aict/guard/TEMPLATE.md +60 -0
  68. package/.aict/handoff/EXAMPLE.synthetic.md +51 -0
  69. package/.aict/handoff/FAILURE_MODES.md +40 -0
  70. package/.aict/handoff/PROMPT.md +47 -0
  71. package/.aict/handoff/README.md +44 -0
  72. package/.aict/handoff/TEMPLATE.md +60 -0
  73. package/.aict/harvest/EXAMPLE.synthetic.md +51 -0
  74. package/.aict/harvest/FAILURE_MODES.md +40 -0
  75. package/.aict/harvest/PROMPT.md +47 -0
  76. package/.aict/harvest/README.md +44 -0
  77. package/.aict/harvest/TEMPLATE.md +60 -0
  78. package/.aict/mechanisms/README.md +34 -0
  79. package/.aict/mechanisms/anti-drift-partner/EXAMPLE.synthetic.md +46 -0
  80. package/.aict/mechanisms/anti-drift-partner/FAILURE_MODES.md +25 -0
  81. package/.aict/mechanisms/anti-drift-partner/PROMPT.md +75 -0
  82. package/.aict/mechanisms/anti-drift-partner/README.md +82 -0
  83. package/.aict/mechanisms/anti-drift-partner/TEMPLATE.md +74 -0
  84. package/.aict/mechanisms/blind-spot-scan/EXAMPLE.synthetic.md +39 -0
  85. package/.aict/mechanisms/blind-spot-scan/FAILURE_MODES.md +25 -0
  86. package/.aict/mechanisms/blind-spot-scan/PROMPT.md +72 -0
  87. package/.aict/mechanisms/blind-spot-scan/README.md +79 -0
  88. package/.aict/mechanisms/blind-spot-scan/TEMPLATE.md +70 -0
  89. package/.aict/mechanisms/collaboration-coach/EXAMPLE.synthetic.md +40 -0
  90. package/.aict/mechanisms/collaboration-coach/FAILURE_MODES.md +25 -0
  91. package/.aict/mechanisms/collaboration-coach/PROMPT.md +72 -0
  92. package/.aict/mechanisms/collaboration-coach/README.md +79 -0
  93. package/.aict/mechanisms/collaboration-coach/TEMPLATE.md +61 -0
  94. package/.aict/mechanisms/do-not-handle-yet/EXAMPLE.synthetic.md +15 -0
  95. package/.aict/mechanisms/do-not-handle-yet/FAILURE_MODES.md +16 -0
  96. package/.aict/mechanisms/do-not-handle-yet/PROMPT.md +41 -0
  97. package/.aict/mechanisms/do-not-handle-yet/README.md +30 -0
  98. package/.aict/mechanisms/do-not-handle-yet/TEMPLATE.md +38 -0
  99. package/.aict/mechanisms/dual-guard/EXAMPLE.synthetic.md +54 -0
  100. package/.aict/mechanisms/dual-guard/FAILURE_MODES.md +25 -0
  101. package/.aict/mechanisms/dual-guard/PROMPT.md +76 -0
  102. package/.aict/mechanisms/dual-guard/README.md +81 -0
  103. package/.aict/mechanisms/dual-guard/TEMPLATE.md +73 -0
  104. package/.aict/mechanisms/feedback-absorption-ledger/EXAMPLE.synthetic.md +49 -0
  105. package/.aict/mechanisms/feedback-absorption-ledger/FAILURE_MODES.md +25 -0
  106. package/.aict/mechanisms/feedback-absorption-ledger/PROMPT.md +74 -0
  107. package/.aict/mechanisms/feedback-absorption-ledger/README.md +81 -0
  108. package/.aict/mechanisms/feedback-absorption-ledger/TEMPLATE.md +69 -0
  109. package/.aict/mechanisms/half-product-review/EXAMPLE.synthetic.md +15 -0
  110. package/.aict/mechanisms/half-product-review/FAILURE_MODES.md +16 -0
  111. package/.aict/mechanisms/half-product-review/PROMPT.md +41 -0
  112. package/.aict/mechanisms/half-product-review/README.md +30 -0
  113. package/.aict/mechanisms/half-product-review/TEMPLATE.md +38 -0
  114. package/.aict/mechanisms/handoff-abc/EXAMPLE.synthetic.md +47 -0
  115. package/.aict/mechanisms/handoff-abc/FAILURE_MODES.md +25 -0
  116. package/.aict/mechanisms/handoff-abc/PROMPT.md +75 -0
  117. package/.aict/mechanisms/handoff-abc/README.md +82 -0
  118. package/.aict/mechanisms/handoff-abc/TEMPLATE.md +60 -0
  119. package/.aict/mechanisms/harvest-and-erc/EXAMPLE.synthetic.md +43 -0
  120. package/.aict/mechanisms/harvest-and-erc/FAILURE_MODES.md +25 -0
  121. package/.aict/mechanisms/harvest-and-erc/PROMPT.md +74 -0
  122. package/.aict/mechanisms/harvest-and-erc/README.md +81 -0
  123. package/.aict/mechanisms/harvest-and-erc/TEMPLATE.md +60 -0
  124. package/.aict/mechanisms/honest-calibration/EXAMPLE.synthetic.md +43 -0
  125. package/.aict/mechanisms/honest-calibration/FAILURE_MODES.md +25 -0
  126. package/.aict/mechanisms/honest-calibration/PROMPT.md +74 -0
  127. package/.aict/mechanisms/honest-calibration/README.md +81 -0
  128. package/.aict/mechanisms/honest-calibration/TEMPLATE.md +66 -0
  129. package/.aict/mechanisms/one-click-dispatch/EXAMPLE.synthetic.md +15 -0
  130. package/.aict/mechanisms/one-click-dispatch/FAILURE_MODES.md +16 -0
  131. package/.aict/mechanisms/one-click-dispatch/PROMPT.md +41 -0
  132. package/.aict/mechanisms/one-click-dispatch/README.md +30 -0
  133. package/.aict/mechanisms/one-click-dispatch/TEMPLATE.md +38 -0
  134. package/.aict/mechanisms/plain-language-first-screen/EXAMPLE.synthetic.md +15 -0
  135. package/.aict/mechanisms/plain-language-first-screen/FAILURE_MODES.md +16 -0
  136. package/.aict/mechanisms/plain-language-first-screen/PROMPT.md +41 -0
  137. package/.aict/mechanisms/plain-language-first-screen/README.md +30 -0
  138. package/.aict/mechanisms/plain-language-first-screen/TEMPLATE.md +38 -0
  139. package/.aict/mechanisms/root-cause-brake/EXAMPLE.synthetic.md +55 -0
  140. package/.aict/mechanisms/root-cause-brake/FAILURE_MODES.md +25 -0
  141. package/.aict/mechanisms/root-cause-brake/PROMPT.md +73 -0
  142. package/.aict/mechanisms/root-cause-brake/README.md +79 -0
  143. package/.aict/mechanisms/root-cause-brake/TEMPLATE.md +74 -0
  144. package/.aict/mechanisms/scout-review-controller/EXAMPLE.synthetic.md +15 -0
  145. package/.aict/mechanisms/scout-review-controller/FAILURE_MODES.md +16 -0
  146. package/.aict/mechanisms/scout-review-controller/PROMPT.md +41 -0
  147. package/.aict/mechanisms/scout-review-controller/README.md +30 -0
  148. package/.aict/mechanisms/scout-review-controller/TEMPLATE.md +38 -0
  149. package/.aict/mechanisms/single-tool-guard/EXAMPLE.synthetic.md +54 -0
  150. package/.aict/mechanisms/single-tool-guard/FAILURE_MODES.md +25 -0
  151. package/.aict/mechanisms/single-tool-guard/PROMPT.md +76 -0
  152. package/.aict/mechanisms/single-tool-guard/README.md +83 -0
  153. package/.aict/mechanisms/single-tool-guard/TEMPLATE.md +75 -0
  154. package/.aict/mechanisms/task-splitting/EXAMPLE.synthetic.md +53 -0
  155. package/.aict/mechanisms/task-splitting/FAILURE_MODES.md +25 -0
  156. package/.aict/mechanisms/task-splitting/PROMPT.md +72 -0
  157. package/.aict/mechanisms/task-splitting/README.md +79 -0
  158. package/.aict/mechanisms/task-splitting/TEMPLATE.md +76 -0
  159. package/.aict/modes/README.md +11 -0
  160. package/.aict/modes/execute.md +31 -0
  161. package/.aict/modes/handoff.md +29 -0
  162. package/.aict/modes/harvest.md +30 -0
  163. package/.aict/modes/review.md +28 -0
  164. package/.aict/modes/shape.md +34 -0
  165. package/.aict/privacy/COMMERCIAL_BOUNDARY.md +34 -0
  166. package/.aict/privacy/PRIVACY.md +36 -0
  167. package/.aict/privacy/REDACTION_CHECKLIST.md +12 -0
  168. package/.aict/profile/CANDIDATES.md +44 -0
  169. package/.aict/profile/EXAMPLE.synthetic.md +49 -0
  170. package/.aict/profile/FAILURE_MODES.md +40 -0
  171. package/.aict/profile/PROMPT.md +47 -0
  172. package/.aict/profile/README.md +44 -0
  173. package/.aict/profile/TEMPLATE.md +57 -0
  174. package/.aict/prompts/acceptance-definition.md +109 -0
  175. package/.aict/prompts/guard-review.md +116 -0
  176. package/.aict/prompts/handoff-generation.md +110 -0
  177. package/.aict/prompts/harvest-extraction.md +110 -0
  178. package/.aict/prompts/mode-switching.md +66 -0
  179. package/.aict/prompts/profile-creation.md +66 -0
  180. package/.aict/prompts/profile-refinement.md +66 -0
  181. package/.aict/prompts/project-context-packaging.md +113 -0
  182. package/.aict/prompts/red-team-challenge.md +106 -0
  183. package/.aict/prompts/rule-update-proposal.md +114 -0
  184. package/.aict/prompts/workflow-reset.md +109 -0
  185. package/.aict/roles/README.md +18 -0
  186. package/.aict/roles/executor.md +34 -0
  187. package/.aict/roles/harvester.md +33 -0
  188. package/.aict/roles/owner-controller.md +38 -0
  189. package/.aict/roles/scout.md +33 -0
  190. package/.aict/roles/supervisor.md +34 -0
  191. package/.aict/roles/system-guardian.md +34 -0
  192. package/.aict/skills/acceptance/SKILL.md +43 -0
  193. package/.aict/skills/context/SKILL.md +44 -0
  194. package/.aict/skills/evidence-pack/SKILL.md +42 -0
  195. package/.aict/skills/guard/SKILL.md +46 -0
  196. package/.aict/skills/handoff/SKILL.md +44 -0
  197. package/.aict/skills/harvest/SKILL.md +44 -0
  198. package/.aict/skills/mode-switch/SKILL.md +42 -0
  199. package/.aict/skills/profile/SKILL.md +42 -0
  200. package/.aict/skills/red-team/SKILL.md +42 -0
  201. package/.aict/skills/single-tool-guard/SKILL.md +42 -0
  202. package/.aict/state/CURRENT_STATE.md +13 -0
  203. package/.aict/state/DECISIONS.md +7 -0
  204. package/.aict/state/TASK_LOG.md +7 -0
  205. package/.aict/state/evidence.jsonl +2 -0
  206. package/.aict/state/learning-ledger.jsonl +1 -0
  207. package/.aict/state/receipts.jsonl +1 -0
  208. package/.aict/state/runs.jsonl +1 -0
  209. package/.aict/state/tasks.jsonl +1 -0
  210. package/.aict/walkthroughs/10-minute-your-task.md +107 -0
  211. package/.aict/walkthroughs/10-minute.md +43 -0
  212. package/.aict/walkthroughs/30-minute.md +22 -0
  213. package/.aict/walkthroughs/60-minute.md +27 -0
  214. package/.aict/walkthroughs/synthetic-loop-transcript.md +43 -0
  215. package/CHANGELOG.md +23 -0
  216. package/CODE_OF_CONDUCT.md +20 -0
  217. package/CONTRIBUTING.md +30 -0
  218. package/KNOWN_LIMITATIONS.md +54 -0
  219. package/LICENSE +199 -0
  220. package/PRODUCT_CONTRACT.md +446 -0
  221. package/README.md +245 -0
  222. package/RELEASE_CHECKLIST.md +78 -0
  223. package/SECURITY.md +56 -0
  224. package/START_HERE.md +89 -0
  225. package/bin/ai-collab.js +2 -0
  226. package/docs/DOGFOOD.md +85 -0
  227. package/docs/FEEDBACK.md +61 -0
  228. package/docs/FIRST_EXPERIENCE_SPEC.md +32 -0
  229. package/docs/FREE_VS_PAID.md +53 -0
  230. package/docs/PUBLIC_BOUNDARY.md +36 -0
  231. package/docs/PUBLIC_MAPPING.md +178 -0
  232. package/docs/RELEASE_PRIORITY.md +23 -0
  233. package/docs/WHY_THIS_EXISTS.md +36 -0
  234. package/docs/open-system/00-start-here.md +60 -0
  235. package/docs/open-system/01-ai-collaboration-os.md +33 -0
  236. package/docs/open-system/02-six-layer-architecture.md +45 -0
  237. package/docs/open-system/03-role-system.md +33 -0
  238. package/docs/open-system/04-core-mechanisms.md +34 -0
  239. package/docs/open-system/05-failure-patterns.md +31 -0
  240. package/docs/open-system/06-how-to-adapt-to-your-workflow.md +31 -0
  241. package/package.json +69 -0
  242. package/privacy-manifest.json +78 -0
  243. package/privacy-scan.local.json.example +18 -0
  244. package/scripts/lib/forbidden-in-pack.js +55 -0
  245. package/scripts/pack-check.js +154 -0
  246. package/scripts/privacy-scan.js +487 -0
  247. package/scripts/validate-contract.js +160 -0
  248. package/src/adapters.js +590 -0
  249. package/src/bootstrap.js +1184 -0
  250. package/src/catalog.js +2723 -0
  251. package/src/cli.js +2899 -0
  252. package/src/dialogue.js +470 -0
  253. package/src/i18n.js +1034 -0
  254. package/src/ledger.js +2011 -0
  255. package/src/render.js +1381 -0
  256. package/src/sendmodel.js +452 -0
  257. package/src/validate.js +1307 -0
  258. package/src/workspace.js +1679 -0
  259. package/tests/contract.test.js +8514 -0
@@ -0,0 +1,1679 @@
1
+ import { existsSync, mkdirSync, readdirSync, renameSync, statSync, writeFileSync } from "node:fs";
2
+ import path from "node:path";
3
+ import {
4
+ adapterDefinitions,
5
+ caseDefinitions,
6
+ layerDefinitions,
7
+ mechanismDefinitions,
8
+ promptDefinitions,
9
+ skillDefinitions
10
+ } from "./catalog.js";
11
+ import {
12
+ renderAdapter,
13
+ renderCase,
14
+ renderCaseArtifact,
15
+ renderCommercialBoundary,
16
+ renderExamplesIndex,
17
+ renderLayerExample,
18
+ renderLayerFailures,
19
+ renderLayerPrompt,
20
+ renderLayerReadme,
21
+ renderLayerTemplate,
22
+ renderMechanismExample,
23
+ renderMechanismFailures,
24
+ renderMechanismPrompt,
25
+ renderMechanismReadme,
26
+ renderMechanismTemplate,
27
+ renderPrivacyDoc,
28
+ renderPrompt,
29
+ renderSharedCoreContract,
30
+ renderSkill,
31
+ renderStartHere
32
+ } from "./render.js";
33
+
34
/** Name of the workspace directory that is created directly under the target root. */
export const workspaceDirName = ".aict";

/**
 * The six artifact file names that every case carries, in reading order.
 * `caseArtifactNames` may add more for cases that have extra outputs.
 */
export const baseCaseArtifactNames = [
  "context-package.md", "acceptance-card.md", "execution-prompt.md",
  "guard-review.md", "handoff-note.md", "harvest-seed.md"
];
44
+
45
/**
 * List the artifact file names generated for one case, in reading order.
 *
 * Cases that carry a first-AI-output / revised-output pair (the flagship) get two
 * extra artifacts so the false-completion-claim story is runnable. The ordering
 * keeps `first-ai-output.md` right after the execution prompt it answers, and
 * `revised-output.md` right after the guard review that triggered it.
 *
 * @param {object} caseItem - Case definition; only the truthiness of
 *   `firstAiOutput` and `revisedOutput` is inspected.
 * @returns {string[]} A fresh array of artifact file names.
 */
export function caseArtifactNames(caseItem) {
  const answerToPrompt = caseItem.firstAiOutput ? ["first-ai-output.md"] : [];
  const answerToReview = caseItem.revisedOutput ? ["revised-output.md"] : [];

  return [
    "context-package.md",
    "acceptance-card.md",
    "execution-prompt.md",
    ...answerToPrompt,
    "guard-review.md",
    ...answerToReview,
    "handoff-note.md",
    "harvest-seed.md"
  ];
}
61
+
62
/**
 * Create `dir`, asking Node to create any missing parent directories as well.
 *
 * @param {string} dir - Directory path to create.
 */
function ensureDir(dir) {
  const mkdirOptions = { recursive: true };
  mkdirSync(dir, mkdirOptions);
}
65
+
66
/**
 * Write `content` to `file` as UTF-8, creating the parent directory first.
 * Trailing whitespace is stripped and exactly one newline is appended.
 *
 * @param {string} file - Destination file path.
 * @param {string} content - Text to write.
 */
function writeText(file, content) {
  const parentDir = path.dirname(file);
  ensureDir(parentDir);

  const normalized = `${content.trimEnd()}\n`;
  writeFileSync(file, normalized, "utf8");
}
70
+
71
/**
 * Format a date as `YYYYMMDD-HHMMSS` using the date's local-time getters.
 *
 * @param {Date} [date] - Moment to format; defaults to the current time.
 * @returns {string} The compact timestamp.
 */
function formatTimestamp(date = new Date()) {
  const twoDigits = (value) => String(value).padStart(2, "0");

  const day = `${date.getFullYear()}${twoDigits(date.getMonth() + 1)}${twoDigits(date.getDate())}`;
  const clock = `${twoDigits(date.getHours())}${twoDigits(date.getMinutes())}${twoDigits(date.getSeconds())}`;

  return `${day}-${clock}`;
}
79
+
80
/**
 * Pick a backup path under `targetRoot` that does not exist yet.
 *
 * The first choice is `<workspaceDirName>.backup-<timestamp>`. If that is taken,
 * the suffixes `-2` through `-99` are tried in order.
 *
 * @param {string} targetRoot - Directory that will hold the backup.
 * @returns {string} A path for which `existsSync` returned false.
 * @throws {Error} When the base name and every numbered variant already exist.
 */
function uniqueBackupPath(targetRoot) {
  const firstChoice = path.join(targetRoot, `${workspaceDirName}.backup-${formatTimestamp()}`);

  let candidate = firstChoice;
  let nextSuffix = 2;
  while (existsSync(candidate)) {
    // Suffixes stop at 99; past that, give up instead of probing forever.
    if (nextSuffix >= 100) {
      throw new Error(`Could not choose a backup path for ${workspaceDirName}.`);
    }
    candidate = `${firstChoice}-${nextSuffix}`;
    nextSuffix += 1;
  }
  return candidate;
}
89
+
90
/**
 * Move an existing workspace aside by renaming it to a fresh backup path.
 *
 * @param {string} targetRoot - Directory in which the backup path is chosen.
 * @param {string} workspaceRoot - Existing workspace directory to rename.
 * @returns {string} The path the workspace was renamed to.
 */
function backupExistingWorkspace(targetRoot, workspaceRoot) {
  const destination = uniqueBackupPath(targetRoot);
  renameSync(workspaceRoot, destination);
  return destination;
}
95
+
96
/**
 * Build the text of the .gitignore that `init` writes INTO a user's generated
 * .aict/ workspace (see the `options.gitignore` write in createWorkspace).
 *
 * Why the ledgers are ignored: the five append-only JSONL ledgers are local
 * RUNTIME STATE. They accumulate the real task titles, evidence, and learning a
 * user records while working, so ignoring them by default means `git add .` in
 * the user's own repo never commits private task data. Keeping the .gitignore
 * next to the workspace makes it work in a nested repo too (rules are relative
 * to this file's directory) and leaves the static templates and hand-written
 * state/*.md notes versionable. A user who WANTS to version their collaboration
 * state deletes these lines. Already-tracked files are unaffected, so this never
 * untracks a repo's own seed ledgers.
 *
 * IMPORTANT: this file is NOT part of workspaceFileEntries (the byte-for-byte
 * template the contract validator diffs against the committed .aict). It is
 * written ONLY on a real `init`, never by the template generator. That keeps the
 * committed .aict free of a nested .gitignore that npm pack would otherwise
 * honor, which would strip the required seed ledgers out of the published tarball.
 *
 * @returns {string} The .gitignore body, newline-separated, without a trailing newline.
 */
export function workspaceGitignore() {
  // Comment header shown to the user at the top of the generated file.
  const explanation = [
    "# AI Collaboration Open System — local workspace (generated by init).",
    "#",
    "# These five append-only ledgers are your LOCAL RUNTIME STATE: they accumulate the",
    "# real task titles, evidence, and learning you record as you work. They are",
    "# git-ignored by default so that `git add .` in your own repo never commits your",
    "# private task data into version control. Delete these lines if you DO want to",
    "# version your collaboration state (e.g. to share it across a team). Everything",
    "# else in .aict/ (templates, prompts, skills, and your hand-written state/*.md",
    "# notes) is NOT ignored — only the machine-written ledgers are."
  ];

  // The ignore rules themselves: one path per machine-written ledger.
  const ignoredLedgers = [
    "state/tasks.jsonl",
    "state/evidence.jsonl",
    "state/runs.jsonl",
    "state/receipts.jsonl",
    "state/learning-ledger.jsonl"
  ];

  return [...explanation, ...ignoredLedgers].join("\n");
}
130
+
131
/**
 * Build the ordered list of `[absolutePath, content]` pairs that make up the
 * generated workspace template.
 *
 * Nothing is written here; callers decide whether to write or only count the
 * entries. The array is assembled as one literal so that paths and renderers are
 * evaluated in the same order as the files are listed.
 *
 * @param {string} workspaceRoot - Root directory every entry path is joined onto.
 * @returns {Array<[string, string]>} File path / file content pairs.
 */
function workspaceFileEntries(workspaceRoot) {
  // Resolve a path inside the workspace from its segments.
  const at = (...segments) => path.join(workspaceRoot, ...segments);

  // Layers and mechanisms share the same five-file layout.
  const fiveFileSet = (segments, subject, render) => [
    [at(...segments, "README.md"), render.readme(subject)],
    [at(...segments, "PROMPT.md"), render.prompt(subject)],
    [at(...segments, "TEMPLATE.md"), render.template(subject)],
    [at(...segments, "EXAMPLE.synthetic.md"), render.example(subject)],
    [at(...segments, "FAILURE_MODES.md"), render.failures(subject)]
  ];

  const layerRenderers = {
    readme: renderLayerReadme,
    prompt: renderLayerPrompt,
    template: renderLayerTemplate,
    example: renderLayerExample,
    failures: renderLayerFailures
  };
  const mechanismRenderers = {
    readme: renderMechanismReadme,
    prompt: renderMechanismPrompt,
    template: renderMechanismTemplate,
    example: renderMechanismExample,
    failures: renderMechanismFailures
  };

  return [
    [at("START_HERE.md"), renderStartHere()],
    [at("WORKSPACE_MANIFEST.json"), JSON.stringify(workspaceManifest(), null, 2)],

    ...layerDefinitions.flatMap((layer) => fiveFileSet([layer.id], layer, layerRenderers)),

    [at("mechanisms", "README.md"), mechanismsReadme()],
    ...mechanismDefinitions.flatMap((mechanism) =>
      fiveFileSet(["mechanisms", mechanism.id], mechanism, mechanismRenderers)
    ),

    [at("roles", "README.md"), rolesReadme()],
    [at("roles", "owner-controller.md"), roleOwnerController()],
    [at("roles", "executor.md"), roleExecutor()],
    [at("roles", "system-guardian.md"), roleSystemGuardian()],
    [at("roles", "scout.md"), roleScout()],
    [at("roles", "supervisor.md"), roleSupervisor()],
    [at("roles", "harvester.md"), roleHarvester()],

    [at("modes", "README.md"), modesReadme()],
    [at("modes", "execute.md"), modeExecute()],
    [at("modes", "shape.md"), modeShape()],
    [at("modes", "review.md"), modeReview()],
    [at("modes", "handoff.md"), modeHandoff()],
    [at("modes", "harvest.md"), modeHarvest()],

    [at("cookbook", "README.md"), cookbookReadme()],
    [at("cookbook", "run-a-first-loop.md"), cookbookFirstLoop()],
    [at("cookbook", "connect-a-tool.md"), cookbookConnectTool()],
    [at("cookbook", "review-a-half-product.md"), cookbookHalfProduct()],
    [at("cookbook", "bridge-to-a-second-family.md"), cookbookBridgeSecondFamily()],

    [at("state", "CURRENT_STATE.md"), stateCurrent()],
    [at("state", "TASK_LOG.md"), stateTaskLog()],
    [at("state", "DECISIONS.md"), stateDecisions()],
    // P1 run-layer ledgers: five append-only JSONL logs seeded with one
    // deterministic synthetic row each (see the generators for why).
    [at("state", "tasks.jsonl"), tasksLedger()],
    [at("state", "evidence.jsonl"), evidenceLedger()],
    [at("state", "runs.jsonl"), runsLedger()],
    [at("state", "receipts.jsonl"), receiptsLedger()],
    [at("state", "learning-ledger.jsonl"), learningLedger()],

    ...promptDefinitions.map((prompt) => [at("prompts", prompt.file), renderPrompt(prompt)]),
    ...skillDefinitions.map((skill) => [at("skills", skill.id, "SKILL.md"), renderSkill(skill)]),

    [at("adapters", "SHARED_CORE_CONTRACT.md"), renderSharedCoreContract()],
    ...adapterDefinitions.map((adapter) => [
      at("adapters", adapter.id, "ADAPTER.md"),
      renderAdapter(adapter)
    ]),

    [at("examples", "README.md"), renderExamplesIndex()],
    ...caseDefinitions.flatMap((caseItem) => [
      [at("examples", caseItem.id, "CASE.md"), renderCase(caseItem)],
      ...caseArtifactNames(caseItem).map((artifact) => [
        at("examples", caseItem.id, "artifacts", artifact),
        renderCaseArtifact(caseItem, artifact)
      ])
    ]),

    [at("profile", "CANDIDATES.md"), profileCandidates()],
    [at("privacy", "PRIVACY.md"), renderPrivacyDoc()],
    [at("privacy", "COMMERCIAL_BOUNDARY.md"), renderCommercialBoundary()],
    [at("privacy", "REDACTION_CHECKLIST.md"), redactionChecklist()],
    [at("walkthroughs", "10-minute-your-task.md"), walkthrough10YourTask()],
    [at("walkthroughs", "10-minute.md"), walkthrough10()],
    [at("walkthroughs", "30-minute.md"), walkthrough30()],
    [at("walkthroughs", "60-minute.md"), walkthrough60()],
    [at("walkthroughs", "synthetic-loop-transcript.md"), syntheticTranscript()]
  ];
}
227
+
228
/**
 * Generate the workspace under `target`, or only report what would be generated.
 *
 * @param {string} target - Directory that will contain the workspace; resolved
 *   to an absolute path.
 * @param {object} [options]
 * @param {boolean} [options.dryRun] - Report the plan without touching the disk.
 * @param {boolean} [options.force] - Replace an existing workspace; the old one
 *   is renamed to a backup path first.
 * @param {boolean} [options.gitignore] - Also write the workspace .gitignore.
 * @returns {object} A summary with `targetRoot`, `workspaceRoot`, `files`,
 *   `backupPath`, `dryRun`, and `written`. The dry-run summary additionally
 *   carries `existingWorkspace`.
 * @throws {Error} When the workspace already exists and `options.force` is not set.
 */
export function createWorkspace(target, options = {}) {
  const targetRoot = path.resolve(target);
  const workspaceRoot = path.join(targetRoot, workspaceDirName);
  const entries = workspaceFileEntries(workspaceRoot);

  if (options.dryRun) {
    // Honest preview: the plan must count EVERY file a real init would write, so
    // "Files planned" matches the later "Files written". The workspace .gitignore
    // is not part of workspaceFileEntries, so it is added to the count here
    // whenever it would be written.
    const gitignoreFiles = options.gitignore ? 1 : 0;
    return {
      targetRoot,
      workspaceRoot,
      files: entries.length + gitignoreFiles,
      backupPath: null,
      dryRun: true,
      written: false,
      existingWorkspace: existsSync(workspaceRoot)
    };
  }

  const alreadyThere = existsSync(workspaceRoot);
  if (alreadyThere && !options.force) {
    throw new Error(`${workspaceRoot} already exists. Pass --force to replace the generated workspace.`);
  }

  // Reaching this point with an existing workspace means --force was given.
  const backupPath = alreadyThere ? backupExistingWorkspace(targetRoot, workspaceRoot) : null;

  ensureDir(workspaceRoot);
  entries.forEach(([file, content]) => {
    writeText(file, content);
  });

  // Write the workspace .gitignore ONLY when asked (the user-facing `init` passes
  // gitignore:true). The template generator (contract validator) and `demo` do
  // not, so the committed/dogfooded .aict stays free of a nested .gitignore that
  // would make npm pack drop the seed ledgers. This happens before countFiles so
  // the reported total includes the file.
  if (options.gitignore) {
    const gitignorePath = path.join(workspaceRoot, ".gitignore");
    writeText(gitignorePath, workspaceGitignore());
  }

  return {
    targetRoot,
    workspaceRoot,
    files: countFiles(workspaceRoot),
    backupPath,
    dryRun: false,
    written: true
  };
}
284
+
285
/**
 * Build the plain-object manifest that is serialized into WORKSPACE_MANIFEST.json.
 *
 * Key order is significant to the serialized output and is kept as listed. The
 * id/file lists are derived from the catalog definitions at call time.
 *
 * NOTE(review): workspaceFileEntries also writes files under `walkthroughs/`,
 * which is not listed in `workspaceDirs` below. Confirm whether that omission is
 * intentional before changing it, since the manifest is part of the template.
 *
 * @returns {object} The manifest object.
 */
export function workspaceManifest() {
  const idsOf = (definitions) => definitions.map((definition) => definition.id);

  return {
    name: "AI Collaboration Open System Workspace",
    version: "0.1.0",
    localFirst: true,
    defaultNetworkUse: "none",
    workspaceDirs: [
      "profile",
      "context",
      "acceptance",
      "guard",
      "handoff",
      "harvest",
      "roles",
      "modes",
      "mechanisms",
      "prompts",
      "skills",
      "adapters",
      "examples",
      "cookbook",
      "state",
      "privacy"
    ],
    layers: idsOf(layerDefinitions),
    mechanisms: idsOf(mechanismDefinitions),
    prompts: promptDefinitions.map((definition) => definition.file),
    skills: idsOf(skillDefinitions),
    adapters: idsOf(adapterDefinitions),
    syntheticCases: idsOf(caseDefinitions),
    firstExperience: "START_HERE.md -> 10/30/60 minute path -> synthetic loop -> real task adaptation"
  };
}
319
+
320
/**
 * Count the non-directory entries under `root`, descending into subdirectories.
 *
 * Each entry is classified with `statSync`, and directories contribute the count
 * of their contents rather than counting as one themselves.
 *
 * @param {string} root - Directory to walk.
 * @returns {number} Total number of non-directory entries found.
 */
export function countFiles(root) {
  return readdirSync(root).reduce((runningTotal, name) => {
    const childPath = path.join(root, name);
    const contribution = statSync(childPath).isDirectory() ? countFiles(childPath) : 1;
    return runningTotal + contribution;
  }, 0);
}
333
+
334
/**
 * Build the Markdown body of the profile-candidates buffer document.
 *
 * The document is assembled from its top-level sections; sections are
 * separated by one blank line and the text ends with a single newline.
 *
 * @returns {string} The complete Markdown text.
 */
export function profileCandidates() {
  const sections = [
    `# Profile Candidates (buffer before the long-term profile)

This file is the holding area for **proposed** profile preferences. The 10-minute loop (\`../walkthroughs/10-minute-your-task.md\`, Step 4) can suggest a profile candidate when a stable preference shows up more than once. That suggestion is a guess, not a fact - so it lands here as \`proposed\` instead of editing your real profile, and an unreviewed guess never hardens into a standing rule future sessions obey.

This is local-first and public-safe. Keep only general, redacted preferences here - no private names, paths, customers, or internal numbers. The row below is a synthetic example, not real data.`,
    `## State machine

Every candidate moves through exactly these four states:

| State | Meaning | Touches your long-term profile? |
| --- | --- | --- |
| \`proposed\` | The AI suggested it this loop; not yet reviewed, not trusted. | No |
| \`confirmed\` | You reviewed it and it is correct as written. | Yes - it may graduate as-is |
| \`edited\` | Correct only after you reword it; the edited line is what graduates. | Yes - the edited line graduates |
| \`dropped\` | You reviewed it and it does not belong; kept on the record so it is not re-proposed. | No |

Rule: **only \`confirmed\` and \`edited\` candidates graduate into your long-term profile, and only after you say so.** \`proposed\` and \`dropped\` never edit your profile. This is the same confirm / edit / drop discipline the harvest mechanism uses for harvested cards - nothing lands on the AI's say-so alone.`,
    `## How to use this

1. After a loop, a new candidate is appended below with status \`proposed\`.
2. When you review it, change its status to \`confirmed\`, \`edited\`, or \`dropped\` (edit the wording in place if \`edited\`).
3. Move \`confirmed\` / \`edited\` lines into your profile (\`EXAMPLE.synthetic.md\` here, or your own real profile file), then mark the row \`graduated\` in the Notes column or delete it.
4. Leave \`dropped\` rows here so the same guess is not proposed every loop.`,
    `## Candidates

| Candidate (one line) | Status | Source loop | Reviewed on | Notes |
| --- | --- | --- | --- | --- |
| (synthetic) Prefer direct risk calls over reassurance | proposed | synthetic-loop-01 | (not reviewed yet) | example row; replace with your own |`,
    `## Why this buffer exists

Without it, the loop would "drop a candidate straight into the profile" - and a one-off observation from a single task could quietly become a permanent rule that every future session obeys, with no human in the loop. The buffer keeps your profile honest: it only ever grows from preferences you actually confirmed.`,
    `## This file vs the learning ledger (two surfaces, same discipline)

There are two places a proposed preference can live, and they are partners, not rivals:

- **This file (\`CANDIDATES.md\`)** is the human view - a table you read and edit by hand while deciding what belongs in your profile. It covers profile candidates only.
- **The learning ledger (\`../state/learning-ledger.jsonl\`)** is the machine record the CLI writes - \`ai-collab learning add --type profile --content "..."\` appends a \`proposed\` row, \`learning confirm/edit/drop\` flips its state, and \`ai-collab status\` echoes back the one preference you most recently confirmed so the next task starts ahead. It also records \`harvest\` lessons, which this file does not.

Both use the exact same \`proposed / confirmed / edited / dropped\` states and the same graduation rule (only \`confirmed\`/\`edited\` graduate, only when you say so). But they are **two separate stores with no auto-sync, shared id, or dedupe between them** - so pick one place per candidate and keep it there. If you record the same preference in both and then change one, they will drift, and nothing reconciles them for you. When they do disagree, the **learning ledger is the source of truth**: it is what \`ai-collab status\` reads back and what the machine acts on; \`CANDIDATES.md\` is a human-only view that no command reads. Use whichever fits the moment - hand-edit this table, or run the \`learning\` commands - just not both for the same candidate, and let \`confirmed\`/\`edited\` lines graduate into your real profile.`
  ];
  return `${sections.join("\n\n")}\n`;
}
381
+
382
// --- Run-layer ledgers (P1) ------------------------------------------------
//
// The five append-only JSONL ledgers under state/ are the runtime substance of
// the run layer: real `ai-collab task/evidence/run/receipt` commands append to
// them at runtime with real timestamps and ids. The committed templates below,
// by contrast, must be FULLY DETERMINISTIC (no Date.now / random) so the
// generate-and-compare contract check (scripts/validate-contract.js diffs the
// committed .aict byte-for-byte against a fresh generation) keeps passing.
//
// Each ledger ships at least one synthetic, public-safe seed row instead of
// being empty. Two reasons: (1) writeText() force-appends a trailing newline,
// so a truly empty body would collapse to a lone "\n" and be ambiguous; (2) a
// synthetic row gives the privacy scanner a real jsonl line to scan and gives
// the validator's cross-reference checks a consistent starting set. The seed rows
// are mutually consistent (evidence/run/receipt all point at task t0) so a clean
// generated workspace passes all six ledger checks with zero errors.
//
// SYNTHETIC_TS is a fixed date string (not a real timestamp) shared by every
// seed row to keep the templates deterministic. It is written in the same
// ISO-8601 UTC shape (`YYYY-MM-DDTHH:mm:ss.sssZ`) for every timestamp field
// the seed rows below fill in.
const SYNTHETIC_TS = "2026-01-01T00:00:00.000Z";
402
+
403
/**
 * Produce the seed content for the tasks ledger: one synthetic task row,
 * serialized as a single JSON line.
 *
 * @returns {string} One JSON object on one line (no trailing newline).
 */
export function tasksLedger() {
  const seedTask = {
    id: "t0",
    title: "(synthetic) example task seed row - replace with your own",
    status: "open",
    createdAt: SYNTHETIC_TS
  };
  return JSON.stringify(seedTask);
}
411
+
412
/**
 * Produce the seed content for the evidence ledger: two synthetic rows, one
 * JSON object per line, both bound to task t0.
 *
 * - e0 is a generic note.
 * - e1 is a kind:"cross_family_guard" row. It is the evidence the seed receipt
 *   (c0, an L3 pass) has to cite so that its cross-family claim is backed
 *   rather than self-asserted (P2 evidence gate). The P2 structure gate also
 *   requires such a row to name who reviewed it (at least one of
 *   reviewer / family / ref), which is why e1 carries a synthetic `reviewer`
 *   and `family`.
 *
 * Key order follows a live `evidence add` record so the on-disk shape matches.
 *
 * @returns {string} Two JSON lines joined by "\n" (no trailing newline).
 */
export function evidenceLedger() {
  const seedRows = [
    {
      id: "e0",
      taskId: "t0",
      kind: "note",
      summary: "(synthetic) example evidence seed row bound to task t0",
      createdAt: SYNTHETIC_TS
    },
    {
      id: "e1",
      taskId: "t0",
      kind: "cross_family_guard",
      summary: "(synthetic) cross-family guard review seed row bound to task t0",
      reviewer: "(synthetic) example reviewer",
      family: "(synthetic) other-model-family",
      createdAt: SYNTHETIC_TS
    }
  ];
  return seedRows.map((row) => JSON.stringify(row)).join("\n");
}
442
+
443
/**
 * Produce the seed content for the runs ledger: one synthetic, already
 * finished run bound to task t0, serialized as a single JSON line.
 *
 * @returns {string} One JSON object on one line (no trailing newline).
 */
export function runsLedger() {
  const seedRun = {
    id: "r0",
    taskId: "t0",
    command: "echo synthetic-seed",
    // Start and finish share the fixed synthetic timestamp.
    startedAt: SYNTHETIC_TS,
    finishedAt: SYNTHETIC_TS,
    exitCode: 0,
    status: "finished"
  };
  return JSON.stringify(seedRun);
}
454
+
455
/**
 * Produce the seed content for the receipts ledger: one synthetic receipt
 * (c0) for task t0, serialized as a single JSON line.
 *
 * The seed has to obey the same rules as a live `receipt create`:
 *
 * - A1: guardLevel is COMPUTED from the review method plus evidence, not
 *   declared. The seed therefore records reviewMode "cross_family" and cites
 *   both e0 (note) and e1 (kind:cross_family_guard, with its required family
 *   attribution).
 * - computeGuardLevel({reviewMode:"cross_family",
 *   hasCrossFamilyGuardEvidence:true, hasAnyEvidence:true}) yields "L3" with
 *   familyUnverified:true: the family is self-declared and the tool cannot
 *   verify it. Only a rerun reconciled to a recorded run, on top of this
 *   cross-family review, would reach the strongest local-trust level, L4.
 * - With those inputs guardLevelVerdictError("L3", "pass", rerun=false,
 *   crossFamily=true) is null and receiptStatusFor("pass", [e0,e1], false)
 *   computes "accepted".
 * - Check 8c therefore sees stored L3 == computed L3 (not over-claimed) and
 *   check 8d sees the required familyUnverified:true marker.
 *
 * Key order follows the live `receipt create` record (id, taskId, verdict,
 * guardLevel, reviewMode, evidenceIds, familyUnverified, status, createdAt)
 * so the on-disk shape matches.
 *
 * @returns {string} One JSON object on one line (no trailing newline).
 */
export function receiptsLedger() {
  const seedReceipt = {
    id: "c0",
    taskId: "t0",
    verdict: "pass",
    guardLevel: "L3",
    reviewMode: "cross_family",
    evidenceIds: ["e0", "e1"],
    familyUnverified: true,
    status: "accepted",
    createdAt: SYNTHETIC_TS
  };
  return JSON.stringify(seedReceipt);
}
484
+
485
/**
 * Produce the seed content for the learning ledger: one synthetic `harvest`
 * row in the `proposed` state, bound to task t0, as a single JSON line.
 *
 * @returns {string} One JSON object on one line (no trailing newline).
 */
export function learningLedger() {
  const seedLesson = {
    id: "l0",
    taskId: "t0",
    type: "harvest",
    content: "(synthetic) example learning seed row - written by the P4 harvest flow",
    status: "proposed",
    createdAt: SYNTHETIC_TS
  };
  return JSON.stringify(seedLesson);
}
495
+
496
/**
 * Build the Markdown redaction checklist: a title, a one-line instruction,
 * and one unchecked task-list item per check.
 *
 * @returns {string} The complete Markdown text, ending in a single newline.
 */
export function redactionChecklist() {
  const checks = [
    `The case is synthetic or public-safe.`,
    `No actual client, employer, or account names appear.`,
    `No local machine paths appear.`,
    `No raw private conversations appear.`,
    `No private tool-routing details or hooks appear.`,
    `No tokens, keys, cookies, credentials, or session IDs appear.`,
    `No private knowledge-base source material appears.`,
    `The example can stand alone without revealing the owner's private system.`
  ];
  const boxes = checks.map((check) => `- [ ] ${check}`).join("\n");
  const blocks = [
    `# Redaction Checklist`,
    `Use this before publishing an example or sharing a workspace.`,
    boxes
  ];
  return `${blocks.join("\n\n")}\n`;
}
511
+
512
/**
 * Build the Markdown README that introduces the role cards: two intro
 * paragraphs, the list of public roles, and the rationale for the matrix
 * format.
 *
 * @returns {string} The complete Markdown text, ending in a single newline.
 */
export function rolesReadme() {
  const publicRoles = [
    [`Owner / controller`, `decides goals and acceptance, and holds final judgment.`],
    [`Executor`, `produces the artifact inside the agreed boundary.`],
    [`System guardian`, `challenges risk and evidence before output is trusted.`],
    [`Scout`, `gathers options and external facts before a decision.`],
    [`Supervisor`, `translates the AI's state into plain language and watches the main line and the wording, distinct from the guardian who watches the facts.`],
    [`Harvester`, `extracts reusable learning after a loop.`]
  ];
  const roleList = publicRoles.map(([role, duty]) => `- ${role}: ${duty}`).join("\n");

  // Every entry is one Markdown block; blocks are separated by a blank line.
  const blocks = [
    `# Roles`,
    `Roles keep the AI Collaboration Open System human-centered. They define responsibility, not hidden authority.`,
    `Each role card below is a responsibility matrix, not a vibe. It states six things so two different tools (or two different sessions) read the same boundary: what the role CAN do, what it CANNOT do, what it takes in, what it produces, who it escalates to when something exceeds its authority, and one synthetic overreach example showing what breaks when the boundary is crossed.`,
    `## Public roles`,
    roleList,
    `## Why a matrix and not just "does / does not"`,
    `A two-line "does / does not" tells a tool the gist but not the seams: where work enters, where it leaves, and who catches it when it exceeds the role. The missing seams are exactly where collaboration fails — an executor quietly becomes a rule-changer, a guardian quietly becomes an editor, a scout quietly becomes a decider, a supervisor quietly becomes a second guardian. Naming inputs, outputs, the escalation target, and a concrete overreach example closes those seams.`
  ];
  return `${blocks.join("\n\n")}\n`;
}
533
+
534
/**
 * Build the Markdown role card for the owner / controller.
 *
 * The card is described as a list of `[heading, content]` pairs; content is
 * either a paragraph string or an array of bullet texts.
 *
 * @returns {string} The complete Markdown text, ending in a single newline.
 */
export function roleOwnerController() {
  const sections = [
    [`Purpose`, `Keep human judgment at the center of the workflow. The controller is the top of the chain: it sets direction and owns the final call, so the separation of decision from production stays intact.`],
    [`Can do`, [
      `Define the goal, the scope, and the acceptance criteria for a piece of work.`,
      `Issue instructions and choose between options the executor or scout brings back.`,
      `Accept or reject delivered work, and decide when residual risk is acceptable.`,
      `Make the final call and close the loop.`
    ]],
    [`Cannot do`, [
      `Approve its own judgment by pretending agreement is independent review (it must not be both author and reviewer of the same decision).`,
      `Make the guardian's call for it, or treat its own opinion as a guard pass.`,
      `Step in and personally do the heavy production work that should have been delegated, because then no independent reviewer is left to check it.`
    ]],
    [`Inputs`, [
      `The task or problem to be solved.`,
      `Proposed plans, options, and trade-offs from the executor or scout.`,
      `Returned artifacts, guard verdicts, and harvest cards awaiting confirmation.`
    ]],
    [`Outputs`, [
      `Clear instructions and a defined boundary for each piece of work.`,
      `Acceptance or rejection decisions with the reason.`,
      `The final, recorded decision that lets the loop close.`
    ]],
    [`Escalates to`, [
      `No one above it — the controller is the top of the responsibility chain. When it lacks facts it tasks a scout; when it needs an independent check it tasks a guardian; but the decision itself does not get handed upward.`
    ]],
    [`Overreach example (synthetic)`, `A controller decides a feature is simple, sits down, and writes the implementation itself instead of delegating it. Because the controller is now the author, there is no independent party left to review whether the code actually meets acceptance — the controller would be grading its own homework. The separation that the whole system relies on collapses, and a defect ships unnoticed because the only person who could have caught it is the one who wrote it.`]
  ];
  const renderSection = ([heading, content]) => {
    const text = Array.isArray(content) ? content.map((item) => `- ${item}`).join("\n") : content;
    return `## ${heading}\n\n${text}`;
  };
  return `# Owner / Controller\n\n${sections.map(renderSection).join("\n\n")}\n`;
}
575
+
576
/**
 * Build the Markdown role card for the executor.
 *
 * The card is described as a list of `[heading, content]` pairs; content is
 * either a paragraph string or an array of bullet texts.
 *
 * @returns {string} The complete Markdown text, ending in a single newline.
 */
export function roleExecutor() {
  const sections = [
    [`Purpose`, `Produce the requested artifact inside the context and acceptance boundary, and prove it with evidence rather than claims.`],
    [`Can do`, [
      `Implement the task exactly as instructed, working from the provided files.`,
      `Change the agreed artifacts, save state, and record what was done.`,
      `Self-verify the work and report changed files or sections with verification evidence.`
    ]],
    [`Cannot do`, [
      `Make the controller's decisions or accept its own work as done.`,
      `Silently expand scope beyond the instructed task.`,
      `Cross a core boundary (governance rules, security-sensitive areas, anything outside the task) without stopping to ask first.`
    ]],
    [`Inputs`, [
      `A task packet: the goal, the boundary, the relevant files, known constraints, and the acceptance criteria.`
    ]],
    [`Outputs`, [
      `The requested artifact.`,
      `A three-part report: what changed, the actual verification evidence, and what remains unverified.`
    ]],
    [`Escalates to`, [
      `The controller — whenever the task is ambiguous, the scope needs to grow, or a core boundary is in the way, the executor stops and hands the decision back up rather than deciding for itself.`
    ]],
    [`Overreach example (synthetic)`, `While fixing one small bug, an executor notices a shared rule it thinks is wrong and edits it on the spot without asking. A scoped one-line fix has now quietly become a change to the rules everyone else relies on — a change no one reviewed and no one approved. The next session inherits an altered rule with no decision behind it, and tracing why behavior changed becomes a hunt because the change was never surfaced as a decision.`]
  ];
  const renderSection = ([heading, content]) => {
    const text = Array.isArray(content) ? content.map((item) => `- ${item}`).join("\n") : content;
    return `## ${heading}\n\n${text}`;
  };
  return `# Executor\n\n${sections.map(renderSection).join("\n\n")}\n`;
}
613
+
614
/**
 * Build the Markdown role card for the system guardian.
 *
 * The card is described as a list of `[heading, content]` pairs; content is
 * either a paragraph string or an array of bullet texts.
 *
 * @returns {string} The complete Markdown text, ending in a single newline.
 */
export function roleSystemGuardian() {
  const sections = [
    [`Purpose`, `Challenge output before it becomes trusted state. The guardian is a referee, not a player: it finds problems and points to evidence, but it does not take over the work.`],
    [`Can do`, [
      `Independently review the artifact for acceptance fit, privacy, evidence quality, and handoff readiness.`,
      `Surface blind spots and name required fixes, leading with findings.`,
      `Issue one of the four standard verdicts (pass / reject / insufficient_evidence / pass_with_risk) with the guard level (L0-L4) for the evidence seen, and point every finding to a specific line, section, or missing piece of evidence. A plain pass needs L3+ (a cross-family evidence pack); a pass_with_risk needs an explicit owner sign-off before it counts as accepted.`
    ]],
    [`Cannot do`, [
      `Only find — it does not give orders, and it does not execute.`,
      `Rewrite or fix the artifact itself by default (fixing what it judges makes it both referee and player).`,
      `Make the decision for anyone; a concern is not an approval, and an approval is not the controller's acceptance.`
    ]],
    [`Inputs`, [
      `The object under review: the artifact, plus its acceptance card, context boundary, and the verification evidence behind any completion claim.`
    ]],
    [`Outputs`, [
      `A verdict and a findings list, each finding tied to concrete evidence.`,
      `Required fixes and named residual risk, handed back for a decision — not applied directly.`
    ]],
    [`Escalates to`, [
      `The controller — the guardian reports findings and a verdict, then the controller decides what to fix, what to accept as residual risk, and whether to close.`
    ]],
    [`Overreach example (synthetic)`, `A guardian reviewing an artifact spots a flaw and, instead of reporting it, just edits the artifact to fix it. Now the same party both judged the work and changed it, so its independence is gone: no one is left to check whether the "fix" is actually correct or whether it quietly broke something else. The verdict can no longer be trusted, because the referee walked onto the field and started playing.`]
  ];
  const renderSection = ([heading, content]) => {
    const text = Array.isArray(content) ? content.map((item) => `- ${item}`).join("\n") : content;
    return `## ${heading}\n\n${text}`;
  };
  return `# System Guardian\n\n${sections.map(renderSection).join("\n\n")}\n`;
}
651
+
652
/**
 * Build the Markdown role card for the scout.
 *
 * The card is described as a list of `[heading, content]` pairs; content is
 * either a paragraph string or an array of bullet texts.
 *
 * @returns {string} The complete Markdown text, ending in a single newline.
 */
export function roleScout() {
  const sections = [
    [`Purpose`, `Collect options and decision-changing evidence before the controller chooses. The scout gathers facts; it does not judge them.`],
    [`Can do`, [
      `Gather external facts, candidate paths, and industry comparisons relevant to a pending decision.`,
      `List evidence gaps and label how time-sensitive each finding is.`,
      `Bring back sourced material so the controller can decide on solid ground.`
    ]],
    [`Cannot do`, [
      `Interpret, judge, or rule on the evidence it gathers.`,
      `Recommend a path or lean toward an option ("you should pick A").`,
      `Turn exploration into implementation, or decide anything itself.`
    ]],
    [`Inputs`, [
      `The specific question or unknown to investigate, framed by the controller.`
    ]],
    [`Outputs`, [
      `A fact card: candidate options and findings, each with its source and a freshness label, and no verdict attached.`
    ]],
    [`Escalates to`, [
      `The controller — the scout delivers sourced facts and hands the synthesis and the decision upward, keeping fact-gathering separate from judgment.`
    ]],
    [`Overreach example (synthetic)`, `Asked only to gather options, a scout instead returns "you should choose A." By folding a recommendation into the fact-gathering, it has mixed evidence with judgment — and now the controller can no longer reason from clean facts, because the conclusion is already baked in. The independence of the later decision is contaminated before it even starts, and the scout has quietly made a call that was never its to make.`]
  ];
  const renderSection = ([heading, content]) => {
    const text = Array.isArray(content) ? content.map((item) => `- ${item}`).join("\n") : content;
    return `## ${heading}\n\n${text}`;
  };
  return `# Scout\n\n${sections.map(renderSection).join("\n\n")}\n`;
}
688
+
689
/**
 * Build the Markdown role card for the harvester.
 *
 * The card is described as a list of `[heading, content]` pairs; content is
 * either a paragraph string or an array of bullet texts.
 *
 * @returns {string} The complete Markdown text, ending in a single newline.
 */
export function roleHarvester() {
  const sections = [
    [`Purpose`, `Extract reusable learning after a loop. The harvester proposes; it does not file to the source of truth on its own.`],
    [`Can do`, [
      `Sweep a conversation or finished loop and lift the reusable bits into harvest cards.`,
      `Redact private material into a general, public-safe form before anything is proposed.`,
      `Draft candidate prompts, decisions, lessons, and rule suggestions for confirmation.`
    ]],
    [`Cannot do`, [
      `Write directly into the knowledge-base source of truth.`,
      `Accept its own cards as final, or skip the human confirmation step.`,
      `Generalize a single incident into a permanent rule without evidence and without sign-off.`
    ]],
    [`Inputs`, [
      `The conversation, loop, or raw material to harvest from.`
    ]],
    [`Outputs`, [
      `Harvest cards (one item per card) in a public-safe form, presented as candidates awaiting confirmation — not filed yet.`
    ]],
    [`Escalates to`, [
      `The owner / controller — nothing lands in the knowledge base until the owner confirms each card; the harvester stages, the owner files.`
    ]],
    [`Overreach example (synthetic)`, `A harvester writes a card and, without waiting for confirmation, files it straight into the knowledge base. An unverified "lesson" has now been frozen into a standing rule that future loops will obey — except no one checked whether it was actually true. A one-off observation becomes durable doctrine by accident, and later work is silently shaped by a rule that was never approved and may be wrong.`]
  ];
  const renderSection = ([heading, content]) => {
    const text = Array.isArray(content) ? content.map((item) => `- ${item}`).join("\n") : content;
    return `## ${heading}\n\n${text}`;
  };
  return `# Harvester\n\n${sections.map(renderSection).join("\n\n")}\n`;
}
725
+
726
/**
 * Build the Markdown role card for the supervisor.
 *
 * The card is described as a list of `[heading, content]` pairs; content is
 * either a paragraph string or an array of bullet texts.
 *
 * @returns {string} The complete Markdown text, ending in a single newline.
 */
export function roleSupervisor() {
  const sections = [
    [`Purpose`, `Lower the human's cost of watching the work, without taking the wheel. The supervisor is a state translator: it turns what the AI is doing into plain language and watches whether the work is still on track. It does not steer direction and it does not check facts line by line — that is the guardian's job. The split is deliberate: the guardian watches the facts (is this claim backed, does this code work, did scope drift); the supervisor watches the main line and the wording (are we still going where we meant to, and is the AI being honest about how done it is).`],
    [`Can do`, [
      `Translate the AI's current state into plain language for the human: where the main line is, what just happened, what the next step is.`,
      `Watch three things on every pass: (1) is the main line drifting — is the work quietly chasing a side-quest while the real goal stalls; (2) is "done-pending-verification" being passed off as "accepted" — is unproven work being described as finished; (3) is a decision being punted back to the human — is the AI bouncing a choice it should have made itself.`,
      `Issue one of three plain verdicts: SEND (it can go forward), SEND WITH A CORRECTION (a small fix rides along, no need to redo), or STOP AND FIX FIRST (something on the main line is wrong enough to halt).`
    ]],
    [`Cannot do`, [
      `Steer the direction or make the call — it flags, it does not decide, and "looks on track to me" is not the human's approval.`,
      `Do the guardian's job: it does not write a formal verdict on facts, does not hunt code-level defects, does not rule on evidence quality. When it strays into line-by-line fact-checking it has stopped being a supervisor and become a second guardian.`,
      `Open a side issue into a new main line, or let the human get pulled into doing a judgment the AI should have made.`
    ]],
    [`Inputs`, [
      `The AI's current state to translate (a status update, a handoff packet, a plan, a progress report) and the main line it is supposed to be serving, so drift can be measured across steps, not just within one.`
    ]],
    [`Outputs`, [
      `A short plain-language status read (where the main line is, the current step, the next action) plus the three-question check, ending in one of the three verdicts: send / send-with-a-correction / stop-and-fix-first.`,
      `For a send-with-a-correction, the exact small fix the next step should carry; for a stop-and-fix-first, what specifically must be repaired before the work moves on.`
    ]],
    [`Escalates to`, [
      `The owner / controller — the supervisor reports its plain-language read and its verdict, then the human decides. For a true facts-and-evidence judgment it hands off to the guardian rather than ruling on facts itself.`
    ]],
    [`Overreach example (synthetic)`, `A supervisor reviewing a status update stops translating and starts grading the code: it digs into a function, declares the implementation correct, and issues a pass on the technical work. But checking facts and clearing evidence is the guardian's role, and now the same pass mixes "the main line looks on track" with "the code is verified" — two different judgments the human can no longer tell apart. The supervisor has quietly become a second guardian, the real fact-check never gets an independent pass, and a plain-language safety net the human relied on to stay cheap has turned into one more heavyweight reviewer.`]
  ];
  const renderSection = ([heading, content]) => {
    const text = Array.isArray(content) ? content.map((item) => `- ${item}`).join("\n") : content;
    return `## ${heading}\n\n${text}`;
  };
  return `# Supervisor\n\n${sections.map(renderSection).join("\n\n")}\n`;
}
763
+
764
/**
 * Build the Markdown README that introduces the mode cards and describes the
 * loop between modes.
 *
 * @returns {string} The complete Markdown text, ending in a single newline.
 */
export function modesReadme() {
  // Every entry is one Markdown block; blocks are separated by a blank line.
  const blocks = [
    `# Modes`,
    `Modes state what kind of work is happening now. A mode is a boundary, not a personality.`,
    `Use one mode at a time: shape, execute, review, handoff, or harvest.`,
    `Each mode card below is a full spec, not a one-liner. It states six things so a tool always knows the edges of the current mode: the entry condition that lets you start, the actions allowed, the actions forbidden, the output format, the exit condition that ends the mode, and how it hands off to the other modes. The forbidden line and the handoff line are what keep modes from blurring into one another.`,
    `## The loop between modes`,
    `Shape comes first when the request is still fuzzy: it turns a rough intent into a signable thin contract before anything is built, so execute starts from a boundary instead of a guess. Then the core loop runs. Execute produces, then review challenges what execute produced; a rejection sends it back to execute, a pass moves it toward handoff or close. Handoff carries state across a session or tool boundary so the receiver re-enters execute cleanly. Harvest runs at a seam or close to lift reusable learning, then returns to whatever mode was active. Naming each entry and exit explicitly is what stops "shape" from sliding into design, "review" from quietly editing, or "execute" from drifting past its task.`
  ];
  return `${blocks.join("\n\n")}\n`;
}
778
+
779
/**
 * Build the Markdown mode card for execute mode.
 *
 * The card is a title, a one-line summary, and a list of `[heading, content]`
 * pairs; content is either a paragraph string or an array of bullet texts.
 *
 * @returns {string} The complete Markdown text, ending in a single newline.
 */
export function modeExecute() {
  const sections = [
    [`Entry condition`, `There is a clearly defined task with execution authority granted: a goal, a boundary, and acceptance criteria are all in place.`],
    [`Allowed actions`, [
      `Create or edit the agreed artifact.`,
      `Change the in-scope files or sections and save state.`,
      `Self-verify the work and capture the evidence.`
    ]],
    [`Forbidden actions`, [
      `Doing work outside the stated task or boundary.`,
      `Crossing a core boundary (rules, security-sensitive areas, anything out of scope) without stopping.`,
      `Declaring the work done without running the checks.`
    ]],
    [`Output format`, `The artifact, the list of changed files or sections, the verification evidence, and an explicit note of what is still unverified.`],
    [`Exit condition`, `The task is wrapped up and has passed acceptance, or it is blocked and must be handed off.`],
    [`Inter-mode handoff`, `When the artifact is done, move to review so an independent pass can challenge it before it is trusted; if work must cross a session or tool boundary first, move to handoff and let the receiver re-enter execute.`]
  ];
  const renderSection = ([heading, content]) => {
    const text = Array.isArray(content) ? content.map((item) => `- ${item}`).join("\n") : content;
    return `## ${heading}\n\n${text}`;
  };
  const blocks = [
    `# Execute Mode`,
    `Build the agreed artifact, and only that.`,
    ...sections.map(renderSection)
  ];
  return `${blocks.join("\n\n")}\n`;
}
813
+
814
/**
 * Build the Markdown mode card for review mode.
 *
 * The card is a title, a one-line summary, and a list of `[heading, content]`
 * pairs; content is either a paragraph string or an array of bullet texts.
 *
 * @returns {string} The complete Markdown text, ending in a single newline.
 */
export function modeReview() {
  const sections = [
    [`Entry condition`, `There is a produced artifact that needs to be checked before anyone trusts it.`],
    [`Allowed actions`, [
      `Inspect the artifact against context, acceptance, and evidence.`,
      `Challenge claims, surface blind spots, and point each finding to a specific line, section, or missing piece of evidence.`
    ]],
    [`Forbidden actions`, [
      `Editing or fixing the artifact under review. Review and repair stay separate, so the reviewer never becomes the author of what it judges.`
    ]],
    [`Output format`, `One of the four standard verdicts (pass / reject / insufficient_evidence / pass_with_risk) plus the guard level (L0-L4) for the evidence seen, the findings with severity, the required fixes, and the named residual risk. A plain pass requires L3+ (a cross-family evidence pack); a pass_with_risk is not accepted until the owner explicitly signs off on the residual risk.`],
    [`Exit condition`, `A verdict has been issued.`],
    [`Inter-mode handoff`, `On reject, hand the required fixes back to execute for repair; on pass, move to handoff or to close. Review never applies the fix itself — it returns the artifact to execute for that.`]
  ];
  const renderSection = ([heading, content]) => {
    const text = Array.isArray(content) ? content.map((item) => `- ${item}`).join("\n") : content;
    return `## ${heading}\n\n${text}`;
  };
  const blocks = [
    `# Review Mode`,
    `Inspect and challenge the artifact — without changing it.`,
    ...sections.map(renderSection)
  ];
  return `${blocks.join("\n\n")}\n`;
}
845
+
846
/**
 * Build the Markdown mode card for handoff mode.
 *
 * The card is a title, a one-line summary, and a list of `[heading, content]`
 * pairs; content is either a paragraph string or an array of bullet texts.
 *
 * @returns {string} The complete Markdown text, ending in a single newline.
 */
export function modeHandoff() {
  const sections = [
    [`Entry condition`, `Work is about to cross a boundary: a session is ending, a different tool is taking over, or a long task has reached a natural seam.`],
    [`Allowed actions`, [
      `Compress the current state into a structured handoff packet.`,
      `Seal the baseline (the exact point being handed off) so the receiver starts from a known state.`
    ]],
    [`Forbidden actions`, [
      `Dropping context the receiver needs.`,
      `Omitting the exact first action the next session should take.`
    ]],
    [`Output format`, `A handoff packet: what is done, what is pending, what is blocked, what is unverified, the sealed baseline, and the exact next step.`],
    [`Exit condition`, `The receiver confirms they can pick up from the packet alone, without re-reading the whole history.`],
    [`Inter-mode handoff`, `The receiver reads the packet and re-enters execute on the stated first action, continuing the loop from the sealed baseline rather than from zero.`]
  ];
  const renderSection = ([heading, content]) => {
    const text = Array.isArray(content) ? content.map((item) => `- ${item}`).join("\n") : content;
    return `## ${heading}\n\n${text}`;
  };
  const blocks = [
    `# Handoff Mode`,
    `Compress state so the next session or tool can pick up exactly where this one stopped.`,
    ...sections.map(renderSection)
  ];
  return `${blocks.join("\n\n")}\n`;
}
878
+
879
/**
 * Build the Markdown mode card for harvest mode.
 *
 * The card is a title, a one-line summary, and a list of `[heading, content]`
 * pairs; content is either a paragraph string or an array of bullet texts.
 *
 * @returns {string} The complete Markdown text, ending in a single newline.
 */
export function modeHarvest() {
  const sections = [
    [`Entry condition`, `Harvest is triggered, or a phase is closing, and there is reusable value worth saving before it is lost.`],
    [`Allowed actions`, [
      `Sweep the conversation or finished loop for reusable bits.`,
      `Draft harvest cards (one item per card) for decisions, lessons, methods, and stable preferences.`,
      `Redact private material into a general, public-safe form.`
    ]],
    [`Forbidden actions`, [
      `Filing anything into the knowledge base without redacting it first.`,
      `Deciding on the user's behalf whether a card lands; that confirmation belongs to the user.`
    ]],
    [`Output format`, `Harvest cards in a public-safe form, presented as candidates awaiting confirmation.`],
    [`Exit condition`, `The user has confirmed which cards land in the knowledge base.`],
    [`Inter-mode handoff`, `Harvest runs at a seam without taking over the work; once cards are confirmed and filed, it returns control to whatever mode was active before it (typically execute or a close).`]
  ];
  const renderSection = ([heading, content]) => {
    const text = Array.isArray(content) ? content.map((item) => `- ${item}`).join("\n") : content;
    return `## ${heading}\n\n${text}`;
  };
  const blocks = [
    `# Harvest Mode`,
    `Lift reusable learning out of finished work — into staged, public-safe cards.`,
    ...sections.map(renderSection)
  ];
  return `${blocks.join("\n\n")}\n`;
}
912
+
913
/**
 * Produce the Markdown document describing Shape mode.
 *
 * The text covers the mode's entry condition, allowed and forbidden actions,
 * output format, exit condition, and inter-mode handoff.
 *
 * @returns {string} The Shape mode Markdown, ending with a newline.
 */
export function modeShape() {
  const shapeModeMarkdown = `# Shape Mode

Turn a fuzzy idea into a signable thin contract — before any solution is designed or built.

## Entry condition

The person has a rough intent but nothing crisp enough to act on yet: "I want to improve X", "this feels off", "I have an idea". It sits between exploring the current state and designing a solution; you enter it instead of jumping straight to a plan from a vague request.

## Allowed actions

- Pull the intent into a few anchors (the situation, the wanted result, the result that would be unacceptable, what this round must protect) and name the two or three ambiguities that actually matter.
- Offer choices instead of asking for a blank-page description: pose comparison questions ("more like A or like B?"), and proactively recommend one to three reference points (an existing product or feature) so the person reacts to something concrete instead of recalling from nothing.
- Lead with weaknesses before any direction: name the two or three ways the current instinct most easily goes wrong, including what an automated step would amplify and what would be hardest to undo.
- Rewrite the problem statement when the framing itself is the trap, instead of politely optimizing along the original wording.
- Give two or three candidate directions in experience terms (what the person will feel, and the cost), each with its single biggest failure point, and let them pick one.
- Run a preview gate: before the person signs off, show a perceivable preview matched to the work — a mock or wireframe for UI, a 1-2-3 journey walk for a flow, or sample request/response and failure-case examples for backend — so "yes, that's the feeling" is grounded in something they can see, not an abstract description.

## Forbidden actions

- Discussing implementation, writing code, or proposing a technical solution before the contract is confirmed and the preview gate is passed.
- Asking the person to describe implementation detail (they are not here for that), or handing back "please describe your requirements in detail" instead of offering choices.
- Giving only one direction with no choice, pushing the job of "say what you want" back onto the person, or skipping the preview gate straight into design or build.

## Output format

A thin contract the person can sign: the success definition in their words, the failure definition and non-goals, the most likely wrong assumption, a short negative list (the two or three things most likely to be misread, missed, or amplified), and the confirmed reference points — followed by a matched preview the person has reacted to.

## Exit condition

The person confirms the thin contract AND the preview gate passes ("yes, that is the feeling"). Before exit, three adversarial questions must be answered: if this is pushed forward as currently understood, what is most likely done wrong; which weakness, left unfixed now, gets amplified by later automation; and is the real fix to the problem statement rather than the answer. Unanswered, the mode does not exit.

## Inter-mode handoff

On a confirmed contract and a passed preview, move to design for a technical plan (or straight to execute for a simple task), carrying the contract as the boundary the build is judged against. If the person rejects the direction, return to the choice step and re-pose it rather than optimizing the dead direction. A discovered framing error is a problem-statement rewrite, not a silent patch.
`;
  return shapeModeMarkdown;
}
950
+
951
// One-line "what it does" per mechanism, keyed by mechanism id. Kept short on
// purpose so the overview reads as a map, not a wall of text; each mechanism's
// own README carries the full purpose/trigger/process. If a mechanism is added
// to the catalog without an entry here, mechanismsReadme throws so the gap is
// caught at generate time instead of shipping a silently incomplete overview.
//
// The table is frozen: it is reference data, so any write after module load is
// a mistake and should fail loudly (ES modules are strict, so it throws)
// rather than silently changing the generated overview.
const MECHANISM_ONE_LINERS = Object.freeze({
  "dual-guard":
    "Trust an artifact only after a guard from a different model family (binding) plus an optional same-family guard (reference) have pressed on it, so a fluent answer is not believed just because it reads well.",
  "scout-review-controller":
    "Separate exploration from the decision: a SCOUT gathers options and evidence without choosing, so the controller decides on a real spread instead of the first path that came up.",
  "one-click-dispatch":
    "Turn a messy task into one self-contained work packet another AI tool can run without inheriting the whole chat.",
  "task-splitting":
    "Run a five-question pre-dispatch check before handing work to another AI, and split by topic or deliverable so a too-large prompt does not stall or collapse midway.",
  "anti-drift-partner":
    "Run a long thinking conversation with an AI that pushes back instead of agreeing — it surfaces your blind spots, probes at most two rounds, then commits to a judgment, so the talk never drifts into fluent confirmation.",
  "blind-spot-scan":
    "Borrow an outside viewpoint (customer, competitor, expert, opponent, your-future-self), re-read the decision through that seat, and get back the concrete dead angles you cannot see from your own plus the one counter-question most worth sitting with — and the borrowed viewpoint must genuinely challenge, never flatter from a costume.",
  "root-cause-brake":
    "When the same artifact is rejected twice in a row, trip a brake: no more patches until you answer four diagnostic questions, name the real cause, and rebuild the next version around it.",
  "half-product-review":
    "Block confident \"done\" when there are docs, demos, and architecture but no runnable first experience a stranger can actually complete.",
  "handoff-abc":
    "Externalize the current state into a structured packet so any session or tool resumes from where the work really is, instead of re-explaining the background each time.",
  "harvest-and-erc":
    "Capture the reusable lesson, prompt fragment, or rule candidate from finished work before it leaks away, including across multiple sessions.",
  "do-not-handle-yet":
    "Protect the main line by explicitly parking tempting but lower-priority work, on the record, instead of silently dropping or drifting into it.",
  "plain-language-first-screen":
    "Make the first screen explain the result, the path, and the proof before any concept or framework name.",
  "honest-calibration":
    "Lead every ask for a rating or recommendation with a short candor prefix (be candid, do not inflate, do not over-hedge) that offsets the model's pull to please and re-aims the baseline from make-you-happy to tell-the-truth.",
  "feedback-absorption-ledger":
    "When merging feedback from several sources, score each item across five tiers (absorb fully / refine / add a boundary / partly absorb / reject with a reason) so you keep independent judgment instead of rubber-stamping — the absorb/reject ratio is an outcome, not a target.",
  "collaboration-coach":
    "Proactively remind the user of the matching collaboration step at key moments, restrained by default.",
  "single-tool-guard":
    "The default starting guard for one-model-family users (most solo users) — new conversation + adversarial prompt turns a trusted \"looks fine\" into an evidence-backed, re-checkable result; honestly capped at L2 and explicitly not a passed cross-family gate, which is the upgrade ceiling."
});
990
+
991
/**
 * Build the Markdown overview that lists every mechanism with its one-liner.
 *
 * Each entry of `mechanismDefinitions` becomes one bullet made of the
 * mechanism's id, its title, and the matching text from
 * `MECHANISM_ONE_LINERS`.
 *
 * @returns {string} The mechanisms overview Markdown, ending with a newline.
 * @throws {Error} If a mechanism has no own, non-empty entry in
 *   `MECHANISM_ONE_LINERS`.
 */
export function mechanismsReadme() {
  const lines = mechanismDefinitions.map((mechanism) => {
    // Look at own properties only. A plain bracket lookup also walks the
    // prototype chain, so an id such as "constructor" or "toString" would
    // return a truthy inherited function and slip past the missing-entry guard.
    const hasOwnEntry = Object.prototype.hasOwnProperty.call(MECHANISM_ONE_LINERS, mechanism.id);
    const oneLiner = hasOwnEntry ? MECHANISM_ONE_LINERS[mechanism.id] : undefined;
    if (!oneLiner) {
      throw new Error(`mechanismsReadme: missing one-liner for mechanism "${mechanism.id}"`);
    }
    return `- \`${mechanism.id}/\` — **${mechanism.title}.** ${oneLiner}`;
  });
  return `# Mechanisms

The reusable collaboration moves of the AI Collaboration Open System. Each one is a local-first, public-safe Markdown package you can copy-paste into any AI tool. A mechanism is a self-contained directory with five files:

- \`README.md\` — what it is, when to use it, and when not to.
- \`PROMPT.md\` — the copy-paste prompt that runs it.
- \`TEMPLATE.md\` — a blank you fill in for your own task.
- \`EXAMPLE.synthetic.md\` — a worked synthetic example (no private data).
- \`FAILURE_MODES.md\` — how it goes wrong and how to keep it honest.

These are the standing moves; the six layers (profile, context, acceptance, guard, handoff, harvest) are the spine they plug into, and the \`cookbook/\` recipes show how to run them on a real task.

## The ${mechanismDefinitions.length} mechanisms

${lines.join("\n")}

## How to use one

Open the mechanism's \`README.md\` to confirm it fits, copy the body of its \`PROMPT.md\` into your AI tool, and paste your own material where the \`TEMPLATE.md\` marks it. Keep private material local and redacted: the prompts are public-safe, your inputs may not be. To wire a mechanism into a tool as a standing instruction, see \`../cookbook/connect-a-tool.md\`.
`;
}
1020
+
1021
/**
 * Produce the Markdown index for the cookbook: a short introduction followed
 * by one bullet per recipe file.
 *
 * @returns {string} The cookbook README Markdown, ending with a newline.
 */
export function cookbookReadme() {
  const introduction = `Do-it recipes for running the AI Collaboration Open System. Each recipe is a full configuration: when to use it, prerequisites, steps, a copy-paste block you can actually run, expected output, failure handling, a privacy note, and a next step. The walkthroughs are operation cards ("press these in this order"); these recipes explain why each step exists and how to adapt it to your own task.`;
  const recipeBullets = [
    `- \`run-a-first-loop.md\`: run one complete collaboration loop end to end on your own real task; the prepared synthetic case is an optional "watch the flow first" track.`,
    `- \`connect-a-tool.md\`: wire any AI tool (general chat AI, coding assistant, command-line AI) to the shared contract by copying files into its instruction slot.`,
    `- \`review-a-half-product.md\`: audit a "done but maybe not" deliverable by forcing an independent AI to cite evidence and find the gap.`,
    `- \`bridge-to-a-second-family.md\`: stand up the second, different-model-family AI the cross-family guard needs, and route a review across it — manual copy-paste (works anywhere) or an optional auto bridge.`,
  ];
  // The trailing empty element yields the final newline of the document.
  return ["# Cookbook", "", introduction, "", ...recipeBullets, ""].join("\n");
}
1032
+
1033
/**
 * Produce the Markdown for the "Run a First Loop" cookbook recipe.
 *
 * The recipe text walks through when to use it, prerequisites, the five
 * steps, a copy-paste prompt block, expected output, failure handling, a
 * privacy note, and next steps.
 *
 * @returns {string} The recipe Markdown, ending with a newline.
 */
export function cookbookFirstLoop() {
  const recipeMarkdown = `# Run a First Loop

A do-it recipe: run one complete AI collaboration loop end to end on your own real (lightly redacted) task, and watch a guard catch a false completion claim that a single agent would have accepted. This is the recipe; \`../walkthroughs/10-minute-your-task.md\` is the operation card for that real-task run. The walkthrough says "press these buttons in this order"; this recipe says "here is why each step exists, and here is how to adapt it to whatever you are actually working on." If you would rather watch the loop on a prepared example before pointing it at your own work, the synthetic case is the optional "look first" track — see the box below.

> Optional "look first" track: if your task feels too sensitive to paste right now, or you just want to see the shape of the loop first, run it once on the prepared synthetic case using \`../walkthroughs/10-minute.md\` (the demo preview), then come back and run it on your own task with the copy-paste block below.

## When to use this

- Your first time through the system, and you want to feel the whole loop on work you actually care about.
- You can describe the loop but have never watched a guard actually reject a fluent "done".
- You are about to start a real task and want a tested prompt sequence to adapt, not a blank page.

Skip it if you have already run the loop and just need the fast operation card; go straight to \`../walkthroughs/10-minute-your-task.md\`.

## Prerequisites

- This workspace exists (you are reading a file inside it).
- One real task of your own you can describe in a few sentences (lightly redacted: swap private names, paths, and numbers for placeholders). No private file needs to be uploaded — a redacted description is enough.
- One AI tool you can paste into (any general chat AI, coding assistant, or command-line AI). One tool is enough for a first pass; a second tool of a different model family makes the guard step stronger but is optional.
- Five to ten minutes. Nothing is uploaded; you only read and copy local files plus your own redacted description.

## Steps

Run these five moves on your own task. (Each move maps to one shipped artifact in \`../examples/ai-coding-long-task/artifacts/\` — open the matching file there any time you want to see the move done once on the prepared synthetic case.)

1. Set context. Describe your task to the AI and have it write a context package: the goal in one sentence, what is in scope, and explicit non-goals. This turns a tangled request into a boundary. Reference: \`context-package.md\`.
2. Set acceptance. Turn that context into an acceptance card — a short numbered list of checkable "done" criteria a reviewer can verify, not a vibe. This is the step people skip and then regret. Reference: \`acceptance-card.md\`.
3. Produce the first output. Have the AI do only the accepted slice and report what changed, what it ran, what failed, and what it did NOT verify. Read its completion claim against the actual code or evidence — this is where a fluent "done" usually overstates the work. References: \`execution-prompt.md\`, \`first-ai-output.md\`.
4. Run the guard. Paste that output plus \`../guard/PROMPT.md\` into a second AI tool (or the same one in a fresh turn) and ask it to review against the acceptance card. A good guard returns a cause-and-effect chain tied to specific spots and a reject, not a one-line "looks good". Reference: \`guard-review.md\`.
5. Revise and close. Fix the named blocker and re-show it with evidence, then write a handoff (done / pending / unverified) and harvest one reusable lesson with all private specifics removed. References: \`revised-output.md\`, \`handoff-note.md\`, \`harvest-seed.md\`.

The copy-paste block below is the prompt sequence that drives exactly these five moves on your task.

## Copy-paste block

Paste these in order into your AI tool, filling the bracketed parts with your own redacted task. This is the same loop as the steps above.

\`\`\`text
[1 / CONTEXT]
Help me write a context package for this task. Capture: the goal in one sentence, what is in scope, and explicit non-goals. Keep it local-first; I will not upload private material.
Task (redacted): [describe your task; replace any private name, path, or number with a placeholder]

[2 / ACCEPTANCE]
Now turn that context into an acceptance card: a short numbered list of checkable criteria that define "done". Each criterion must be something a reviewer can verify, not a vibe. Mark anything explicitly out of scope.

[3 / EXECUTION]
Do only the work the acceptance card describes. Do not expand scope. When done, report: what changed, what you ran to check it, what failed, and what you did NOT verify.

[4 / GUARD - run this in a SECOND tool, ideally a different model family]
Review the output below against the context and acceptance card. Point to concrete defects, missing evidence, privacy leaks, unsupported claims, and scope drift, each tied to a specific spot. Return findings by severity and a pass or reject. Do not approve a claim that the evidence does not back.
Output under review: [paste the step-3 output]
Acceptance card: [paste the step-2 card]

[5 / HANDOFF + HARVEST]
Write two short artifacts. Handoff: where the work is now, split into done / pending / unverified, plus the single next action and the exact baseline to start from. Harvest: one reusable lesson from this loop, written generally enough to apply to a future task, with all private specifics removed.
\`\`\`

## Expected output

- A context package and an acceptance card with checkable criteria (not prose).
- A first output whose completion claim you can check against evidence.
- A guard review that names a real, line-level defect and returns reject when the claim outruns the evidence, or pass with named residual risk when it does not.
- A revised output where the named blocker is fixed and re-shown with evidence.
- A handoff that separates done / pending / unverified, and one reusable harvest lesson.

## Failure handling

- The guard just says "looks good" and finds nothing. It is probably grading tone, not claims. Re-run step 4 and force it to check each completion claim against the acceptance card and point to a specific line or a missing piece of evidence; an empty finding list is only valid if it can say what it checked.
- The first output looks perfect and you cannot spot the defect. Re-read the completion claim next to the code or evidence it rests on. The classic failure is a claim ("keyboard reorder works") that the code does not actually perform.
- You only have one AI tool. Run the guard in a fresh turn or a fresh session of the same tool. It is weaker than a second model family (same family tends to miss the same things), but far better than no guard.
- The loop feels like overhead on a tiny task. It is, for a one-line change. Use the full loop on work another session or person will build on; for throwaway work, skip it.

## Privacy note

Redact before you paste: replace real product names, file paths, customer or person names, and internal numbers with placeholders. Do not paste a private profile, raw private chat logs, or a non-public path into an external AI. The loop works on a redacted description; it does not need the private original. (If you take the optional "look first" track instead, the shipped synthetic case uploads nothing at all — there is nothing of yours to redact.)

## Next step

- Connect this loop to the AI tool you actually use day to day: \`connect-a-tool.md\`.
- When you receive a "done" artifact you did not produce, pressure-test it: \`review-a-half-product.md\`.
- Reuse the full mechanism behind step 4 on higher-stakes work: \`../mechanisms/dual-guard/README.md\`.
`;
  return recipeMarkdown;
}
1117
+
1118
/**
 * Produce the Markdown for the "Connect a Tool" cookbook recipe.
 *
 * The recipe text explains how to copy the shared contract and a mechanism
 * prompt into an AI tool's standing-instruction slot, with prerequisites,
 * steps, a copy-paste block, expected output, failure handling, a privacy
 * note, and next steps.
 *
 * @returns {string} The recipe Markdown, ending with a newline.
 */
export function cookbookConnectTool() {
  const recipeMarkdown = `# Connect a Tool

A do-it recipe: point any AI tool you already use at this workspace, so the same profile, context, acceptance, guard, handoff, and harvest rules drive every tool instead of six drifting rule systems. The key idea is that every mechanism here is just a Markdown file. You connect a tool by copying file contents into that tool's instruction slot. Nothing depends on this CLI staying installed; the CLI only writes the files.

## When to use this

- You have a favorite AI tool (a general chat AI, a coding assistant, or a command-line AI) and want it to follow this system's loop.
- You use more than one tool and they each behave differently because each has its own ad hoc rules.
- You want a mechanism (like a guard pass) available inside your tool as a reusable instruction, not something you re-type every time.

Skip it if you only ever read these files by hand and never paste them into a tool.

## Prerequisites

- This workspace exists locally.
- The AI tool you want to connect, and knowledge of where it accepts standing instructions. Three common shapes: a general chat AI uses a "system prompt" or "custom instructions" box; a coding assistant uses a project rules file (for example a \`CLAUDE.md\`, an \`AGENTS.md\`, a \`.cursorrules\`, a \`.clinerules\`, or an equivalent); a command-line AI uses its config or a per-project instruction file.
- Two minutes per tool. This is copy and paste, not installation.

## Steps

1. Open the shared contract. Open \`../adapters/SHARED_CORE_CONTRACT.md\`. This is the one rule source every tool should share so the loop does not drift between tools.
2. Open the adapter for your tool family. Look in \`../adapters/\` for the closest match to your tool (each adapter is a thin pointer, intentionally not a second copy of the contract). If none matches exactly, pick the nearest one. The adapter shows the minimal instruction your tool needs.
3. Put the contract where your tool reads standing instructions. For a chat AI, paste the contract into the system-prompt or custom-instructions box. For a coding assistant, save it (or a pointer to it) into that tool's project rules file. For a command-line AI, add it to the tool's config or per-project instruction file. Use the copy-paste block below.
4. Add one mechanism as a reusable instruction (optional but the high-value move). Pick a mechanism you want on tap, for example \`../mechanisms/dual-guard/PROMPT.md\` or \`../guard/PROMPT.md\`. Copy the prompt body from that file's "Copy-paste prompt" block into a saved prompt, snippet, or rule in your tool, so a guard pass is one trigger away instead of a retype.
5. Verify the wiring with a throwaway ask. Tell the tool: "State the core loop you are now following and where each step's rules live." A correctly connected tool names profile, context, acceptance, guard, handoff, harvest and treats them as explicit files, instead of inventing hidden memory.
6. Save anything worth keeping back into this workspace (a filled template, a handoff, a harvest card) so the next tool or session starts from the same files.

## Copy-paste block

Two pieces. The first wires the whole loop into a tool. The second drops a single mechanism in as a reusable instruction. Before pasting, open the referenced file and paste its real contents where marked; do not paste the file path and expect the tool to read your disk.

\`\`\`text
[A / WIRE THE LOOP INTO A TOOL - paste into the tool's system prompt or project rules file]
Follow this shared contract for our work. Treat profile, context, acceptance, guard/review, handoff, and harvest as explicit files in a local-first workspace, not as hidden memory. Work local-first; do not upload my content by default. Label facts, assumptions, decisions, and unverified claims. Use synthetic, redacted examples for anything I might share publicly.
--- shared contract begins ---
[paste the full contents of ../adapters/SHARED_CORE_CONTRACT.md here]
--- shared contract ends ---

[B / ADD ONE MECHANISM AS A REUSABLE INSTRUCTION - save as a snippet, saved prompt, or rule]
When I invoke this, run the mechanism below on the material I provide. Keep private material local and redacted. Point findings to specific spots. Return the mechanism's stated output shape, not a vague summary.
--- mechanism prompt begins ---
[paste the "Copy-paste prompt" block from ../mechanisms/<mechanism>/PROMPT.md here]
--- mechanism prompt ends ---
\`\`\`

## Expected output

- Your tool, when asked, can name the core loop (profile, context, acceptance, guard, handoff, harvest) and treats each as a file rather than invented memory.
- At least one mechanism is reachable inside the tool as a saved instruction you can trigger without retyping it.
- The same contract now drives every tool you connected this way, so behavior is consistent across tools.

## Failure handling

- The tool ignores the standing instruction. You likely pasted into a one-off chat turn instead of the persistent slot. Move the contract into the actual system-prompt box or project rules file so it survives across turns.
- The tool "can't find" a referenced file. Tools generally cannot read your disk from a path in a prompt. Paste the file's contents inline (as the block marks), not just its path. Files are the source of truth; pasting is how a tool sees them.
- Behavior still drifts between two tools. Confirm both point at the same single \`SHARED_CORE_CONTRACT.md\` and that neither has an older private rule set fighting it. One contract, many thin adapters; never six full rule systems.
- The adapter looks too thin and you want to fatten it. Do not. The adapter is meant to be a pointer; thickening it recreates the drift the shared contract exists to prevent.

## Privacy note

Connecting a tool means standing instructions, not your private data. Paste the contract and mechanism prompts (they are public-safe). Do not paste a private profile, raw private chat logs, internal numbers, or non-public paths into a tool's instruction slot or an external AI. When you later run real tasks through the connected tool, redact first and keep originals local; the loop is designed to work on a redacted description.

## Next step

- Run a full loop through the tool you just connected: \`run-a-first-loop.md\`.
- Use the connected tool to pressure-test a "done" artifact: \`review-a-half-product.md\`.
- Browse the other mechanisms you can wire in the same way: \`../mechanisms/README.md\`.
`;
  return recipeMarkdown;
}
1188
+
1189
/**
 * Produce the Markdown for the "Review a Half Product" cookbook recipe.
 *
 * The recipe text describes how to audit a deliverable that claims to be done:
 * when to use it, prerequisites, steps, a copy-paste reviewer prompt, expected
 * output, failure handling, a privacy note, and next steps.
 *
 * @returns {string} The recipe Markdown, ending with a newline.
 */
export function cookbookHalfProduct() {
  const recipeMarkdown = `# Review a Half Product

A do-it recipe: audit a deliverable that says "done" but might not be, by forcing an independent AI to point at evidence and find the gap, instead of nodding along with "looks good". It uses the review mode plus the dual-guard and half-product-review mechanisms. The target is the classic half product: lots of docs, demo, and confident prose, but the thing it claims a stranger can do does not actually run.

## When to use this

- Someone (a tool, another session, a contributor) hands you work claimed complete and you will build on it or ship it.
- A project has a polished README and architecture talk but you are not sure the first-run experience actually works.
- A completion claim feels too smooth and you want a second, independent pass before you trust it.

Skip it for low-stakes, easily reversible work, or a step you are about to fully re-check yourself anyway. Running a full review on trivial work is ceremony, and ceremony with no payoff trains people to skip review when it matters.

## Prerequisites

- The artifact under review, with stable references the reviewer can point to (line numbers, section anchors, or named files).
- Its definition of done: an acceptance card, or at least the public claim it makes ("a stranger can do X in ten minutes").
- The evidence that supposedly backs the claim: command output, test results, a reproduced result, or a clear note that none exists.
- An AI tool to run the review in, ideally a different model family from whatever produced the artifact, since a different family is the pass most likely to see what the author cannot.

## Steps

1. Pin the claim. Write down, in one line, exactly what the artifact claims is done or usable. A claim you cannot state is a claim you cannot test. If it has an acceptance card, use that; if not, lift the strongest promise from its README or start page.
2. Trace each claim to evidence. For every claim, find the file, command output, or test that proves it, or note that none exists. The half-product pattern is docs and demos that point at nothing runnable. A claim with no evidence is the finding.
3. Try the first-run path. If the claim is "a stranger can do X", do X the way a stranger would: run the entry command, open the file the docs point to, follow the start page. Watch where it breaks or where a referenced artifact is missing.
4. Run the independent guard. Paste the artifact, the acceptance card or pinned claim, and any evidence into the review prompt below, in a second tool. Demand findings tied to specific lines or missing evidence, ordered by severity, with a pass or reject. Do not accept a fluent "looks fine".
5. Merge by strictness, not vote. If the guard names one real, evidence-grounded blocker, the artifact does not pass, even if everything else reads well. One concrete defect outweighs a pile of fluent approval. Compare against \`../mechanisms/dual-guard/README.md\` for how the binding pass works.
6. Decide the wording. If the first-run path is not actually runnable, downgrade the release language (from "anyone can use this" to "early / not yet runnable end to end") or carry the gap as named residual risk the owner accepts on the record. Silent "good enough" is not allowed.

## Copy-paste block

Paste this into an independent AI tool, ideally a different model family from the one that produced the work. It is tuned to make the reviewer hunt for the gap and cite it, not to praise.

\`\`\`text
You are an independent reviewer. The work below claims to be complete or usable. Assume it might not be, and prove it either way against the evidence, not the tone.

Claim under test: [paste the one-line "done"/usable claim, or the acceptance card]
Artifact: [paste the artifact, or the README/start page and the key files it points to]
Evidence provided: [paste command output / test results / reproduced result, or write "none provided"]

Do this:
1. For each claim, name the specific evidence that backs it, or state that none was provided.
2. Walk the first-run path a stranger would take. Say exactly where it breaks or where a referenced file/command is missing.
3. List defects ordered by severity. Tie each to a line, section, or the specific missing evidence. No vague "looks good" or "seems fine".
4. If any one real, evidence-grounded blocker exists, the verdict is REJECT even if the rest reads well.

Return:
- Verdict: pass / reject / insufficient_evidence / pass_with_risk (a plain pass needs an L3+ cross-family evidence pack; a single tool tops out at pass_with_risk; summary-only is insufficient_evidence)
- Guard level: L0-L4, the strength of the evidence you actually had. The CLI COMPUTES this from your review method + the evidence (it is not self-declared); a cross-family L3 is shown "self-declared, unverified" because a local tool cannot verify the reviewer's family — L4 (that cross-family review AND a rerun reconciled to a recorded run) is the strongest LOCAL-trust level, not cryptographic proof.
- Findings (each tied to a line, section, or missing evidence)
- Required fixes (the smallest change each blocker needs)
- Residual risk (what stays unverified and who must accept it; a pass_with_risk needs an explicit owner sign-off)
- Recommended release wording (downgrade it if the first-run path is not runnable)

Rules: work only from what I provided; if key evidence is missing, say so rather than assuming it passes; keep examples public-safe.
\`\`\`

## Expected output

- A verdict: pass, reject, insufficient_evidence, or pass_with_risk, plus the guard level (L0-L4) for the evidence seen.
- A findings list where each item points to a line, section, or a specific missing piece of evidence, not a vibe.
- The exact first-run step where the experience breaks, if it does.
- The smallest fixes required before the work can wear its completion label, and recommended release wording.

## Failure handling

- The reviewer just approves it. It is grading tone, not claims. Re-run and force step 1: every claim must be matched to specific evidence or marked unproven; "looks good" is not a finding.
- The reviewer invents evidence or assumes the path works. Tell it to work only from what you pasted and to say "none provided" rather than assume. If it cannot see the evidence, that absence is itself the result.
- Two reviewers disagree (one approves, one rejects). Do not average them. If the rejection points to a real, evidence-grounded defect, it wins; one concrete blocker beats fluent approval.
- You only have the same tool the author used. Run it anyway in a fresh session, but treat the pass as weaker: same model family tends to miss the same things, so a clean result here is a reference, not a guarantee.

## Privacy note

Review the work, not your private data. Redact before pasting: replace real product names, customer or person names, file paths, and internal numbers with placeholders. Do not paste a private profile, raw private chat logs, or non-public paths into an external AI for review. The review works on a redacted artifact plus its evidence; it does not need the private original.

## Next step

- Use the full two-layer review behind this on higher-stakes artifacts: \`../mechanisms/dual-guard/README.md\`.
- See the dedicated mechanism for the docs-outrun-runtime pattern: \`../mechanisms/half-product-review/README.md\`.
- After a reject, package the exact remaining work for whoever fixes it: \`../mechanisms/handoff-abc/README.md\`.
`;
  return recipeMarkdown;
}
1271
+
1272
/**
 * Cookbook recipe "Bridge to a Second Family".
 *
 * @returns {string} The recipe as one Markdown document ending in a newline.
 */
export function cookbookBridgeSecondFamily() {
  // Inner backticks stay escaped so they are emitted literally as Markdown code spans/fences.
  const recipeMarkdown = `# Bridge to a Second Family

A do-it recipe: set up the second, different-model-family AI that the cross-family guard needs, and route a review across it. The rest of the system keeps telling you "when a second, different model family is available, you can upgrade to the cross-family double guard" — this recipe is the missing how. It does not redefine the guard: \`../mechanisms/dual-guard/README.md\` owns the judgment rules (L3 vs L4, binding vs reference, layered strictness over majority vote, the pass and reject bars). This recipe only covers the part those rules assume you already did: pick a second family, get your material across to it safely, and keep evidence that the second family actually ran.

There are two tracks. The manual bridge (copy-paste between two AIs) is the main path: it works with any two tools, needs no setup, and never breaks. The auto bridge (a tool that dispatches the review to another family for you) is an optional convenience. Start manual; reach for auto only if a tool you already use offers it.

## When to use this

- A completion claim is about to be trusted by another session, tool, or person, and you want the cross-family binding gate the dual-guard mechanism describes — but you only have one tool wired up so far.
- You keep stopping at "upgrade to a second model family" and do not know how to actually stand one up.
- You have run \`single-tool-guard\` and want to move a result above its L2 ceiling with a genuine cross-family pass.

Skip it for low-stakes, easily reversible work a human will fully re-check anyway. A single tool's own adversarial pass (\`../mechanisms/single-tool-guard/README.md\`) is the right tool there; bridging to a second family is the upgrade for work that will propagate.

## Prerequisites

- A redacted version of the artifact under review (swap private names, paths, and numbers for placeholders). Nothing private needs to leave your machine; a redacted copy is enough.
- Its acceptance card or one-line "done" claim, and the evidence that supposedly backs it (command output, test result, a reproduced result, or a clear note that none exists).
- One AI tool you already use as your primary (the family that drafted the work, or any family you treat as home base).
- A second AI that is a DIFFERENT model family from your primary. That is the whole point: a different family does not share your primary's blind spots. (How to choose one is Step 1 below — you do not need it set up before you start.)

## Track A — the manual bridge (main path, works with any two tools)

### Step 1. Choose a second family

Pick any AI that is a different model family from your primary tool. The families differ; the move does not. Concrete examples (each is just an example — substitute freely):

- Primary is a Claude-family tool? Use a GPT-family or a Gemini-family AI as the second.
- Primary is a GPT-family tool? Use a Claude-family or a Gemini-family AI as the second.
- Primary is a Gemini-family tool? Use a Claude-family or a GPT-family AI as the second.

Any different-family pairing works — the names above are illustrations, not a required list. What matters is "different family", not which brands. Two tools that wrap the same underlying family (for example two products both built on the same model) do NOT count as a cross-family pair; the dual-guard mechanism treats that as same-family, capped below the cross-family gate. If you are unsure whether two tools share a family, treat them as same-family until you can confirm otherwise.

### Step 2. Redact before it leaves your primary

The second AI cannot read your disk; to review your material it has to be pasted in. So redact first, exactly as in \`connect-a-tool.md\`: replace real product names, customer or person names, file paths, and internal numbers with placeholders. Do not paste a private profile, raw private chat logs, internal numbers, or non-public paths into the second AI. The review works on a redacted artifact plus its evidence; it does not need the private original.

### Step 3. Send the package across and run the cross-family review

This is your move to make, not the second AI's — it cannot read your disk, so you fetch the file contents and paste them in. On your own machine, open \`../mechanisms/dual-guard/PROMPT.md\` and copy its "Copy-paste prompt" body. Then paste one combined message into the second (different-family) AI: the carrier wrapper below, the dual-guard body you just copied, and your redacted material (artifact, acceptance card, context boundary, evidence). The carrier is a thin wrapper that hands the pasted dual-guard body to the second AI as its instructions; it does not restate the guard's rules, because the dual-guard mechanism owns them.

### Step 4. Collect the verdict and record binding evidence

Read back the verdict using the dual-guard pass and reject bars (do not re-derive them here). Then record what makes the result trustworthy later: which family was the binding guard, the findings, the fixes, and the residual risk. A bridge to a second family reaches L3 (a structured evidence pack reviewed by a different family). To reach L4, the binding guard must independently re-run the key evidence and reconcile it to a recorded run — re-running the critical check yourself across the second family is what raises a cross-family L3 to L4 (the strongest LOCAL-trust level, not cryptographic proof).

## Copy-paste block (manual bridge)

Paste this into the second, different-family AI. It carries your material to the dual-guard prompt; it deliberately does not repeat the guard's judgment rules.

\`\`\`text
Run a cross-family review for me. You are the second, different-model-family guard.

Your full instructions are the Dual Guard prompt body, which I have pasted directly below. Follow it exactly (process, output shape, pass bar, reject bar, guard-level rules). It is the source of truth; do not invent your own rubric. You cannot read my disk, so I am pasting the body in here rather than pointing you at a local file.

--- Dual Guard PROMPT body (begin) ---
[Paste the Dual Guard PROMPT.md "Copy-paste prompt" body here — open it locally and copy it in yourself; the second AI cannot read your disk.]
--- Dual Guard PROMPT body (end) ---

Then review this material under the Dual Guard prompt body pasted above:
- Drafting model family (my primary): [name the family that produced the work]
- Artifact under review (redacted, with line/section refs): [paste]
- Acceptance card / definition of done: [paste]
- Context boundary (goal, in-scope, non-goals): [paste]
- Evidence provided: [paste command output / test result / reproduced result, or write "none provided"]

Return exactly the dual-guard output shape (verdict, guard level, binding findings, required fixes, residual risk, next action). Work only from what I pasted; if key evidence is missing, say so rather than assume it passes. Keep examples public-safe.
\`\`\`

## Track B — the auto bridge (optional, point-to-point, depends on your tool)

Some tools can dispatch the review to a second family for you, so you do not hand-carry the paste. The shape is the same cross-family pass; the tool just automates the hand-off.

Concrete example (an example, not a requirement): a coding tool that supports a "rescue" or cross-model plugin can route a review to a different family — for instance a Claude-family coding tool with a plugin that sends the review to a GPT-family model. That auto-dispatched second model is your cross-family bridge.

Two rules make an auto bridge safe to trust:

- Read-only is mandatory. The auto-dispatched second AI must review only — it must NOT be allowed to edit your files. If the bridge lets the second AI change the work, it is no longer an independent reviewer of that work, and the cross-family independence the whole gate depends on is gone. Configure the dispatch as read-only and confirm it actually ran read-only before you trust the verdict.
- It is convenience, not a requirement. The auto bridge depends on a specific tool and integration, and those change over time. The manual bridge in Track A always works. Treat the auto bridge as a way to save effort, never as the only way to reach a second family.

Everything else — which verdicts are allowed, the L3/L4 boundary, binding vs reference — is unchanged and still lives in \`../mechanisms/dual-guard/README.md\`. The auto bridge changes how the material gets there, not what counts as a pass.

## Expected output

- A cross-family review of your artifact, returned in the dual-guard output shape (verdict, guard level, findings, fixes, residual risk, next action).
- A recorded note of which family was the binding guard, so a later session can trust the result without re-litigating it.
- Honest leveling: an L3 result from the second family's review of your evidence pack, or L4 only if the binding guard re-ran the key evidence and showed that output. A bare claim that "a second family looked at it" is not a pass — the evidence is.

## Failure handling

- Your two tools turn out to share a model family. Then this is a same-family reference pass, not a cross-family gate; under the dual-guard rules it cannot clear the binding gate or move you above L2. Find a genuinely different family for the binding pass.
- The second family just says "looks good" with no specifics. It is grading tone, not claims. Make sure you pasted the dual-guard prompt body (not only the carrier wrapper) so its "each finding cites a line/section/missing evidence" rule is in force; an empty finding list is only valid if it can say what it checked.
- You only claimed a second family but kept no evidence. Family can be faked — anyone can say "a different AI reviewed this". What is hard to fake is a rerun and a reconciliation: record the binding family, the findings, and, for L4, your own rerun output. If you cannot show the review happened, treat the result as single-tool (L2), not cross-family.
- The auto bridge ran with write access. Discard the verdict and re-run it read-only. A reviewer that could edit the work is not independent of it.

## Privacy note

A second family means a second place your material gets pasted, so the privacy bar is the same as \`connect-a-tool.md\`, applied twice. Redact before pasting into either tool: replace real product names, customer or person names, file paths, and internal numbers with placeholders. Do not paste a private profile, raw private chat logs, internal numbers, or non-public paths into any external AI. For an auto bridge, confirm the dispatched second AI is read-only and does not exfiltrate or store your content beyond the review. The cross-family review works on a redacted artifact plus its evidence; it never needs the private original.

## Next step

- Read the judgment rules this recipe feeds into: \`../mechanisms/dual-guard/README.md\` (L3 vs L4, binding vs reference, pass and reject bars).
- Coming from one tool? See the L2 front door you are upgrading from: \`../mechanisms/single-tool-guard/README.md\`.
- Wire the second family in as a standing tool so the bridge is one trigger away: \`connect-a-tool.md\`.
`;
  return recipeMarkdown;
}
1378
+
1379
/**
 * Blank "Current State" Markdown skeleton.
 *
 * @returns {string} A title, a one-line usage note, and five empty sections,
 *   ending in a newline.
 */
export function stateCurrent() {
  const rows = [
    "# Current State",
    "",
    "Use this file for local state only. Do not publish private task details.",
    "",
    "## Current goal",
    "",
    "## Active context package",
    "",
    "## Active acceptance card",
    "",
    "## Current mode",
    "",
    "## Next action",
    "", // trailing empty row yields the final newline after join
  ];
  return rows.join("\n");
}
1395
+
1396
/**
 * "Task Log" Markdown skeleton with one synthetic sample row.
 *
 * @returns {string} A title, a one-line usage note, and a five-column table,
 *   ending in a newline.
 */
export function stateTaskLog() {
  const rows = [
    "# Task Log",
    "",
    "Use this local log to keep AI work resumable.",
    "",
    "| Date | Task | Mode | Evidence | Next action |",
    "| --- | --- | --- | --- | --- |",
    "| synthetic | First loop | review | guard review exists | write handoff |",
    "", // trailing empty row yields the final newline after join
  ];
  return rows.join("\n");
}
1406
+
1407
/**
 * "Decisions" Markdown skeleton with one synthetic sample row.
 *
 * @returns {string} A title, a one-line usage note, and a four-column table,
 *   ending in a newline.
 */
export function stateDecisions() {
  const rows = [
    "# Decisions",
    "",
    "Record decisions that future sessions should not reopen without new evidence.",
    "",
    "| Date | Decision | Evidence | Revisit condition |",
    "| --- | --- | --- | --- |",
    "| synthetic | Keep examples synthetic | privacy boundary | public-safe replacement needed |",
    "", // trailing empty row yields the final newline after join
  ];
  return rows.join("\n");
}
1417
+
1418
/**
 * "10-Minute Walkthrough (Demo preview)" — the walkthrough on the prepared
 * TaskBoard case.
 *
 * @returns {string} The walkthrough as one Markdown document ending in a newline.
 */
export function walkthrough10() {
  // Inner backticks stay escaped so file names render as Markdown code spans.
  const demoMarkdown = `# 10-Minute Walkthrough (Demo preview)

This is the demo preview: it runs the loop on a prepared case so you can see the flow without pasting anything of your own. To run the same loop on your own real task, use \`10-minute-your-task.md\` instead (that is the recommended first run). Pick this preview if your task feels too sensitive to paste right now, or you just want to watch the shape of the loop first.

Goal: walk one AI collaboration loop end to end on the prepared TaskBoard case, and watch a guard catch a false completion claim that a single agent would have accepted.

The case: a user asks an AI to add task reordering to a TaskBoard. The AI says it added mouse and keyboard reorder with tests. The guard proves the keyboard part was never implemented. You will see context, acceptance, first output, guard review, revised output, handoff, and harvest.

Everything is local-first and synthetic. You only read and copy files; nothing is uploaded.

## Step 1 (1 min) - Open the case

Open \`../examples/ai-coding-long-task/CASE.md\` and read "Confusing raw input" and "Likely single-agent failure". This is the messy request and the answer a raw chat usually gives.

Expected: you can say in one line why "I will refactor, add drag, keyboard, polish, and tests" is unsafe (it mixes scope and defines no pass standard).

## Step 2 (2 min) - Set context and acceptance

Open \`../examples/ai-coding-long-task/artifacts/context-package.md\`, then \`acceptance-card.md\`. Copy both into your AI tool together with \`../adapters/SHARED_CORE_CONTRACT.md\`.

Expected: your tool now has five checkable acceptance criteria (AC1 mouse, AC2 keyboard, AC3 tests for both, AC4 data preserved, AC5 visual polish out of scope).

## Step 3 (2 min) - Read the first AI output

Open \`../examples/ai-coding-long-task/artifacts/first-ai-output.md\`. Read the completion claim, then the \`TaskBoard.tsx\` code block.

Expected: you can point to the defect yourself. The claim says arrow-key reorder works, but \`onKeyDown\` (lines 27-30 of that code block) only logs the key and never calls \`moveTask\`, and the test block has no keyboard test.

## Step 4 (2 min) - Run the guard review

Open \`../examples/ai-coding-long-task/artifacts/guard-review.md\`. Optionally paste \`first-ai-output.md\` plus \`../guard/PROMPT.md\` into a second AI tool and ask it to review against the acceptance card.

Expected: the guard returns a cause-and-effect chain, not a one-line verdict. It cites \`first-ai-output.md\` lines 27-30 (stub handler) and the missing keyboard test, maps them to AC2 and AC3, and returns reject. This is the line the guard checks.

## Step 5 (2 min) - Read the revised output and close the loop

Open \`../examples/ai-coding-long-task/artifacts/revised-output.md\`, then \`handoff-note.md\`, then \`harvest-seed.md\`.

Expected: \`onKeyDown\` now calls \`moveTask\` for ArrowUp/ArrowDown, a keyboard test was added that fails on the old stub and passes on the fix, the handoff separates done / pending / unverified (visual polish), and the harvest seed is the reusable artifact you keep: verify completion claims with code and test evidence, do not trust a fluent "done".

## Completion check

You have walked context -> acceptance -> first output -> guard -> revised -> handoff -> harvest on one case, you can name the exact line the guard pointed to, and you leave with one reusable artifact (\`harvest-seed.md\`) you can apply to your own next task.
`;
  return demoMarkdown;
}
1464
+
1465
/**
 * "10-Minute Walkthrough (Your own task)" — the walkthrough on a task the
 * reader supplies, including its copy-paste prompt blocks.
 *
 * @returns {string} The walkthrough as one Markdown document ending in a newline.
 */
export function walkthrough10YourTask() {
  // Inner backticks stay escaped so code spans and ```text fences are emitted literally.
  const ownTaskMarkdown = `# 10-Minute Walkthrough (Your own task)

This is the recommended first run. You run the whole collaboration loop on one real task of your own, instead of a prepared example, and feel the value on work you actually care about. If you would rather watch the flow on a prepared case first, use \`10-minute.md\` (the demo preview) and then come back here.

Goal: take one messy task of yours and, in three short rounds, force the AI to (1) define "done" before it acts, (2) do only that, and (3) get re-checked by an independent AI that hunts for a thin "done" - then spend two minutes closing the loop into reusable cards so the next task starts ahead.

Everything stays local-first. You paste a redacted description into the AI tools you already use; nothing is uploaded by this workspace. Redact before you paste: replace any real name, path, customer, or internal number with a placeholder. The loop works on a redacted description; it does not need the private original.

What you need: one real task that is a bit messy, and one AI tool you can paste into. A second tool of a different model family (a different AI brand) makes Step 3 much stronger, but you can run all three rounds in one tool if that is all you have.

Want the AI to prompt you for these steps on its own - to ping you to review every time it says "done", instead of you remembering to paste Step 3? Install the adapter into your tool's always-on instructions with \`node bin/ai-collab.js adapters install --target <repo>\`; it turns on the coaching reminders, and if you only have one tool it routes the completion-claim check through \`single-tool-guard\` (a fresh adversarial pass in the same tool).

## Step 1 (2 min) - Define done before any work

Paste this into your AI tool, with your own task in the brackets:

\`\`\`text
I have a task in front of me that is a bit messy. Do NOT write any implementation yet.
Task (redacted): [describe your task in plain language; replace any private name, path, or number with a placeholder]
Return two things:
1) Boundary card: this run does only this one small slice; explicitly list what is NOT in scope.
2) Acceptance card: a numbered list of hard, checkable standards (AC1, AC2, ...). Mark anything that would be out of scope.
\`\`\`

Expected: a boundary card and an acceptance card. You now have a written definition of "done" for your own task, before a line of work exists. This is the step people skip and then regret.

## Step 2 (3 min) - Do only the accepted slice, then produce an Evidence Pack

Paste this next, so the AI builds only what the acceptance card described and hands back a structured **Evidence Pack** the next round can actually check - not a prose "it's done":

\`\`\`text
Do only the work the acceptance card describes. Do not expand scope.
When you are done, produce an "Evidence Pack" in exactly this shape (it is the artifact the re-check will judge):
1) Changed files / diff: the list of files you changed, with the key diff hunks (or the full patch). If you changed nothing, say so.
2) Commands run: the exact commands you ran to verify the work (tests, build, lint, a manual reproduction). If you ran none, write "none".
3) Command output summary: the real output of each command (paste it, do not paraphrase), trimmed to the relevant lines.
4) Exit codes: the exit code of each command (0 = passed). If a command failed, keep its non-zero code and error visible - do NOT hide it.
5) Acceptance mapping: for each acceptance criterion (AC1, AC2, ...), say PASS / FAIL / NOT-VERIFIED and point to the evidence above that backs it.
6) Not verified: everything you could NOT prove (edge cases, things you skipped, criteria with no command behind them).
Do not claim "done" for anything that does not have evidence in this pack.
\`\`\`

Expected: an Evidence Pack with the six numbered parts above (changed files/diff, commands run, output summary, exit codes, acceptance mapping, not-verified). Keep this whole pack - it is exactly what the next round pressure-tests, and a missing or empty pack is itself a finding in Step 3.

## Step 3 (3 min, the aha moment) - Independent re-check

Open a fresh chat. Ideally use a different AI brand than the one that did Step 2 - a different model family is the pass most likely to catch what the first one missed. Paste this:

\`\`\`text
You are an independent reviewer. The work below claims to be done. Assume it is NOT done and prove it from the evidence, not the tone.
Acceptance card: [paste your Step 1 acceptance card]
Evidence Pack under review: [paste the Step 2 Evidence Pack: changed files/diff, commands run, output summary, exit codes, acceptance mapping, not-verified]
Do this, in order:
1) First check the Evidence Pack itself. If there is no Evidence Pack, or it is missing real command output / exit codes, or a claimed PASS has no command behind it, you CANNOT pass the work: return the verdict INSUFFICIENT_EVIDENCE and list exactly what evidence is missing. A confident "done" with no evidence is INSUFFICIENT_EVIDENCE, not pass.
2) For each acceptance criterion, point to the exact line/output in the Evidence Pack that backs it, or say there is no evidence for it.
3) Walk it the way a stranger would actually use it and say exactly where it breaks.
4) List defects by severity, each pinned to a specific location.
5) Pick the verdict: REJECT if an evidence-grounded hard defect exists; INSUFFICIENT_EVIDENCE if the pack cannot support a pass; pass only if every criterion is backed by real evidence.
Return: verdict (pass / REJECT / INSUFFICIENT_EVIDENCE) + defect or missing-evidence list (with locations) + the smallest fix for each + what is still unverified.
\`\`\`

Expected (the aha): the independent reviewer first weighs your Evidence Pack. If Step 2 handed over a fluent "done" with no real evidence, it returns \`INSUFFICIENT_EVIDENCE\` and names what is missing; if the evidence exists but a criterion is not actually met, it returns \`REJECT\` with the defect pinned to a location - on your own task, not a tutorial's. Either way, that is the gap a single fluent chat would have hidden from you: no evidence pack means no pass.

## Step 4 (2 min) - Close the loop so it compounds

The re-check is the safety net; this step is where the loop starts paying you back. Keep it light - three short cards, not a report. Paste this:

\`\`\`text
Close out this task in three short cards. Keep each card to a few lines - do NOT write a long report.
1) Handoff card (so the next session or tool resumes without re-explaining), three columns:
- Done: what is finished and evidence-backed.
- To do: what is left.
- Not verified: what was claimed but not proven (carry over anything the re-check flagged).
2) Harvest card: one reusable lesson from this task, as a single sentence I could apply to a future task.
3) Profile candidate (only if one applies): if a stable preference about how I want you to work showed up more than once, propose it as one line, with status \`proposed\`. Do NOT add it to my long-term profile yet. If nothing stable showed up, say "no profile candidate this time".
\`\`\`

Expected: a three-column handoff, a one-line harvest lesson, and either one \`proposed\` profile candidate or an explicit "none". Save the handoff and harvest cards into your workspace (\`../handoff/\` and \`../harvest/\`). A profile candidate does NOT go straight into your long-term profile - it lands in \`../profile/CANDIDATES.md\` as \`proposed\` first. It only moves into \`profile/EXAMPLE.synthetic.md\` (or your real profile) after you review it: mark it \`confirmed\` (use as-is), \`edited\` (reword first), or \`dropped\` (discard) in CANDIDATES.md, and only \`confirmed\`/\`edited\` ones graduate. That buffer is why one task makes the next one start ahead without an unreviewed guess hardening into a standing rule - you walk away with a re-checked result *and* something reusable, but nothing edits your profile behind your back.

### Profile-candidate buffer (the state machine)

A profile candidate is a guess about a standing preference. An unreviewed guess must not silently become a rule future sessions obey, so candidates move through four states in \`../profile/CANDIDATES.md\`:

- \`proposed\` — the AI suggested it this loop; not yet trusted, not in your profile.
- \`confirmed\` — you reviewed it and it is correct as written; it may now graduate into your profile.
- \`edited\` — correct after you reword it; the edited line graduates, the original does not.
- \`dropped\` — you reviewed it and it does not belong; it stays recorded as dropped so it is not re-proposed every loop.

Rule: only \`confirmed\` and \`edited\` candidates graduate into your long-term profile, and only after you say so. \`proposed\` and \`dropped\` never edit your profile. Open \`../profile/CANDIDATES.md\` for the table and how to use it.

Prefer to let the tool track this for you instead of hand-editing a table? The same four states are available as commands: \`ai-collab learning add --type profile --content "..."\` records the candidate (and \`--type harvest\` records the one-line lesson from card 2), then \`ai-collab learning confirm\` / \`learning edit\` / \`learning drop\` keep, reword, or discard it. Next time you run \`ai-collab status\`, it echoes back the one preference you most recently confirmed - so the next task literally starts with "still working the way you confirmed last time." Use the table or the commands, whichever you like; they share the same states, so you are never maintaining two systems.

## Two-track comparison (optional, makes the point undeniable)

Run your task once with no discipline first, then with the loop, and compare:

1. Track A (no discipline): in a fresh chat, paste your messy task with no structure and just ask the AI to do it. Save the smooth "Sure, I will do X, Y, Z" reply. That smooth line is your real before-evidence, generated on your own task.
2. Track B (the loop): the three steps above.
3. Side by side: ask the AI to put both tracks into one table with four rows - scope, definition of done, completion claim, and what would have been missed. The messy half is real evidence from your own task, not something the tutorial invented.

## Want the why behind each step

This walkthrough is the operation card. For the reasoning behind each move and a longer copy-paste sequence to adapt, open \`../cookbook/run-a-first-loop.md\` (it runs this same loop on your own task and explains why each step exists). To turn Step 3 into a reusable habit on higher-stakes work, see \`../cookbook/review-a-half-product.md\` and \`../mechanisms/dual-guard/README.md\`.

## Completion check

You defined "done" before the work, had the AI do only that, had an independent AI re-check it against evidence, and closed the loop into a handoff card, a one-line harvest lesson, and (if one applied) a profile candidate - all on a real task of your own. You can name the exact place the re-check pointed to, and you leave with a re-checked result, reusable cards, and a habit (define done, do only that, get re-checked, then capture what is reusable) that makes your next task start ahead instead of from scratch.
`;
  return ownTaskMarkdown;
}
1575
+
1576
/**
 * "30-Minute Walkthrough" Markdown.
 *
 * @returns {string} Goal, input, four numbered steps, expected output file,
 *   and completion check, ending in a newline.
 */
export function walkthrough30() {
  const rows = [
    "# 30-Minute Walkthrough",
    "",
    "Goal: adapt one layer to a real task.",
    "",
    "## Input",
    "",
    "Choose one current task and redact private identifiers.",
    "",
    "## Steps",
    "",
    "1. Open `../context/TEMPLATE.md`.",
    "2. Fill goal, current state, constraints, facts, assumptions, risks, and open questions.",
    "3. Open the adapter for your tool in `../adapters/`.",
    "4. Ask the tool to produce one acceptance card or review note from your context.",
    "",
    "## Expected output file",
    "",
    "One completed context package or acceptance card.",
    "",
    "## Completion check",
    "",
    "Another session can tell what the task is, what is out of scope, and what evidence is still missing.",
    "", // trailing empty row yields the final newline after join
  ];
  return rows.join("\n");
}
1601
+
1602
/**
 * "60-Minute Walkthrough" Markdown.
 *
 * @returns {string} Goal, seven numbered steps, the list of expected output
 *   files, and completion check, ending in a newline.
 */
export function walkthrough60() {
  const rows = [
    "# 60-Minute Walkthrough",
    "",
    "Goal: run one complete AI collaboration loop.",
    "",
    "## Steps",
    "",
    "1. Fill a light profile.",
    "2. Package task context.",
    "3. Define acceptance.",
    "4. Run one execution prompt.",
    "5. Challenge the result with guard review.",
    "6. Write a handoff note.",
    "7. Extract one harvest seed.",
    "",
    "## Expected output files",
    "",
    "- profile card",
    "- context package",
    "- acceptance card",
    "- execution artifact",
    "- guard review",
    "- handoff note",
    "- harvest seed",
    "",
    "## Completion check",
    "",
    "The next AI session can resume without asking what happened, and the useful lesson is saved for future reuse.",
    "", // trailing empty row yields the final newline after join
  ];
  return rows.join("\n");
}
1632
+
1633
/**
 * Renders the "Synthetic Loop Transcript" Markdown from the first entry of
 * `caseDefinitions`.
 *
 * Reads these fields of that entry, in document order: `id`, `messy`,
 * `profileContext`, `acceptance`, `executionPrompt`, `guardReview`,
 * `handoff`, `harvest`, `comparison`.
 * NOTE(review): `caseDefinitions` is defined outside this block; presumably a
 * non-empty array of case records — confirm against its declaration.
 *
 * @returns {string} The transcript as one Markdown document ending in a newline.
 */
export function syntheticTranscript() {
  // Fields are listed in the order the template interpolates them.
  const {
    id,
    messy,
    profileContext,
    acceptance,
    executionPrompt,
    guardReview,
    handoff,
    harvest,
    comparison,
  } = caseDefinitions[0];

  return `# Synthetic Loop Transcript

This transcript demonstrates one complete loop using \`${id}\`.

## Goal

Show that one user can move from a messy starting point to context, acceptance, execution, guard review, handoff, and harvest without relying on a raw chat memory.

## Expected output

A complete artifact chain: context package, acceptance card, execution request, guard review result, handoff note, harvest seed, and a short comparison against single raw AI chat.

## User

${messy}

## Context package

${profileContext}

## Acceptance card

${acceptance}

## Execution request

${executionPrompt}

## Guard review result

${guardReview}

## Handoff note

${handoff}

## Harvest seed

${harvest}

## Difference from raw chat

${comparison}
`;
}