npm - ai-collab-open-system - Versions diffs - 0.1.0 - Mend

ai-collab-open-system 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (259) hide show

package/.aict/START_HERE.md +127 -0
package/.aict/WORKSPACE_MANIFEST.json +91 -0
package/.aict/acceptance/EXAMPLE.synthetic.md +49 -0
package/.aict/acceptance/FAILURE_MODES.md +40 -0
package/.aict/acceptance/PROMPT.md +47 -0
package/.aict/acceptance/README.md +44 -0
package/.aict/acceptance/TEMPLATE.md +57 -0
package/.aict/adapters/SHARED_CORE_CONTRACT.md +106 -0
package/.aict/adapters/claude-code/ADAPTER.md +28 -0
package/.aict/adapters/cline/ADAPTER.md +28 -0
package/.aict/adapters/codex/ADAPTER.md +28 -0
package/.aict/adapters/copilot/ADAPTER.md +28 -0
package/.aict/adapters/cursor/ADAPTER.md +28 -0
package/.aict/adapters/windsurf/ADAPTER.md +28 -0
package/.aict/context/EXAMPLE.synthetic.md +53 -0
package/.aict/context/FAILURE_MODES.md +40 -0
package/.aict/context/PROMPT.md +47 -0
package/.aict/context/README.md +44 -0
package/.aict/context/TEMPLATE.md +63 -0
package/.aict/cookbook/README.md +8 -0
package/.aict/cookbook/bridge-to-a-second-family.md +103 -0
package/.aict/cookbook/connect-a-tool.md +67 -0
package/.aict/cookbook/review-a-half-product.md +79 -0
package/.aict/cookbook/run-a-first-loop.md +81 -0
package/.aict/examples/README.md +21 -0
package/.aict/examples/ai-coding-long-task/CASE.md +161 -0
package/.aict/examples/ai-coding-long-task/artifacts/acceptance-card.md +36 -0
package/.aict/examples/ai-coding-long-task/artifacts/context-package.md +30 -0
package/.aict/examples/ai-coding-long-task/artifacts/execution-prompt.md +30 -0
package/.aict/examples/ai-coding-long-task/artifacts/first-ai-output.md +109 -0
package/.aict/examples/ai-coding-long-task/artifacts/guard-review.md +40 -0
package/.aict/examples/ai-coding-long-task/artifacts/handoff-note.md +28 -0
package/.aict/examples/ai-coding-long-task/artifacts/harvest-seed.md +28 -0
package/.aict/examples/ai-coding-long-task/artifacts/revised-output.md +62 -0
package/.aict/examples/content-production-harvest/CASE.md +87 -0
package/.aict/examples/content-production-harvest/artifacts/acceptance-card.md +28 -0
package/.aict/examples/content-production-harvest/artifacts/context-package.md +28 -0
package/.aict/examples/content-production-harvest/artifacts/execution-prompt.md +30 -0
package/.aict/examples/content-production-harvest/artifacts/guard-review.md +28 -0
package/.aict/examples/content-production-harvest/artifacts/handoff-note.md +28 -0
package/.aict/examples/content-production-harvest/artifacts/harvest-seed.md +28 -0
package/.aict/examples/multi-tool-collaboration/CASE.md +87 -0
package/.aict/examples/multi-tool-collaboration/artifacts/acceptance-card.md +28 -0
package/.aict/examples/multi-tool-collaboration/artifacts/context-package.md +28 -0
package/.aict/examples/multi-tool-collaboration/artifacts/execution-prompt.md +30 -0
package/.aict/examples/multi-tool-collaboration/artifacts/guard-review.md +28 -0
package/.aict/examples/multi-tool-collaboration/artifacts/handoff-note.md +28 -0
package/.aict/examples/multi-tool-collaboration/artifacts/harvest-seed.md +28 -0
package/.aict/examples/personal-judgment-growth-assistant/CASE.md +87 -0
package/.aict/examples/personal-judgment-growth-assistant/artifacts/acceptance-card.md +28 -0
package/.aict/examples/personal-judgment-growth-assistant/artifacts/context-package.md +28 -0
package/.aict/examples/personal-judgment-growth-assistant/artifacts/execution-prompt.md +30 -0
package/.aict/examples/personal-judgment-growth-assistant/artifacts/guard-review.md +28 -0
package/.aict/examples/personal-judgment-growth-assistant/artifacts/handoff-note.md +28 -0
package/.aict/examples/personal-judgment-growth-assistant/artifacts/harvest-seed.md +28 -0
package/.aict/examples/research-knowledge-synthesis/CASE.md +87 -0
package/.aict/examples/research-knowledge-synthesis/artifacts/acceptance-card.md +28 -0
package/.aict/examples/research-knowledge-synthesis/artifacts/context-package.md +28 -0
package/.aict/examples/research-knowledge-synthesis/artifacts/execution-prompt.md +30 -0
package/.aict/examples/research-knowledge-synthesis/artifacts/guard-review.md +28 -0
package/.aict/examples/research-knowledge-synthesis/artifacts/handoff-note.md +28 -0
package/.aict/examples/research-knowledge-synthesis/artifacts/harvest-seed.md +28 -0
package/.aict/guard/EXAMPLE.synthetic.md +51 -0
package/.aict/guard/FAILURE_MODES.md +40 -0
package/.aict/guard/PROMPT.md +47 -0
package/.aict/guard/README.md +44 -0
package/.aict/guard/TEMPLATE.md +60 -0
package/.aict/handoff/EXAMPLE.synthetic.md +51 -0
package/.aict/handoff/FAILURE_MODES.md +40 -0
package/.aict/handoff/PROMPT.md +47 -0
package/.aict/handoff/README.md +44 -0
package/.aict/handoff/TEMPLATE.md +60 -0
package/.aict/harvest/EXAMPLE.synthetic.md +51 -0
package/.aict/harvest/FAILURE_MODES.md +40 -0
package/.aict/harvest/PROMPT.md +47 -0
package/.aict/harvest/README.md +44 -0
package/.aict/harvest/TEMPLATE.md +60 -0
package/.aict/mechanisms/README.md +34 -0
package/.aict/mechanisms/anti-drift-partner/EXAMPLE.synthetic.md +46 -0
package/.aict/mechanisms/anti-drift-partner/FAILURE_MODES.md +25 -0
package/.aict/mechanisms/anti-drift-partner/PROMPT.md +75 -0
package/.aict/mechanisms/anti-drift-partner/README.md +82 -0
package/.aict/mechanisms/anti-drift-partner/TEMPLATE.md +74 -0
package/.aict/mechanisms/blind-spot-scan/EXAMPLE.synthetic.md +39 -0
package/.aict/mechanisms/blind-spot-scan/FAILURE_MODES.md +25 -0
package/.aict/mechanisms/blind-spot-scan/PROMPT.md +72 -0
package/.aict/mechanisms/blind-spot-scan/README.md +79 -0
package/.aict/mechanisms/blind-spot-scan/TEMPLATE.md +70 -0
package/.aict/mechanisms/collaboration-coach/EXAMPLE.synthetic.md +40 -0
package/.aict/mechanisms/collaboration-coach/FAILURE_MODES.md +25 -0
package/.aict/mechanisms/collaboration-coach/PROMPT.md +72 -0
package/.aict/mechanisms/collaboration-coach/README.md +79 -0
package/.aict/mechanisms/collaboration-coach/TEMPLATE.md +61 -0
package/.aict/mechanisms/do-not-handle-yet/EXAMPLE.synthetic.md +15 -0
package/.aict/mechanisms/do-not-handle-yet/FAILURE_MODES.md +16 -0
package/.aict/mechanisms/do-not-handle-yet/PROMPT.md +41 -0
package/.aict/mechanisms/do-not-handle-yet/README.md +30 -0
package/.aict/mechanisms/do-not-handle-yet/TEMPLATE.md +38 -0
package/.aict/mechanisms/dual-guard/EXAMPLE.synthetic.md +54 -0
package/.aict/mechanisms/dual-guard/FAILURE_MODES.md +25 -0
package/.aict/mechanisms/dual-guard/PROMPT.md +76 -0
package/.aict/mechanisms/dual-guard/README.md +81 -0
package/.aict/mechanisms/dual-guard/TEMPLATE.md +73 -0
package/.aict/mechanisms/feedback-absorption-ledger/EXAMPLE.synthetic.md +49 -0
package/.aict/mechanisms/feedback-absorption-ledger/FAILURE_MODES.md +25 -0
package/.aict/mechanisms/feedback-absorption-ledger/PROMPT.md +74 -0
package/.aict/mechanisms/feedback-absorption-ledger/README.md +81 -0
package/.aict/mechanisms/feedback-absorption-ledger/TEMPLATE.md +69 -0
package/.aict/mechanisms/half-product-review/EXAMPLE.synthetic.md +15 -0
package/.aict/mechanisms/half-product-review/FAILURE_MODES.md +16 -0
package/.aict/mechanisms/half-product-review/PROMPT.md +41 -0
package/.aict/mechanisms/half-product-review/README.md +30 -0
package/.aict/mechanisms/half-product-review/TEMPLATE.md +38 -0
package/.aict/mechanisms/handoff-abc/EXAMPLE.synthetic.md +47 -0
package/.aict/mechanisms/handoff-abc/FAILURE_MODES.md +25 -0
package/.aict/mechanisms/handoff-abc/PROMPT.md +75 -0
package/.aict/mechanisms/handoff-abc/README.md +82 -0
package/.aict/mechanisms/handoff-abc/TEMPLATE.md +60 -0
package/.aict/mechanisms/harvest-and-erc/EXAMPLE.synthetic.md +43 -0
package/.aict/mechanisms/harvest-and-erc/FAILURE_MODES.md +25 -0
package/.aict/mechanisms/harvest-and-erc/PROMPT.md +74 -0
package/.aict/mechanisms/harvest-and-erc/README.md +81 -0
package/.aict/mechanisms/harvest-and-erc/TEMPLATE.md +60 -0
package/.aict/mechanisms/honest-calibration/EXAMPLE.synthetic.md +43 -0
package/.aict/mechanisms/honest-calibration/FAILURE_MODES.md +25 -0
package/.aict/mechanisms/honest-calibration/PROMPT.md +74 -0
package/.aict/mechanisms/honest-calibration/README.md +81 -0
package/.aict/mechanisms/honest-calibration/TEMPLATE.md +66 -0
package/.aict/mechanisms/one-click-dispatch/EXAMPLE.synthetic.md +15 -0
package/.aict/mechanisms/one-click-dispatch/FAILURE_MODES.md +16 -0
package/.aict/mechanisms/one-click-dispatch/PROMPT.md +41 -0
package/.aict/mechanisms/one-click-dispatch/README.md +30 -0
package/.aict/mechanisms/one-click-dispatch/TEMPLATE.md +38 -0
package/.aict/mechanisms/plain-language-first-screen/EXAMPLE.synthetic.md +15 -0
package/.aict/mechanisms/plain-language-first-screen/FAILURE_MODES.md +16 -0
package/.aict/mechanisms/plain-language-first-screen/PROMPT.md +41 -0
package/.aict/mechanisms/plain-language-first-screen/README.md +30 -0
package/.aict/mechanisms/plain-language-first-screen/TEMPLATE.md +38 -0
package/.aict/mechanisms/root-cause-brake/EXAMPLE.synthetic.md +55 -0
package/.aict/mechanisms/root-cause-brake/FAILURE_MODES.md +25 -0
package/.aict/mechanisms/root-cause-brake/PROMPT.md +73 -0
package/.aict/mechanisms/root-cause-brake/README.md +79 -0
package/.aict/mechanisms/root-cause-brake/TEMPLATE.md +74 -0
package/.aict/mechanisms/scout-review-controller/EXAMPLE.synthetic.md +15 -0
package/.aict/mechanisms/scout-review-controller/FAILURE_MODES.md +16 -0
package/.aict/mechanisms/scout-review-controller/PROMPT.md +41 -0
package/.aict/mechanisms/scout-review-controller/README.md +30 -0
package/.aict/mechanisms/scout-review-controller/TEMPLATE.md +38 -0
package/.aict/mechanisms/single-tool-guard/EXAMPLE.synthetic.md +54 -0
package/.aict/mechanisms/single-tool-guard/FAILURE_MODES.md +25 -0
package/.aict/mechanisms/single-tool-guard/PROMPT.md +76 -0
package/.aict/mechanisms/single-tool-guard/README.md +83 -0
package/.aict/mechanisms/single-tool-guard/TEMPLATE.md +75 -0
package/.aict/mechanisms/task-splitting/EXAMPLE.synthetic.md +53 -0
package/.aict/mechanisms/task-splitting/FAILURE_MODES.md +25 -0
package/.aict/mechanisms/task-splitting/PROMPT.md +72 -0
package/.aict/mechanisms/task-splitting/README.md +79 -0
package/.aict/mechanisms/task-splitting/TEMPLATE.md +76 -0
package/.aict/modes/README.md +11 -0
package/.aict/modes/execute.md +31 -0
package/.aict/modes/handoff.md +29 -0
package/.aict/modes/harvest.md +30 -0
package/.aict/modes/review.md +28 -0
package/.aict/modes/shape.md +34 -0
package/.aict/privacy/COMMERCIAL_BOUNDARY.md +34 -0
package/.aict/privacy/PRIVACY.md +36 -0
package/.aict/privacy/REDACTION_CHECKLIST.md +12 -0
package/.aict/profile/CANDIDATES.md +44 -0
package/.aict/profile/EXAMPLE.synthetic.md +49 -0
package/.aict/profile/FAILURE_MODES.md +40 -0
package/.aict/profile/PROMPT.md +47 -0
package/.aict/profile/README.md +44 -0
package/.aict/profile/TEMPLATE.md +57 -0
package/.aict/prompts/acceptance-definition.md +109 -0
package/.aict/prompts/guard-review.md +116 -0
package/.aict/prompts/handoff-generation.md +110 -0
package/.aict/prompts/harvest-extraction.md +110 -0
package/.aict/prompts/mode-switching.md +66 -0
package/.aict/prompts/profile-creation.md +66 -0
package/.aict/prompts/profile-refinement.md +66 -0
package/.aict/prompts/project-context-packaging.md +113 -0
package/.aict/prompts/red-team-challenge.md +106 -0
package/.aict/prompts/rule-update-proposal.md +114 -0
package/.aict/prompts/workflow-reset.md +109 -0
package/.aict/roles/README.md +18 -0
package/.aict/roles/executor.md +34 -0
package/.aict/roles/harvester.md +33 -0
package/.aict/roles/owner-controller.md +38 -0
package/.aict/roles/scout.md +33 -0
package/.aict/roles/supervisor.md +34 -0
package/.aict/roles/system-guardian.md +34 -0
package/.aict/skills/acceptance/SKILL.md +43 -0
package/.aict/skills/context/SKILL.md +44 -0
package/.aict/skills/evidence-pack/SKILL.md +42 -0
package/.aict/skills/guard/SKILL.md +46 -0
package/.aict/skills/handoff/SKILL.md +44 -0
package/.aict/skills/harvest/SKILL.md +44 -0
package/.aict/skills/mode-switch/SKILL.md +42 -0
package/.aict/skills/profile/SKILL.md +42 -0
package/.aict/skills/red-team/SKILL.md +42 -0
package/.aict/skills/single-tool-guard/SKILL.md +42 -0
package/.aict/state/CURRENT_STATE.md +13 -0
package/.aict/state/DECISIONS.md +7 -0
package/.aict/state/TASK_LOG.md +7 -0
package/.aict/state/evidence.jsonl +2 -0
package/.aict/state/learning-ledger.jsonl +1 -0
package/.aict/state/receipts.jsonl +1 -0
package/.aict/state/runs.jsonl +1 -0
package/.aict/state/tasks.jsonl +1 -0
package/.aict/walkthroughs/10-minute-your-task.md +107 -0
package/.aict/walkthroughs/10-minute.md +43 -0
package/.aict/walkthroughs/30-minute.md +22 -0
package/.aict/walkthroughs/60-minute.md +27 -0
package/.aict/walkthroughs/synthetic-loop-transcript.md +43 -0
package/CHANGELOG.md +23 -0
package/CODE_OF_CONDUCT.md +20 -0
package/CONTRIBUTING.md +30 -0
package/KNOWN_LIMITATIONS.md +54 -0
package/LICENSE +199 -0
package/PRODUCT_CONTRACT.md +446 -0
package/README.md +245 -0
package/RELEASE_CHECKLIST.md +78 -0
package/SECURITY.md +56 -0
package/START_HERE.md +89 -0
package/bin/ai-collab.js +2 -0
package/docs/DOGFOOD.md +85 -0
package/docs/FEEDBACK.md +61 -0
package/docs/FIRST_EXPERIENCE_SPEC.md +32 -0
package/docs/FREE_VS_PAID.md +53 -0
package/docs/PUBLIC_BOUNDARY.md +36 -0
package/docs/PUBLIC_MAPPING.md +178 -0
package/docs/RELEASE_PRIORITY.md +23 -0
package/docs/WHY_THIS_EXISTS.md +36 -0
package/docs/open-system/00-start-here.md +60 -0
package/docs/open-system/01-ai-collaboration-os.md +33 -0
package/docs/open-system/02-six-layer-architecture.md +45 -0
package/docs/open-system/03-role-system.md +33 -0
package/docs/open-system/04-core-mechanisms.md +34 -0
package/docs/open-system/05-failure-patterns.md +31 -0
package/docs/open-system/06-how-to-adapt-to-your-workflow.md +31 -0
package/package.json +69 -0
package/privacy-manifest.json +78 -0
package/privacy-scan.local.json.example +18 -0
package/scripts/lib/forbidden-in-pack.js +55 -0
package/scripts/pack-check.js +154 -0
package/scripts/privacy-scan.js +487 -0
package/scripts/validate-contract.js +160 -0
package/src/adapters.js +590 -0
package/src/bootstrap.js +1184 -0
package/src/catalog.js +2723 -0
package/src/cli.js +2899 -0
package/src/dialogue.js +470 -0
package/src/i18n.js +1034 -0
package/src/ledger.js +2011 -0
package/src/render.js +1381 -0
package/src/sendmodel.js +452 -0
package/src/validate.js +1307 -0
package/src/workspace.js +1679 -0
package/tests/contract.test.js +8514 -0

package/src/workspace.js ADDED Viewed

@@ -0,0 +1,1679 @@
+import { existsSync, mkdirSync, readdirSync, renameSync, statSync, writeFileSync } from "node:fs";
+import path from "node:path";
+import {
+  adapterDefinitions,
+  caseDefinitions,
+  layerDefinitions,
+  mechanismDefinitions,
+  promptDefinitions,
+  skillDefinitions
+} from "./catalog.js";
+import {
+  renderAdapter,
+  renderCase,
+  renderCaseArtifact,
+  renderCommercialBoundary,
+  renderExamplesIndex,
+  renderLayerExample,
+  renderLayerFailures,
+  renderLayerPrompt,
+  renderLayerReadme,
+  renderLayerTemplate,
+  renderMechanismExample,
+  renderMechanismFailures,
+  renderMechanismPrompt,
+  renderMechanismReadme,
+  renderMechanismTemplate,
+  renderPrivacyDoc,
+  renderPrompt,
+  renderSharedCoreContract,
+  renderSkill,
+  renderStartHere
+} from "./render.js";
+export const workspaceDirName = ".aict"; // Directory name of the generated workspace; createWorkspace() joins it onto the target root.
+export const baseCaseArtifactNames = [ // The six artifacts caseArtifactNames() always returns, in this same relative order.
+  "context-package.md",
+  "acceptance-card.md",
+  "execution-prompt.md",
+  "guard-review.md",
+  "handoff-note.md",
+  "harvest-seed.md"
+];
+// Cases that carry a real first-AI-output / revised-output pair (the flagship)
+// generate two extra artifacts so the false-completion-claim story is runnable.
+// The order keeps first-ai-output next to the execution prompt it answers, and
+// revised-output right after the guard review that triggered it.
+export function caseArtifactNames(caseItem) {
+  const names = [
+    "context-package.md",
+    "acceptance-card.md",
+    "execution-prompt.md"
+  ];
+  if (caseItem.firstAiOutput) names.push("first-ai-output.md");
+  names.push("guard-review.md");
+  if (caseItem.revisedOutput) names.push("revised-output.md");
+  names.push("handoff-note.md", "harvest-seed.md");
+  return names;
+}
+function ensureDir(dir) {
+  mkdirSync(dir, { recursive: true });
+}
+function writeText(file, content) {
+  ensureDir(path.dirname(file));
+  writeFileSync(file, `${content.trimEnd()}\n`, "utf8");
+}
+function formatTimestamp(date = new Date()) {
+  const pad = (value) => String(value).padStart(2, "0");
+  return [
+    date.getFullYear(),
+    pad(date.getMonth() + 1),
+    pad(date.getDate())
+  ].join("") + "-" + [pad(date.getHours()), pad(date.getMinutes()), pad(date.getSeconds())].join("");
+}
+function uniqueBackupPath(targetRoot) {
+  const base = path.join(targetRoot, `${workspaceDirName}.backup-${formatTimestamp()}`);
+  if (!existsSync(base)) return base;
+  for (let index = 2; index < 100; index += 1) {
+    const candidate = `${base}-${index}`;
+    if (!existsSync(candidate)) return candidate;
+  }
+  throw new Error(`Could not choose a backup path for ${workspaceDirName}.`);
+}
+function backupExistingWorkspace(targetRoot, workspaceRoot) {
+  const backupPath = uniqueBackupPath(targetRoot);
+  renameSync(workspaceRoot, backupPath);
+  return backupPath;
+}
+// A .gitignore written INTO a user's generated .aict/ workspace by `init` (see the
+// `options.gitignore` write in createWorkspace). The five append-only JSONL ledgers
+// are local RUNTIME STATE — they accumulate the real task titles, evidence, and
+// learning a user records as they work. Ignoring them by default means running
+// `git add .` inside the user's own repo never commits private task data into
+// version control. Co-locating the .gitignore with the workspace makes it work in a
+// nested repo too (rules are relative to this file's dir), and keeps the static
+// templates + hand-written state/*.md notes versionable. A user who WANTS to version
+// their collaboration state just deletes these lines. (Already-tracked files are
+// unaffected, so this never untracks a repo's own seed ledgers.)
+//
+// IMPORTANT — this is NOT part of workspaceFileEntries (the byte-for-byte template
+// the contract validator diffs against the committed .aict). It is written ONLY on a
+// real `init`, never by the template generator. That keeps the committed .aict free
+// of a nested .gitignore that npm pack would otherwise honor — which would strip the
+// required seed ledgers out of the published tarball.
+export function workspaceGitignore() {
+  return [
+    "# AI Collaboration Open System — local workspace (generated by init).",
+    "#",
+    "# These five append-only ledgers are your LOCAL RUNTIME STATE: they accumulate the",
+    "# real task titles, evidence, and learning you record as you work. They are",
+    "# git-ignored by default so that `git add .` in your own repo never commits your",
+    "# private task data into version control. Delete these lines if you DO want to",
+    "# version your collaboration state (e.g. to share it across a team). Everything",
+    "# else in .aict/ (templates, prompts, skills, and your hand-written state/*.md",
+    "# notes) is NOT ignored — only the machine-written ledgers are.",
+    "state/tasks.jsonl",
+    "state/evidence.jsonl",
+    "state/runs.jsonl",
+    "state/receipts.jsonl",
+    "state/learning-ledger.jsonl"
+  ].join("\n");
+}
+function workspaceFileEntries(workspaceRoot) {
+  const entries = [
+    [path.join(workspaceRoot, "START_HERE.md"), renderStartHere()],
+    [path.join(workspaceRoot, "WORKSPACE_MANIFEST.json"), JSON.stringify(workspaceManifest(), null, 2)]
+  ];
+  for (const layer of layerDefinitions) {
+    const dir = path.join(workspaceRoot, layer.id);
+    entries.push(
+      [path.join(dir, "README.md"), renderLayerReadme(layer)],
+      [path.join(dir, "PROMPT.md"), renderLayerPrompt(layer)],
+      [path.join(dir, "TEMPLATE.md"), renderLayerTemplate(layer)],
+      [path.join(dir, "EXAMPLE.synthetic.md"), renderLayerExample(layer)],
+      [path.join(dir, "FAILURE_MODES.md"), renderLayerFailures(layer)]
+    );
+  }
+  entries.push([path.join(workspaceRoot, "mechanisms", "README.md"), mechanismsReadme()]);
+  for (const mechanism of mechanismDefinitions) {
+    const dir = path.join(workspaceRoot, "mechanisms", mechanism.id);
+    entries.push(
+      [path.join(dir, "README.md"), renderMechanismReadme(mechanism)],
+      [path.join(dir, "PROMPT.md"), renderMechanismPrompt(mechanism)],
+      [path.join(dir, "TEMPLATE.md"), renderMechanismTemplate(mechanism)],
+      [path.join(dir, "EXAMPLE.synthetic.md"), renderMechanismExample(mechanism)],
+      [path.join(dir, "FAILURE_MODES.md"), renderMechanismFailures(mechanism)]
+    );
+  }
+  entries.push(
+    [path.join(workspaceRoot, "roles", "README.md"), rolesReadme()],
+    [path.join(workspaceRoot, "roles", "owner-controller.md"), roleOwnerController()],
+    [path.join(workspaceRoot, "roles", "executor.md"), roleExecutor()],
+    [path.join(workspaceRoot, "roles", "system-guardian.md"), roleSystemGuardian()],
+    [path.join(workspaceRoot, "roles", "scout.md"), roleScout()],
+    [path.join(workspaceRoot, "roles", "supervisor.md"), roleSupervisor()],
+    [path.join(workspaceRoot, "roles", "harvester.md"), roleHarvester()],
+    [path.join(workspaceRoot, "modes", "README.md"), modesReadme()],
+    [path.join(workspaceRoot, "modes", "execute.md"), modeExecute()],
+    [path.join(workspaceRoot, "modes", "shape.md"), modeShape()],
+    [path.join(workspaceRoot, "modes", "review.md"), modeReview()],
+    [path.join(workspaceRoot, "modes", "handoff.md"), modeHandoff()],
+    [path.join(workspaceRoot, "modes", "harvest.md"), modeHarvest()],
+    [path.join(workspaceRoot, "cookbook", "README.md"), cookbookReadme()],
+    [path.join(workspaceRoot, "cookbook", "run-a-first-loop.md"), cookbookFirstLoop()],
+    [path.join(workspaceRoot, "cookbook", "connect-a-tool.md"), cookbookConnectTool()],
+    [path.join(workspaceRoot, "cookbook", "review-a-half-product.md"), cookbookHalfProduct()],
+    [path.join(workspaceRoot, "cookbook", "bridge-to-a-second-family.md"), cookbookBridgeSecondFamily()],
+    [path.join(workspaceRoot, "state", "CURRENT_STATE.md"), stateCurrent()],
+    [path.join(workspaceRoot, "state", "TASK_LOG.md"), stateTaskLog()],
+    [path.join(workspaceRoot, "state", "DECISIONS.md"), stateDecisions()],
+    // P1 run-layer ledgers: five append-only JSONL logs seeded with one
+    // deterministic synthetic row each (see the generators below for why).
+    [path.join(workspaceRoot, "state", "tasks.jsonl"), tasksLedger()],
+    [path.join(workspaceRoot, "state", "evidence.jsonl"), evidenceLedger()],
+    [path.join(workspaceRoot, "state", "runs.jsonl"), runsLedger()],
+    [path.join(workspaceRoot, "state", "receipts.jsonl"), receiptsLedger()],
+    [path.join(workspaceRoot, "state", "learning-ledger.jsonl"), learningLedger()]
+  );
+  for (const prompt of promptDefinitions) {
+    entries.push([path.join(workspaceRoot, "prompts", prompt.file), renderPrompt(prompt)]);
+  }
+  for (const skill of skillDefinitions) {
+    entries.push([path.join(workspaceRoot, "skills", skill.id, "SKILL.md"), renderSkill(skill)]);
+  }
+  entries.push([path.join(workspaceRoot, "adapters", "SHARED_CORE_CONTRACT.md"), renderSharedCoreContract()]);
+  for (const adapter of adapterDefinitions) {
+    entries.push([path.join(workspaceRoot, "adapters", adapter.id, "ADAPTER.md"), renderAdapter(adapter)]);
+  }
+  entries.push([path.join(workspaceRoot, "examples", "README.md"), renderExamplesIndex()]);
+  for (const caseItem of caseDefinitions) {
+    const caseDir = path.join(workspaceRoot, "examples", caseItem.id);
+    entries.push([path.join(caseDir, "CASE.md"), renderCase(caseItem)]);
+    for (const artifact of caseArtifactNames(caseItem)) {
+      entries.push([path.join(caseDir, "artifacts", artifact), renderCaseArtifact(caseItem, artifact)]);
+    }
+  }
+  entries.push(
+    [path.join(workspaceRoot, "profile", "CANDIDATES.md"), profileCandidates()],
+    [path.join(workspaceRoot, "privacy", "PRIVACY.md"), renderPrivacyDoc()],
+    [path.join(workspaceRoot, "privacy", "COMMERCIAL_BOUNDARY.md"), renderCommercialBoundary()],
+    [path.join(workspaceRoot, "privacy", "REDACTION_CHECKLIST.md"), redactionChecklist()],
+    [path.join(workspaceRoot, "walkthroughs", "10-minute-your-task.md"), walkthrough10YourTask()],
+    [path.join(workspaceRoot, "walkthroughs", "10-minute.md"), walkthrough10()],
+    [path.join(workspaceRoot, "walkthroughs", "30-minute.md"), walkthrough30()],
+    [path.join(workspaceRoot, "walkthroughs", "60-minute.md"), walkthrough60()],
+    [path.join(workspaceRoot, "walkthroughs", "synthetic-loop-transcript.md"), syntheticTranscript()]
+  );
+  return entries;
+}
+export function createWorkspace(target, options = {}) {
+  const targetRoot = path.resolve(target);
+  const workspaceRoot = path.join(targetRoot, workspaceDirName);
+  const entries = workspaceFileEntries(workspaceRoot);
+  if (options.dryRun) {
+    // Honest preview: the plan must count EVERY file a real init would write, so
+    // "Files planned" == the later "Files written". A real init also drops a
+    // workspace .gitignore when options.gitignore is set (the user-facing `init`
+    // always does — see cli.js), and that file is NOT in workspaceFileEntries, so
+    // count it here too. Without this the preview under-reported by exactly 1 (209
+    // vs the real 210), making the "nothing-written preview" look untrustworthy.
+    const plannedFiles = entries.length + (options.gitignore ? 1 : 0);
+    return {
+      targetRoot,
+      workspaceRoot,
+      files: plannedFiles,
+      backupPath: null,
+      dryRun: true,
+      written: false,
+      existingWorkspace: existsSync(workspaceRoot)
+    };
+  }
+  if (existsSync(workspaceRoot) && !options.force) {
+    throw new Error(`${workspaceRoot} already exists. Pass --force to replace the generated workspace.`);
+  }
+  let backupPath = null;
+  if (existsSync(workspaceRoot) && options.force) {
+    backupPath = backupExistingWorkspace(targetRoot, workspaceRoot);
+  }
+  ensureDir(workspaceRoot);
+  for (const [file, content] of entries) {
+    writeText(file, content);
+  }
+  // Write the workspace .gitignore ONLY when asked (the user-facing `init` passes
+  // gitignore:true). The template generator (contract validator) and `demo` do NOT,
+  // so the committed/dogfooded .aict stays free of a nested .gitignore that would
+  // make npm pack drop the seed ledgers. Written before countFiles so init's
+  // "Files written" total includes it.
+  if (options.gitignore) {
+    writeText(path.join(workspaceRoot, ".gitignore"), workspaceGitignore());
+  }
+  return {
+    targetRoot,
+    workspaceRoot,
+    files: countFiles(workspaceRoot),
+    backupPath,
+    dryRun: false,
+    written: true
+  };
+}
+export function workspaceManifest() {
+  const workspaceDirs = [
+    "profile",
+    "context",
+    "acceptance",
+    "guard",
+    "handoff",
+    "harvest",
+    "roles",
+    "modes",
+    "mechanisms",
+    "prompts",
+    "skills",
+    "adapters",
+    "examples",
+    "cookbook",
+    "state",
+    "privacy"
+  ];
+  return {
+    name: "AI Collaboration Open System Workspace",
+    version: "0.1.0",
+    localFirst: true,
+    defaultNetworkUse: "none",
+    workspaceDirs,
+    layers: layerDefinitions.map((layer) => layer.id),
+    mechanisms: mechanismDefinitions.map((mechanism) => mechanism.id),
+    prompts: promptDefinitions.map((prompt) => prompt.file),
+    skills: skillDefinitions.map((skill) => skill.id),
+    adapters: adapterDefinitions.map((adapter) => adapter.id),
+    syntheticCases: caseDefinitions.map((caseItem) => caseItem.id),
+    firstExperience: "START_HERE.md -> 10/30/60 minute path -> synthetic loop -> real task adaptation"
+  };
+}
+export function countFiles(root) {
+  let total = 0;
+  for (const entry of readdirSync(root)) {
+    const fullPath = path.join(root, entry);
+    const stat = statSync(fullPath);
+    if (stat.isDirectory()) {
+      total += countFiles(fullPath);
+    } else {
+      total += 1;
+    }
+  }
+  return total;
+}
+export function profileCandidates() { // Returns the Markdown body that workspaceFileEntries() places at profile/CANDIDATES.md.
+  return `# Profile Candidates (buffer before the long-term profile)
+This file is the holding area for **proposed** profile preferences. The 10-minute loop (\`../walkthroughs/10-minute-your-task.md\`, Step 4) can suggest a profile candidate when a stable preference shows up more than once. That suggestion is a guess, not a fact - so it lands here as \`proposed\` instead of editing your real profile, and an unreviewed guess never hardens into a standing rule future sessions obey.
+This is local-first and public-safe. Keep only general, redacted preferences here - no private names, paths, customers, or internal numbers. The row below is a synthetic example, not real data.
+## State machine
+Every candidate moves through exactly these four states:
+| State | Meaning | Touches your long-term profile? |
+| --- | --- | --- |
+| \`proposed\` | The AI suggested it this loop; not yet reviewed, not trusted. | No |
+| \`confirmed\` | You reviewed it and it is correct as written. | Yes - it may graduate as-is |
+| \`edited\` | Correct only after you reword it; the edited line is what graduates. | Yes - the edited line graduates |
+| \`dropped\` | You reviewed it and it does not belong; kept on the record so it is not re-proposed. | No |
+Rule: **only \`confirmed\` and \`edited\` candidates graduate into your long-term profile, and only after you say so.** \`proposed\` and \`dropped\` never edit your profile. This is the same confirm / edit / drop discipline the harvest mechanism uses for harvested cards - nothing lands on the AI's say-so alone.
+## How to use this
+1. After a loop, a new candidate is appended below with status \`proposed\`.
+2. When you review it, change its status to \`confirmed\`, \`edited\`, or \`dropped\` (edit the wording in place if \`edited\`).
+3. Move \`confirmed\` / \`edited\` lines into your profile (\`EXAMPLE.synthetic.md\` here, or your own real profile file), then mark the row \`graduated\` in the Notes column or delete it.
+4. Leave \`dropped\` rows here so the same guess is not proposed every loop.
+## Candidates
+| Candidate (one line) | Status | Source loop | Reviewed on | Notes |
+| --- | --- | --- | --- | --- |
+| (synthetic) Prefer direct risk calls over reassurance | proposed | synthetic-loop-01 | (not reviewed yet) | example row; replace with your own |
+## Why this buffer exists
+Without it, the loop would "drop a candidate straight into the profile" - and a one-off observation from a single task could quietly become a permanent rule that every future session obeys, with no human in the loop. The buffer keeps your profile honest: it only ever grows from preferences you actually confirmed.
+## This file vs the learning ledger (two surfaces, same discipline)
+There are two places a proposed preference can live, and they are partners, not rivals:
+- **This file (\`CANDIDATES.md\`)** is the human view - a table you read and edit by hand while deciding what belongs in your profile. It covers profile candidates only.
+- **The learning ledger (\`../state/learning-ledger.jsonl\`)** is the machine record the CLI writes - \`ai-collab learning add --type profile --content "..."\` appends a \`proposed\` row, \`learning confirm/edit/drop\` flips its state, and \`ai-collab status\` echoes back the one preference you most recently confirmed so the next task starts ahead. It also records \`harvest\` lessons, which this file does not.
+Both use the exact same \`proposed / confirmed / edited / dropped\` states and the same graduation rule (only \`confirmed\`/\`edited\` graduate, only when you say so). But they are **two separate stores with no auto-sync, shared id, or dedupe between them** - so pick one place per candidate and keep it there. If you record the same preference in both and then change one, they will drift, and nothing reconciles them for you. When they do disagree, the **learning ledger is the source of truth**: it is what \`ai-collab status\` reads back and what the machine acts on; \`CANDIDATES.md\` is a human-only view that no command reads. Use whichever fits the moment - hand-edit this table, or run the \`learning\` commands - just not both for the same candidate, and let \`confirmed\`/\`edited\` lines graduate into your real profile.
+`;
+}
+// --- Run-layer ledgers (P1) ------------------------------------------------
+//
+// The five append-only JSONL ledgers under state/ are the runtime substance of
+// the run layer: real `ai-collab task/evidence/run/receipt` commands append to
+// them at runtime with real timestamps and ids. The committed templates below,
+// by contrast, must be FULLY DETERMINISTIC (no Date.now / random) so the
+// generate-and-compare contract check (scripts/validate-contract.js diffs the
+// committed .aict byte-for-byte against a fresh generation) keeps passing.
+//
+// Each ledger ships exactly one synthetic, public-safe seed row instead of being
+// empty. Two reasons: (1) writeText() force-appends a trailing newline, so a
+// truly empty body would collapse to a lone "\n" and be ambiguous; (2) one
+// synthetic row gives the privacy scanner a real jsonl line to scan and gives
+// the validator's cross-reference checks a consistent starting set. The seed rows
+// are mutually consistent (evidence/run/receipt all point at task t0) so a clean
+// generated workspace passes all six ledger checks with zero errors.
+//
+// SYNTHETIC_TS is a hard-coded ISO-8601 date string (a literal, never read from
+// the clock) shared by every seed row below, so regenerating the templates
+// always produces the same bytes.
+const SYNTHETIC_TS = "2026-01-01T00:00:00.000Z";
+export function tasksLedger() {
+  return JSON.stringify({
+    id: "t0",
+    title: "(synthetic) example task seed row - replace with your own",
+    status: "open",
+    createdAt: SYNTHETIC_TS
+  });
+}
+export function evidenceLedger() {
+  // Two synthetic evidence rows, both bound to task t0. e0 is a generic note;
+  // e1 is a kind:"cross_family_guard" row — the load-bearing evidence the seed
+  // receipt (c0, an L3 pass) must cite so its "binding cross-family" claim is
+  // actually backed (P2 evidence-gate). Without e1 the seed L3 pass would be a
+  // self-asserted level the validator now rejects. P2 structure gate goes
+  // further: a cross_family_guard row must NAME who/which family reviewed it
+  // (at least one of reviewer / family / ref), so e1 carries a synthetic
+  // `reviewer` + `family` — otherwise the seed row would be an empty shell the
+  // new structure check (and the L3-pass gate) rejects. Field order matches a
+  // live `evidence add` record so the on-disk shape is identical. One row/line.
+  return [
+    JSON.stringify({
+      id: "e0",
+      taskId: "t0",
+      kind: "note",
+      summary: "(synthetic) example evidence seed row bound to task t0",
+      createdAt: SYNTHETIC_TS
+    }),
+    JSON.stringify({
+      id: "e1",
+      taskId: "t0",
+      kind: "cross_family_guard",
+      summary: "(synthetic) cross-family guard review seed row bound to task t0",
+      reviewer: "(synthetic) example reviewer",
+      family: "(synthetic) other-model-family",
+      createdAt: SYNTHETIC_TS
+    })
+  ].join("\n");
+}
+export function runsLedger() {
+  return JSON.stringify({
+    id: "r0",
+    taskId: "t0",
+    command: "echo synthetic-seed",
+    startedAt: SYNTHETIC_TS,
+    finishedAt: SYNTHETIC_TS,
+    exitCode: 0,
+    status: "finished"
+  });
+}
+export function receiptsLedger() {
+  // The seed receipt must satisfy the SAME rules a live `receipt create` would.
+  // A1: the guardLevel is COMPUTED from the review method + evidence, not declared
+  // — so the seed records reviewMode "cross_family" and cites BOTH e0 (note) and
+  // e1 (kind:cross_family_guard, which carries its required family attribution).
+  // computeGuardLevel({reviewMode:"cross_family", hasCrossFamilyGuardEvidence:true,
+  // hasAnyEvidence:true}) yields level "L3" with familyUnverified:true (the family
+  // is self-declared and the tool cannot verify it; only a rerun reconciled to a
+  // recorded run, on top of this cross-family review, would reach the strongest
+  // local-trust level, L4). Under those inputs guardLevelVerdictError("L3",
+  // "pass", rerun=false, crossFamily=true) is null and receiptStatusFor("pass",
+  // [e0,e1], false) computes "accepted", so the seed is self-consistent with every
+  // runtime rule: check 8c sees stored L3 == computed L3 (not over-claimed) and
+  // check 8d sees the required familyUnverified:true marker present. Field order
+  // matches the live `receipt create` record (id, taskId, verdict, guardLevel,
+  // reviewMode, evidenceIds, familyUnverified, status, createdAt) so the on-disk
+  // shape is identical.
+  return JSON.stringify({
+    id: "c0",
+    taskId: "t0",
+    verdict: "pass",
+    guardLevel: "L3",
+    reviewMode: "cross_family",
+    evidenceIds: ["e0", "e1"],
+    familyUnverified: true,
+    status: "accepted",
+    createdAt: SYNTHETIC_TS
+  });
+}
+export function learningLedger() {
+  return JSON.stringify({
+    id: "l0",
+    taskId: "t0",
+    type: "harvest",
+    content: "(synthetic) example learning seed row - written by the P4 harvest flow",
+    status: "proposed",
+    createdAt: SYNTHETIC_TS
+  });
+}
+export function redactionChecklist() {
+  return `# Redaction Checklist
+Use this before publishing an example or sharing a workspace.
+- [ ] The case is synthetic or public-safe.
+- [ ] No actual client, employer, or account names appear.
+- [ ] No local machine paths appear.
+- [ ] No raw private conversations appear.
+- [ ] No private tool-routing details or hooks appear.
+- [ ] No tokens, keys, cookies, credentials, or session IDs appear.
+- [ ] No private knowledge-base source material appears.
+- [ ] The example can stand alone without revealing the owner's private system.
+`;
+}
+/**
+ * Returns the Markdown overview of the roles: what a role card contains, the
+ * list of the six public roles, and why each card is a responsibility matrix
+ * rather than a two-line "does / does not".
+ *
+ * The text is a static template literal with no interpolation.
+ *
+ * @returns {string} The overview text, ending with a newline.
+ */
+export function rolesReadme() {
+  return `# Roles
+Roles keep the AI Collaboration Open System human-centered. They define responsibility, not hidden authority.
+Each role card below is a responsibility matrix, not a vibe. It states six things so two different tools (or two different sessions) read the same boundary: what the role CAN do, what it CANNOT do, what it takes in, what it produces, who it escalates to when something exceeds its authority, and one synthetic overreach example showing what breaks when the boundary is crossed.
+## Public roles
+- Owner / controller: decides goals and acceptance, and holds final judgment.
+- Executor: produces the artifact inside the agreed boundary.
+- System guardian: challenges risk and evidence before output is trusted.
+- Scout: gathers options and external facts before a decision.
+- Supervisor: translates the AI's state into plain language and watches the main line and the wording, distinct from the guardian who watches the facts.
+- Harvester: extracts reusable learning after a loop.
+## Why a matrix and not just "does / does not"
+A two-line "does / does not" tells a tool the gist but not the seams: where work enters, where it leaves, and who catches it when it exceeds the role. The missing seams are exactly where collaboration fails — an executor quietly becomes a rule-changer, a guardian quietly becomes an editor, a scout quietly becomes a decider, a supervisor quietly becomes a second guardian. Naming inputs, outputs, the escalation target, and a concrete overreach example closes those seams.
+`;
+}
+/**
+ * Returns the Markdown role card for the Owner / Controller: purpose, can do,
+ * cannot do, inputs, outputs, escalation target, and a synthetic overreach
+ * example.
+ *
+ * The text is a static template literal with no interpolation.
+ *
+ * @returns {string} The role card text, ending with a newline.
+ */
+export function roleOwnerController() {
+  return `# Owner / Controller
+## Purpose
+Keep human judgment at the center of the workflow. The controller is the top of the chain: it sets direction and owns the final call, so the separation of decision from production stays intact.
+## Can do
+- Define the goal, the scope, and the acceptance criteria for a piece of work.
+- Issue instructions and choose between options the executor or scout brings back.
+- Accept or reject delivered work, and decide when residual risk is acceptable.
+- Make the final call and close the loop.
+## Cannot do
+- Approve its own judgment by pretending agreement is independent review (it must not be both author and reviewer of the same decision).
+- Make the guardian's call for it, or treat its own opinion as a guard pass.
+- Step in and personally do the heavy production work that should have been delegated, because then no independent reviewer is left to check it.
+## Inputs
+- The task or problem to be solved.
+- Proposed plans, options, and trade-offs from the executor or scout.
+- Returned artifacts, guard verdicts, and harvest cards awaiting confirmation.
+## Outputs
+- Clear instructions and a defined boundary for each piece of work.
+- Acceptance or rejection decisions with the reason.
+- The final, recorded decision that lets the loop close.
+## Escalates to
+- No one above it — the controller is the top of the responsibility chain. When it lacks facts it tasks a scout; when it needs an independent check it tasks a guardian; but the decision itself does not get handed upward.
+## Overreach example (synthetic)
+A controller decides a feature is simple, sits down, and writes the implementation itself instead of delegating it. Because the controller is now the author, there is no independent party left to review whether the code actually meets acceptance — the controller would be grading its own homework. The separation that the whole system relies on collapses, and a defect ships unnoticed because the only person who could have caught it is the one who wrote it.
+`;
+}
+/**
+ * Returns the Markdown role card for the Executor: purpose, can do, cannot do,
+ * inputs, outputs, escalation target, and a synthetic overreach example.
+ *
+ * The text is a static template literal with no interpolation.
+ *
+ * @returns {string} The role card text, ending with a newline.
+ */
+export function roleExecutor() {
+  return `# Executor
+## Purpose
+Produce the requested artifact inside the context and acceptance boundary, and prove it with evidence rather than claims.
+## Can do
+- Implement the task exactly as instructed, working from the provided files.
+- Change the agreed artifacts, save state, and record what was done.
+- Self-verify the work and report changed files or sections with verification evidence.
+## Cannot do
+- Make the controller's decisions or accept its own work as done.
+- Silently expand scope beyond the instructed task.
+- Cross a core boundary (governance rules, security-sensitive areas, anything outside the task) without stopping to ask first.
+## Inputs
+- A task packet: the goal, the boundary, the relevant files, known constraints, and the acceptance criteria.
+## Outputs
+- The requested artifact.
+- A three-part report: what changed, the actual verification evidence, and what remains unverified.
+## Escalates to
+- The controller — whenever the task is ambiguous, the scope needs to grow, or a core boundary is in the way, the executor stops and hands the decision back up rather than deciding for itself.
+## Overreach example (synthetic)
+While fixing one small bug, an executor notices a shared rule it thinks is wrong and edits it on the spot without asking. A scoped one-line fix has now quietly become a change to the rules everyone else relies on — a change no one reviewed and no one approved. The next session inherits an altered rule with no decision behind it, and tracing why behavior changed becomes a hunt because the change was never surfaced as a decision.
+`;
+}
+/**
+ * Returns the Markdown role card for the System Guardian: purpose, can do,
+ * cannot do, inputs, outputs, escalation target, and a synthetic overreach
+ * example. The card also spells out the four standard verdicts and the L0-L4
+ * guard levels the guardian reports.
+ *
+ * The text is a static template literal with no interpolation.
+ *
+ * @returns {string} The role card text, ending with a newline.
+ */
+export function roleSystemGuardian() {
+  return `# System Guardian
+## Purpose
+Challenge output before it becomes trusted state. The guardian is a referee, not a player: it finds problems and points to evidence, but it does not take over the work.
+## Can do
+- Independently review the artifact for acceptance fit, privacy, evidence quality, and handoff readiness.
+- Surface blind spots and name required fixes, leading with findings.
+- Issue one of the four standard verdicts (pass / reject / insufficient_evidence / pass_with_risk) with the guard level (L0-L4) for the evidence seen, and point every finding to a specific line, section, or missing piece of evidence. A plain pass needs L3+ (a cross-family evidence pack); a pass_with_risk needs an explicit owner sign-off before it counts as accepted.
+## Cannot do
+- Only find — it does not give orders, and it does not execute.
+- Rewrite or fix the artifact itself by default (fixing what it judges makes it both referee and player).
+- Make the decision for anyone; a concern is not an approval, and an approval is not the controller's acceptance.
+## Inputs
+- The object under review: the artifact, plus its acceptance card, context boundary, and the verification evidence behind any completion claim.
+## Outputs
+- A verdict and a findings list, each finding tied to concrete evidence.
+- Required fixes and named residual risk, handed back for a decision — not applied directly.
+## Escalates to
+- The controller — the guardian reports findings and a verdict, then the controller decides what to fix, what to accept as residual risk, and whether to close.
+## Overreach example (synthetic)
+A guardian reviewing an artifact spots a flaw and, instead of reporting it, just edits the artifact to fix it. Now the same party both judged the work and changed it, so its independence is gone: no one is left to check whether the "fix" is actually correct or whether it quietly broke something else. The verdict can no longer be trusted, because the referee walked onto the field and started playing.
+`;
+}
+/**
+ * Returns the Markdown role card for the Scout: purpose, can do, cannot do,
+ * inputs, outputs, escalation target, and a synthetic overreach example.
+ *
+ * The text is a static template literal with no interpolation.
+ *
+ * @returns {string} The role card text, ending with a newline.
+ */
+export function roleScout() {
+  return `# Scout
+## Purpose
+Collect options and decision-changing evidence before the controller chooses. The scout gathers facts; it does not judge them.
+## Can do
+- Gather external facts, candidate paths, and industry comparisons relevant to a pending decision.
+- List evidence gaps and label how time-sensitive each finding is.
+- Bring back sourced material so the controller can decide on solid ground.
+## Cannot do
+- Interpret, judge, or rule on the evidence it gathers.
+- Recommend a path or lean toward an option ("you should pick A").
+- Turn exploration into implementation, or decide anything itself.
+## Inputs
+- The specific question or unknown to investigate, framed by the controller.
+## Outputs
+- A fact card: candidate options and findings, each with its source and a freshness label, and no verdict attached.
+## Escalates to
+- The controller — the scout delivers sourced facts and hands the synthesis and the decision upward, keeping fact-gathering separate from judgment.
+## Overreach example (synthetic)
+Asked only to gather options, a scout instead returns "you should choose A." By folding a recommendation into the fact-gathering, it has mixed evidence with judgment — and now the controller can no longer reason from clean facts, because the conclusion is already baked in. The independence of the later decision is contaminated before it even starts, and the scout has quietly made a call that was never its to make.
+`;
+}
+/**
+ * Returns the Markdown role card for the Harvester: purpose, can do, cannot
+ * do, inputs, outputs, escalation target, and a synthetic overreach example.
+ *
+ * The text is a static template literal with no interpolation.
+ *
+ * @returns {string} The role card text, ending with a newline.
+ */
+export function roleHarvester() {
+  return `# Harvester
+## Purpose
+Extract reusable learning after a loop. The harvester proposes; it does not file to the source of truth on its own.
+## Can do
+- Sweep a conversation or finished loop and lift the reusable bits into harvest cards.
+- Redact private material into a general, public-safe form before anything is proposed.
+- Draft candidate prompts, decisions, lessons, and rule suggestions for confirmation.
+## Cannot do
+- Write directly into the knowledge-base source of truth.
+- Accept its own cards as final, or skip the human confirmation step.
+- Generalize a single incident into a permanent rule without evidence and without sign-off.
+## Inputs
+- The conversation, loop, or raw material to harvest from.
+## Outputs
+- Harvest cards (one item per card) in a public-safe form, presented as candidates awaiting confirmation — not filed yet.
+## Escalates to
+- The owner / controller — nothing lands in the knowledge base until the owner confirms each card; the harvester stages, the owner files.
+## Overreach example (synthetic)
+A harvester writes a card and, without waiting for confirmation, files it straight into the knowledge base. An unverified "lesson" has now been frozen into a standing rule that future loops will obey — except no one checked whether it was actually true. A one-off observation becomes durable doctrine by accident, and later work is silently shaped by a rule that was never approved and may be wrong.
+`;
+}
+/**
+ * Returns the Markdown role card for the Supervisor: purpose, can do, cannot
+ * do, inputs, outputs, escalation target, and a synthetic overreach example.
+ * The card defines the three plain verdicts (send / send with a correction /
+ * stop and fix first) and separates the supervisor from the guardian.
+ *
+ * The text is a static template literal with no interpolation.
+ *
+ * @returns {string} The role card text, ending with a newline.
+ */
+export function roleSupervisor() {
+  return `# Supervisor
+## Purpose
+Lower the human's cost of watching the work, without taking the wheel. The supervisor is a state translator: it turns what the AI is doing into plain language and watches whether the work is still on track. It does not steer direction and it does not check facts line by line — that is the guardian's job. The split is deliberate: the guardian watches the facts (is this claim backed, does this code work, did scope drift); the supervisor watches the main line and the wording (are we still going where we meant to, and is the AI being honest about how done it is).
+## Can do
+- Translate the AI's current state into plain language for the human: where the main line is, what just happened, what the next step is.
+- Watch three things on every pass: (1) is the main line drifting — is the work quietly chasing a side-quest while the real goal stalls; (2) is "done-pending-verification" being passed off as "accepted" — is unproven work being described as finished; (3) is a decision being punted back to the human — is the AI bouncing a choice it should have made itself.
+- Issue one of three plain verdicts: SEND (it can go forward), SEND WITH A CORRECTION (a small fix rides along, no need to redo), or STOP AND FIX FIRST (something on the main line is wrong enough to halt).
+## Cannot do
+- Steer the direction or make the call — it flags, it does not decide, and "looks on track to me" is not the human's approval.
+- Do the guardian's job: it does not write a formal verdict on facts, does not hunt code-level defects, does not rule on evidence quality. When it strays into line-by-line fact-checking it has stopped being a supervisor and become a second guardian.
+- Open a side issue into a new main line, or let the human get pulled into doing a judgment the AI should have made.
+## Inputs
+- The AI's current state to translate (a status update, a handoff packet, a plan, a progress report) and the main line it is supposed to be serving, so drift can be measured across steps, not just within one.
+## Outputs
+- A short plain-language status read (where the main line is, the current step, the next action) plus the three-question check, ending in one of the three verdicts: send / send-with-a-correction / stop-and-fix-first.
+- For a send-with-a-correction, the exact small fix the next step should carry; for a stop-and-fix-first, what specifically must be repaired before the work moves on.
+## Escalates to
+- The owner / controller — the supervisor reports its plain-language read and its verdict, then the human decides. For a true facts-and-evidence judgment it hands off to the guardian rather than ruling on facts itself.
+## Overreach example (synthetic)
+A supervisor reviewing a status update stops translating and starts grading the code: it digs into a function, declares the implementation correct, and issues a pass on the technical work. But checking facts and clearing evidence is the guardian's role, and now the same pass mixes "the main line looks on track" with "the code is verified" — two different judgments the human can no longer tell apart. The supervisor has quietly become a second guardian, the real fact-check never gets an independent pass, and a plain-language safety net the human relied on to stay cheap has turned into one more heavyweight reviewer.
+`;
+}
+/**
+ * Returns the Markdown overview of the modes: the five mode names (shape,
+ * execute, review, handoff, harvest), what each mode card specifies, and how
+ * work loops between the modes.
+ *
+ * The text is a static template literal with no interpolation.
+ *
+ * @returns {string} The overview text, ending with a newline.
+ */
+export function modesReadme() {
+  return `# Modes
+Modes state what kind of work is happening now. A mode is a boundary, not a personality.
+Use one mode at a time: shape, execute, review, handoff, or harvest.
+Each mode card below is a full spec, not a one-liner. It states six things so a tool always knows the edges of the current mode: the entry condition that lets you start, the actions allowed, the actions forbidden, the output format, the exit condition that ends the mode, and how it hands off to the other modes. The forbidden line and the handoff line are what keep modes from blurring into one another.
+## The loop between modes
+Shape comes first when the request is still fuzzy: it turns a rough intent into a signable thin contract before anything is built, so execute starts from a boundary instead of a guess. Then the core loop runs. Execute produces, then review challenges what execute produced; a rejection sends it back to execute, a pass moves it toward handoff or close. Handoff carries state across a session or tool boundary so the receiver re-enters execute cleanly. Harvest runs at a seam or close to lift reusable learning, then returns to whatever mode was active. Naming each entry and exit explicitly is what stops "shape" from sliding into design, "review" from quietly editing, or "execute" from drifting past its task.
+`;
+}
+/**
+ * Returns the Markdown mode card for Execute mode: entry condition, allowed
+ * actions, forbidden actions, output format, exit condition, and inter-mode
+ * handoff.
+ *
+ * The text is a static template literal with no interpolation.
+ *
+ * @returns {string} The mode card text, ending with a newline.
+ */
+export function modeExecute() {
+  return `# Execute Mode
+Build the agreed artifact, and only that.
+## Entry condition
+There is a clearly defined task with execution authority granted: a goal, a boundary, and acceptance criteria are all in place.
+## Allowed actions
+- Create or edit the agreed artifact.
+- Change the in-scope files or sections and save state.
+- Self-verify the work and capture the evidence.
+## Forbidden actions
+- Doing work outside the stated task or boundary.
+- Crossing a core boundary (rules, security-sensitive areas, anything out of scope) without stopping.
+- Declaring the work done without running the checks.
+## Output format
+The artifact, the list of changed files or sections, the verification evidence, and an explicit note of what is still unverified.
+## Exit condition
+The task is wrapped up and has passed acceptance, or it is blocked and must be handed off.
+## Inter-mode handoff
+When the artifact is done, move to review so an independent pass can challenge it before it is trusted; if work must cross a session or tool boundary first, move to handoff and let the receiver re-enter execute.
+`;
+}
+/**
+ * Returns the Markdown mode card for Review mode: entry condition, allowed
+ * actions, forbidden actions, output format, exit condition, and inter-mode
+ * handoff. The output-format section names the four standard verdicts and the
+ * L0-L4 guard levels.
+ *
+ * The text is a static template literal with no interpolation.
+ *
+ * @returns {string} The mode card text, ending with a newline.
+ */
+export function modeReview() {
+  return `# Review Mode
+Inspect and challenge the artifact — without changing it.
+## Entry condition
+There is a produced artifact that needs to be checked before anyone trusts it.
+## Allowed actions
+- Inspect the artifact against context, acceptance, and evidence.
+- Challenge claims, surface blind spots, and point each finding to a specific line, section, or missing piece of evidence.
+## Forbidden actions
+- Editing or fixing the artifact under review. Review and repair stay separate, so the reviewer never becomes the author of what it judges.
+## Output format
+One of the four standard verdicts (pass / reject / insufficient_evidence / pass_with_risk) plus the guard level (L0-L4) for the evidence seen, the findings with severity, the required fixes, and the named residual risk. A plain pass requires L3+ (a cross-family evidence pack); a pass_with_risk is not accepted until the owner explicitly signs off on the residual risk.
+## Exit condition
+A verdict has been issued.
+## Inter-mode handoff
+On reject, hand the required fixes back to execute for repair; on pass, move to handoff or to close. Review never applies the fix itself — it returns the artifact to execute for that.
+`;
+}
+/**
+ * Returns the Markdown mode card for Handoff mode: entry condition, allowed
+ * actions, forbidden actions, output format, exit condition, and inter-mode
+ * handoff.
+ *
+ * The text is a static template literal with no interpolation.
+ *
+ * @returns {string} The mode card text, ending with a newline.
+ */
+export function modeHandoff() {
+  return `# Handoff Mode
+Compress state so the next session or tool can pick up exactly where this one stopped.
+## Entry condition
+Work is about to cross a boundary: a session is ending, a different tool is taking over, or a long task has reached a natural seam.
+## Allowed actions
+- Compress the current state into a structured handoff packet.
+- Seal the baseline (the exact point being handed off) so the receiver starts from a known state.
+## Forbidden actions
+- Dropping context the receiver needs.
+- Omitting the exact first action the next session should take.
+## Output format
+A handoff packet: what is done, what is pending, what is blocked, what is unverified, the sealed baseline, and the exact next step.
+## Exit condition
+The receiver confirms they can pick up from the packet alone, without re-reading the whole history.
+## Inter-mode handoff
+The receiver reads the packet and re-enters execute on the stated first action, continuing the loop from the sealed baseline rather than from zero.
+`;
+}
+/**
+ * Returns the Markdown mode card for Harvest mode: entry condition, allowed
+ * actions, forbidden actions, output format, exit condition, and inter-mode
+ * handoff.
+ *
+ * The text is a static template literal with no interpolation.
+ *
+ * @returns {string} The mode card text, ending with a newline.
+ */
+export function modeHarvest() {
+  return `# Harvest Mode
+Lift reusable learning out of finished work — into staged, public-safe cards.
+## Entry condition
+Harvest is triggered, or a phase is closing, and there is reusable value worth saving before it is lost.
+## Allowed actions
+- Sweep the conversation or finished loop for reusable bits.
+- Draft harvest cards (one item per card) for decisions, lessons, methods, and stable preferences.
+- Redact private material into a general, public-safe form.
+## Forbidden actions
+- Filing anything into the knowledge base without redacting it first.
+- Deciding on the user's behalf whether a card lands; that confirmation belongs to the user.
+## Output format
+Harvest cards in a public-safe form, presented as candidates awaiting confirmation.
+## Exit condition
+The user has confirmed which cards land in the knowledge base.
+## Inter-mode handoff
+Harvest runs at a seam without taking over the work; once cards are confirmed and filed, it returns control to whatever mode was active before it (typically execute or a close).
+`;
+}
+/**
+ * Returns the Markdown mode card for Shape mode: entry condition, allowed
+ * actions, forbidden actions, output format, exit condition, and inter-mode
+ * handoff. The card describes turning a fuzzy intent into a signable thin
+ * contract and passing a preview gate before any design or build.
+ *
+ * The text is a static template literal with no interpolation.
+ *
+ * @returns {string} The mode card text, ending with a newline.
+ */
+export function modeShape() {
+  return `# Shape Mode
+Turn a fuzzy idea into a signable thin contract — before any solution is designed or built.
+## Entry condition
+The person has a rough intent but nothing crisp enough to act on yet: "I want to improve X", "this feels off", "I have an idea". It sits between exploring the current state and designing a solution; you enter it instead of jumping straight to a plan from a vague request.
+## Allowed actions
+- Pull the intent into a few anchors (the situation, the wanted result, the result that would be unacceptable, what this round must protect) and name the two or three ambiguities that actually matter.
+- Offer choices instead of asking for a blank-page description: pose comparison questions ("more like A or like B?"), and proactively recommend one to three reference points (an existing product or feature) so the person reacts to something concrete instead of recalling from nothing.
+- Lead with weaknesses before any direction: name the two or three ways the current instinct most easily goes wrong, including what an automated step would amplify and what would be hardest to undo.
+- Rewrite the problem statement when the framing itself is the trap, instead of politely optimizing along the original wording.
+- Give two or three candidate directions in experience terms (what the person will feel, and the cost), each with its single biggest failure point, and let them pick one.
+- Run a preview gate: before the person signs off, show a perceivable preview matched to the work — a mock or wireframe for UI, a 1-2-3 journey walk for a flow, or sample request/response and failure-case examples for backend — so "yes, that's the feeling" is grounded in something they can see, not an abstract description.
+## Forbidden actions
+- Discussing implementation, writing code, or proposing a technical solution before the contract is confirmed and the preview gate is passed.
+- Asking the person to describe implementation detail (they are not here for that), or handing back "please describe your requirements in detail" instead of offering choices.
+- Giving only one direction with no choice, pushing the job of "say what you want" back onto the person, or skipping the preview gate straight into design or build.
+## Output format
+A thin contract the person can sign: the success definition in their words, the failure definition and non-goals, the most likely wrong assumption, a short negative list (the two or three things most likely to be misread, missed, or amplified), and the confirmed reference points — followed by a matched preview the person has reacted to.
+## Exit condition
+The person confirms the thin contract AND the preview gate passes ("yes, that is the feeling"). Before exit, three adversarial questions must be answered: if this is pushed forward as currently understood, what is most likely done wrong; which weakness, left unfixed now, gets amplified by later automation; and is the real fix to the problem statement rather than the answer. Unanswered, the mode does not exit.
+## Inter-mode handoff
+On a confirmed contract and a passed preview, move to design for a technical plan (or straight to execute for a simple task), carrying the contract as the boundary the build is judged against. If the person rejects the direction, return to the choice step and re-pose it rather than optimizing the dead direction. A discovered framing error is a problem-statement rewrite, not a silent patch.
+`;
+}
+// One-line "what it does" summary per mechanism, keyed by mechanism id. Each
+// value is a plain string. Kept short on purpose so the overview reads as a
+// map, not a wall of text; each mechanism's own README carries the full
+// purpose/trigger/process.
+//
+// mechanismsReadme looks up every catalog entry's id in this table and throws
+// when the entry is missing, so a mechanism added to the catalog without a
+// one-liner here is caught at generate time instead of shipping a silently
+// incomplete overview.
+const MECHANISM_ONE_LINERS = {
+  "dual-guard":
+    "Trust an artifact only after a guard from a different model family (binding) plus an optional same-family guard (reference) have pressed on it, so a fluent answer is not believed just because it reads well.",
+  "scout-review-controller":
+    "Separate exploration from the decision: a SCOUT gathers options and evidence without choosing, so the controller decides on a real spread instead of the first path that came up.",
+  "one-click-dispatch":
+    "Turn a messy task into one self-contained work packet another AI tool can run without inheriting the whole chat.",
+  "task-splitting":
+    "Run a five-question pre-dispatch check before handing work to another AI, and split by topic or deliverable so a too-large prompt does not stall or collapse midway.",
+  "anti-drift-partner":
+    "Run a long thinking conversation with an AI that pushes back instead of agreeing — it surfaces your blind spots, probes at most two rounds, then commits to a judgment, so the talk never drifts into fluent confirmation.",
+  "blind-spot-scan":
+    "Borrow an outside viewpoint (customer, competitor, expert, opponent, your-future-self), re-read the decision through that seat, and get back the concrete dead angles you cannot see from your own plus the one counter-question most worth sitting with — and the borrowed viewpoint must genuinely challenge, never flatter from a costume.",
+  "root-cause-brake":
+    "When the same artifact is rejected twice in a row, trip a brake: no more patches until you answer four diagnostic questions, name the real cause, and rebuild the next version around it.",
+  "half-product-review":
+    "Block confident \"done\" when there are docs, demos, and architecture but no runnable first experience a stranger can actually complete.",
+  "handoff-abc":
+    "Externalize the current state into a structured packet so any session or tool resumes from where the work really is, instead of re-explaining the background each time.",
+  "harvest-and-erc":
+    "Capture the reusable lesson, prompt fragment, or rule candidate from finished work before it leaks away, including across multiple sessions.",
+  "do-not-handle-yet":
+    "Protect the main line by explicitly parking tempting but lower-priority work, on the record, instead of silently dropping or drifting into it.",
+  "plain-language-first-screen":
+    "Make the first screen explain the result, the path, and the proof before any concept or framework name.",
+  "honest-calibration":
+    "Lead every ask for a rating or recommendation with a short candor prefix (be candid, do not inflate, do not over-hedge) that offsets the model's pull to please and re-aims the baseline from make-you-happy to tell-the-truth.",
+  "feedback-absorption-ledger":
+    "When merging feedback from several sources, score each item across five tiers (absorb fully / refine / add a boundary / partly absorb / reject with a reason) so you keep independent judgment instead of rubber-stamping — the absorb/reject ratio is an outcome, not a target.",
+  "collaboration-coach":
+    "Proactively remind the user of the matching collaboration step at key moments, restrained by default.",
+  "single-tool-guard":
+    "The default starting guard for one-model-family users (most solo users) — new conversation + adversarial prompt turns a trusted \"looks fine\" into an evidence-backed, re-checkable result; honestly capped at L2 and explicitly not a passed cross-family gate, which is the upgrade ceiling."
+};
+export function mechanismsReadme() {
+  const lines = mechanismDefinitions.map((mechanism) => {
+    const oneLiner = MECHANISM_ONE_LINERS[mechanism.id];
+    if (!oneLiner) {
+      throw new Error(`mechanismsReadme: missing one-liner for mechanism "${mechanism.id}"`);
+    }
+    return `- \`${mechanism.id}/\` — **${mechanism.title}.** ${oneLiner}`;
+  });
+  return `# Mechanisms
+The reusable collaboration moves of the AI Collaboration Open System. Each one is a local-first, public-safe Markdown package you can copy-paste into any AI tool. A mechanism is a self-contained directory with five files:
+- \`README.md\` — what it is, when to use it, and when not to.
+- \`PROMPT.md\` — the copy-paste prompt that runs it.
+- \`TEMPLATE.md\` — a blank you fill in for your own task.
+- \`EXAMPLE.synthetic.md\` — a worked synthetic example (no private data).
+- \`FAILURE_MODES.md\` — how it goes wrong and how to keep it honest.
+These are the standing moves; the six layers (profile, context, acceptance, guard, handoff, harvest) are the spine they plug into, and the \`cookbook/\` recipes show how to run them on a real task.
+## The ${mechanismDefinitions.length} mechanisms
+${lines.join("\n")}
+## How to use one
+Open the mechanism's \`README.md\` to confirm it fits, copy the body of its \`PROMPT.md\` into your AI tool, and paste your own material where the \`TEMPLATE.md\` marks it. Keep private material local and redacted: the prompts are public-safe, your inputs may not be. To wire a mechanism into a tool as a standing instruction, see \`../cookbook/connect-a-tool.md\`.
+`;
+}
+/**
+ * Returns the Markdown text of the cookbook index.
+ *
+ * The result is a fixed template literal with no interpolation: a heading, an
+ * introduction, and one bullet per recipe file. Backticks meant to appear in
+ * the Markdown are written as escaped backticks.
+ *
+ * @returns {string} The cookbook README contents.
+ */
+export function cookbookReadme() {
+  return `# Cookbook
+Do-it recipes for running the AI Collaboration Open System. Each recipe is a full configuration: when to use it, prerequisites, steps, a copy-paste block you can actually run, expected output, failure handling, a privacy note, and a next step. The walkthroughs are operation cards ("press these in this order"); these recipes explain why each step exists and how to adapt it to your own task.
+- \`run-a-first-loop.md\`: run one complete collaboration loop end to end on your own real task; the prepared synthetic case is an optional "watch the flow first" track.
+- \`connect-a-tool.md\`: wire any AI tool (general chat AI, coding assistant, command-line AI) to the shared contract by copying files into its instruction slot.
+- \`review-a-half-product.md\`: audit a "done but maybe not" deliverable by forcing an independent AI to cite evidence and find the gap.
+- \`bridge-to-a-second-family.md\`: stand up the second, different-model-family AI the cross-family guard needs, and route a review across it — manual copy-paste (works anywhere) or an optional auto bridge.
+`;
+}
+/**
+ * Returns the Markdown text of the "Run a First Loop" recipe.
+ *
+ * The result is a fixed template literal with no interpolation. Its sections
+ * are: when to use, prerequisites, steps, a fenced copy-paste prompt block,
+ * expected output, failure handling, a privacy note, and next steps. Escaped
+ * backticks produce the inline code spans and the text fence.
+ *
+ * @returns {string} The recipe contents.
+ */
+export function cookbookFirstLoop() {
+  return `# Run a First Loop
+A do-it recipe: run one complete AI collaboration loop end to end on your own real (lightly redacted) task, and watch a guard catch a false completion claim that a single agent would have accepted. This is the recipe; \`../walkthroughs/10-minute-your-task.md\` is the operation card for that real-task run. The walkthrough says "press these buttons in this order"; this recipe says "here is why each step exists, and here is how to adapt it to whatever you are actually working on." If you would rather watch the loop on a prepared example before pointing it at your own work, the synthetic case is the optional "look first" track — see the box below.
+> Optional "look first" track: if your task feels too sensitive to paste right now, or you just want to see the shape of the loop first, run it once on the prepared synthetic case using \`../walkthroughs/10-minute.md\` (the demo preview), then come back and run it on your own task with the copy-paste block below.
+## When to use this
+- Your first time through the system, and you want to feel the whole loop on work you actually care about.
+- You can describe the loop but have never watched a guard actually reject a fluent "done".
+- You are about to start a real task and want a tested prompt sequence to adapt, not a blank page.
+Skip it if you have already run the loop and just need the fast operation card; go straight to \`../walkthroughs/10-minute-your-task.md\`.
+## Prerequisites
+- This workspace exists (you are reading a file inside it).
+- One real task of your own you can describe in a few sentences (lightly redacted: swap private names, paths, and numbers for placeholders). No private file needs to be uploaded — a redacted description is enough.
+- One AI tool you can paste into (any general chat AI, coding assistant, or command-line AI). One tool is enough for a first pass; a second tool of a different model family makes the guard step stronger but is optional.
+- Five to ten minutes. Nothing is uploaded; you only read and copy local files plus your own redacted description.
+## Steps
+Run these five moves on your own task. (Each move maps to one shipped artifact in \`../examples/ai-coding-long-task/artifacts/\` — open the matching file there any time you want to see the move done once on the prepared synthetic case.)
+1. Set context. Describe your task to the AI and have it write a context package: the goal in one sentence, what is in scope, and explicit non-goals. This turns a tangled request into a boundary. Reference: \`context-package.md\`.
+2. Set acceptance. Turn that context into an acceptance card — a short numbered list of checkable "done" criteria a reviewer can verify, not a vibe. This is the step people skip and then regret. Reference: \`acceptance-card.md\`.
+3. Produce the first output. Have the AI do only the accepted slice and report what changed, what it ran, what failed, and what it did NOT verify. Read its completion claim against the actual code or evidence — this is where a fluent "done" usually overstates the work. References: \`execution-prompt.md\`, \`first-ai-output.md\`.
+4. Run the guard. Paste that output plus \`../guard/PROMPT.md\` into a second AI tool (or the same one in a fresh turn) and ask it to review against the acceptance card. A good guard returns a cause-and-effect chain tied to specific spots and a reject, not a one-line "looks good". Reference: \`guard-review.md\`.
+5. Revise and close. Fix the named blocker and re-show it with evidence, then write a handoff (done / pending / unverified) and harvest one reusable lesson with all private specifics removed. References: \`revised-output.md\`, \`handoff-note.md\`, \`harvest-seed.md\`.
+The copy-paste block below is the prompt sequence that drives exactly these five moves on your task.
+## Copy-paste block
+Paste these in order into your AI tool, filling the bracketed parts with your own redacted task. This is the same loop as the steps above.
+\`\`\`text
+[1 / CONTEXT]
+Help me write a context package for this task. Capture: the goal in one sentence, what is in scope, and explicit non-goals. Keep it local-first; I will not upload private material.
+Task (redacted): [describe your task; replace any private name, path, or number with a placeholder]
+[2 / ACCEPTANCE]
+Now turn that context into an acceptance card: a short numbered list of checkable criteria that define "done". Each criterion must be something a reviewer can verify, not a vibe. Mark anything explicitly out of scope.
+[3 / EXECUTION]
+Do only the work the acceptance card describes. Do not expand scope. When done, report: what changed, what you ran to check it, what failed, and what you did NOT verify.
+[4 / GUARD - run this in a SECOND tool, ideally a different model family]
+Review the output below against the context and acceptance card. Point to concrete defects, missing evidence, privacy leaks, unsupported claims, and scope drift, each tied to a specific spot. Return findings by severity and a pass or reject. Do not approve a claim that the evidence does not back.
+Output under review: [paste the step-3 output]
+Acceptance card: [paste the step-2 card]
+[5 / HANDOFF + HARVEST]
+Write two short artifacts. Handoff: where the work is now, split into done / pending / unverified, plus the single next action and the exact baseline to start from. Harvest: one reusable lesson from this loop, written generally enough to apply to a future task, with all private specifics removed.
+\`\`\`
+## Expected output
+- A context package and an acceptance card with checkable criteria (not prose).
+- A first output whose completion claim you can check against evidence.
+- A guard review that names a real, line-level defect and returns reject when the claim outruns the evidence, or pass with named residual risk when it does not.
+- A revised output where the named blocker is fixed and re-shown with evidence.
+- A handoff that separates done / pending / unverified, and one reusable harvest lesson.
+## Failure handling
+- The guard just says "looks good" and finds nothing. It is probably grading tone, not claims. Re-run step 4 and force it to check each completion claim against the acceptance card and point to a specific line or a missing piece of evidence; an empty finding list is only valid if it can say what it checked.
+- The first output looks perfect and you cannot spot the defect. Re-read the completion claim next to the code or evidence it rests on. The classic failure is a claim ("keyboard reorder works") that the code does not actually perform.
+- You only have one AI tool. Run the guard in a fresh turn or a fresh session of the same tool. It is weaker than a second model family (same family tends to miss the same things), but far better than no guard.
+- The loop feels like overhead on a tiny task. It is, for a one-line change. Use the full loop on work another session or person will build on; for throwaway work, skip it.
+## Privacy note
+Redact before you paste: replace real product names, file paths, customer or person names, and internal numbers with placeholders. Do not paste a private profile, raw private chat logs, or a non-public path into an external AI. The loop works on a redacted description; it does not need the private original. (If you take the optional "look first" track instead, the shipped synthetic case uploads nothing at all — there is nothing of yours to redact.)
+## Next step
+- Connect this loop to the AI tool you actually use day to day: \`connect-a-tool.md\`.
+- When you receive a "done" artifact you did not produce, pressure-test it: \`review-a-half-product.md\`.
+- Reuse the full mechanism behind step 4 on higher-stakes work: \`../mechanisms/dual-guard/README.md\`.
+`;
+}
+/**
+ * Returns the Markdown text of the "Connect a Tool" recipe.
+ *
+ * The result is a fixed template literal with no interpolation; the
+ * `<mechanism>` token in the copy-paste block is literal placeholder text for
+ * the reader, not a substitution. Escaped backticks produce the inline code
+ * spans and the text fence.
+ *
+ * @returns {string} The recipe contents.
+ */
+export function cookbookConnectTool() {
+  return `# Connect a Tool
+A do-it recipe: point any AI tool you already use at this workspace, so the same profile, context, acceptance, guard, handoff, and harvest rules drive every tool instead of six drifting rule systems. The key idea is that every mechanism here is just a Markdown file. You connect a tool by copying file contents into that tool's instruction slot. Nothing depends on this CLI staying installed; the CLI only writes the files.
+## When to use this
+- You have a favorite AI tool (a general chat AI, a coding assistant, or a command-line AI) and want it to follow this system's loop.
+- You use more than one tool and they each behave differently because each has its own ad hoc rules.
+- You want a mechanism (like a guard pass) available inside your tool as a reusable instruction, not something you re-type every time.
+Skip it if you only ever read these files by hand and never paste them into a tool.
+## Prerequisites
+- This workspace exists locally.
+- The AI tool you want to connect, and knowledge of where it accepts standing instructions. Three common shapes: a general chat AI uses a "system prompt" or "custom instructions" box; a coding assistant uses a project rules file (for example a \`CLAUDE.md\`, an \`AGENTS.md\`, a \`.cursorrules\`, a \`.clinerules\`, or an equivalent); a command-line AI uses its config or a per-project instruction file.
+- Two minutes per tool. This is copy and paste, not installation.
+## Steps
+1. Open the shared contract. Open \`../adapters/SHARED_CORE_CONTRACT.md\`. This is the one rule source every tool should share so the loop does not drift between tools.
+2. Open the adapter for your tool family. Look in \`../adapters/\` for the closest match to your tool (each adapter is a thin pointer, intentionally not a second copy of the contract). If none matches exactly, pick the nearest one. The adapter shows the minimal instruction your tool needs.
+3. Put the contract where your tool reads standing instructions. For a chat AI, paste the contract into the system-prompt or custom-instructions box. For a coding assistant, save it (or a pointer to it) into that tool's project rules file. For a command-line AI, add it to the tool's config or per-project instruction file. Use the copy-paste block below.
+4. Add one mechanism as a reusable instruction (optional but the high-value move). Pick a mechanism you want on tap, for example \`../mechanisms/dual-guard/PROMPT.md\` or \`../guard/PROMPT.md\`. Copy the prompt body from that file's "Copy-paste prompt" block into a saved prompt, snippet, or rule in your tool, so a guard pass is one trigger away instead of a retype.
+5. Verify the wiring with a throwaway ask. Tell the tool: "State the core loop you are now following and where each step's rules live." A correctly connected tool names profile, context, acceptance, guard, handoff, harvest and treats them as explicit files, instead of inventing hidden memory.
+6. Save anything worth keeping back into this workspace (a filled template, a handoff, a harvest card) so the next tool or session starts from the same files.
+## Copy-paste block
+Two pieces. The first wires the whole loop into a tool. The second drops a single mechanism in as a reusable instruction. Before pasting, open the referenced file and paste its real contents where marked; do not paste the file path and expect the tool to read your disk.
+\`\`\`text
+[A / WIRE THE LOOP INTO A TOOL - paste into the tool's system prompt or project rules file]
+Follow this shared contract for our work. Treat profile, context, acceptance, guard/review, handoff, and harvest as explicit files in a local-first workspace, not as hidden memory. Work local-first; do not upload my content by default. Label facts, assumptions, decisions, and unverified claims. Use synthetic, redacted examples for anything I might share publicly.
+--- shared contract begins ---
+[paste the full contents of ../adapters/SHARED_CORE_CONTRACT.md here]
+--- shared contract ends ---
+[B / ADD ONE MECHANISM AS A REUSABLE INSTRUCTION - save as a snippet, saved prompt, or rule]
+When I invoke this, run the mechanism below on the material I provide. Keep private material local and redacted. Point findings to specific spots. Return the mechanism's stated output shape, not a vague summary.
+--- mechanism prompt begins ---
+[paste the "Copy-paste prompt" block from ../mechanisms/<mechanism>/PROMPT.md here]
+--- mechanism prompt ends ---
+\`\`\`
+## Expected output
+- Your tool, when asked, can name the core loop (profile, context, acceptance, guard, handoff, harvest) and treats each as a file rather than invented memory.
+- At least one mechanism is reachable inside the tool as a saved instruction you can trigger without retyping it.
+- The same contract now drives every tool you connected this way, so behavior is consistent across tools.
+## Failure handling
+- The tool ignores the standing instruction. You likely pasted into a one-off chat turn instead of the persistent slot. Move the contract into the actual system-prompt box or project rules file so it survives across turns.
+- The tool "can't find" a referenced file. Tools generally cannot read your disk from a path in a prompt. Paste the file's contents inline (as the block marks), not just its path. Files are the source of truth; pasting is how a tool sees them.
+- Behavior still drifts between two tools. Confirm both point at the same single \`SHARED_CORE_CONTRACT.md\` and that neither has an older private rule set fighting it. One contract, many thin adapters; never six full rule systems.
+- The adapter looks too thin and you want to fatten it. Do not. The adapter is meant to be a pointer; thickening it recreates the drift the shared contract exists to prevent.
+## Privacy note
+Connecting a tool means standing instructions, not your private data. Paste the contract and mechanism prompts (they are public-safe). Do not paste a private profile, raw private chat logs, internal numbers, or non-public paths into a tool's instruction slot or an external AI. When you later run real tasks through the connected tool, redact first and keep originals local; the loop is designed to work on a redacted description.
+## Next step
+- Run a full loop through the tool you just connected: \`run-a-first-loop.md\`.
+- Use the connected tool to pressure-test a "done" artifact: \`review-a-half-product.md\`.
+- Browse the other mechanisms you can wire in the same way: \`../mechanisms/README.md\`.
+`;
+}
+/**
+ * Returns the Markdown text of the "Review a Half Product" recipe.
+ *
+ * The result is a fixed template literal with no interpolation. It contains
+ * the recipe's prose sections plus a fenced reviewer prompt that lists the
+ * verdict values (pass / reject / insufficient_evidence / pass_with_risk) and
+ * the L0-L4 guard levels as text for the reader. Escaped backticks produce the
+ * inline code spans and the text fence.
+ *
+ * @returns {string} The recipe contents.
+ */
+export function cookbookHalfProduct() {
+  return `# Review a Half Product
+A do-it recipe: audit a deliverable that says "done" but might not be, by forcing an independent AI to point at evidence and find the gap, instead of nodding along with "looks good". It uses the review mode plus the dual-guard and half-product-review mechanisms. The target is the classic half product: lots of docs, demo, and confident prose, but the thing it claims a stranger can do does not actually run.
+## When to use this
+- Someone (a tool, another session, a contributor) hands you work claimed complete and you will build on it or ship it.
+- A project has a polished README and architecture talk but you are not sure the first-run experience actually works.
+- A completion claim feels too smooth and you want a second, independent pass before you trust it.
+Skip it for low-stakes, easily reversible work, or a step you are about to fully re-check yourself anyway. Running a full review on trivial work is ceremony, and ceremony with no payoff trains people to skip review when it matters.
+## Prerequisites
+- The artifact under review, with stable references the reviewer can point to (line numbers, section anchors, or named files).
+- Its definition of done: an acceptance card, or at least the public claim it makes ("a stranger can do X in ten minutes").
+- The evidence that supposedly backs the claim: command output, test results, a reproduced result, or a clear note that none exists.
+- An AI tool to run the review in, ideally a different model family from whatever produced the artifact, since a different family is the pass most likely to see what the author cannot.
+## Steps
+1. Pin the claim. Write down, in one line, exactly what the artifact claims is done or usable. A claim you cannot state is a claim you cannot test. If it has an acceptance card, use that; if not, lift the strongest promise from its README or start page.
+2. Trace each claim to evidence. For every claim, find the file, command output, or test that proves it, or note that none exists. The half-product pattern is docs and demos that point at nothing runnable. A claim with no evidence is the finding.
+3. Try the first-run path. If the claim is "a stranger can do X", do X the way a stranger would: run the entry command, open the file the docs point to, follow the start page. Watch where it breaks or where a referenced artifact is missing.
+4. Run the independent guard. Paste the artifact, the acceptance card or pinned claim, and any evidence into the review prompt below, in a second tool. Demand findings tied to specific lines or missing evidence, ordered by severity, with a pass or reject. Do not accept a fluent "looks fine".
+5. Merge by strictness, not vote. If the guard names one real, evidence-grounded blocker, the artifact does not pass, even if everything else reads well. One concrete defect outweighs a pile of fluent approval. Compare against \`../mechanisms/dual-guard/README.md\` for how the binding pass works.
+6. Decide the wording. If the first-run path is not actually runnable, downgrade the release language (from "anyone can use this" to "early / not yet runnable end to end") or carry the gap as named residual risk the owner accepts on the record. Silent "good enough" is not allowed.
+## Copy-paste block
+Paste this into an independent AI tool, ideally a different model family from the one that produced the work. It is tuned to make the reviewer hunt for the gap and cite it, not to praise.
+\`\`\`text
+You are an independent reviewer. The work below claims to be complete or usable. Assume it might not be, and prove it either way against the evidence, not the tone.
+Claim under test: [paste the one-line "done"/usable claim, or the acceptance card]
+Artifact: [paste the artifact, or the README/start page and the key files it points to]
+Evidence provided: [paste command output / test results / reproduced result, or write "none provided"]
+Do this:
+1. For each claim, name the specific evidence that backs it, or state that none was provided.
+2. Walk the first-run path a stranger would take. Say exactly where it breaks or where a referenced file/command is missing.
+3. List defects ordered by severity. Tie each to a line, section, or the specific missing evidence. No vague "looks good" or "seems fine".
+4. If any one real, evidence-grounded blocker exists, the verdict is REJECT even if the rest reads well.
+Return:
+- Verdict: pass / reject / insufficient_evidence / pass_with_risk (a plain pass needs an L3+ cross-family evidence pack; a single tool tops out at pass_with_risk; summary-only is insufficient_evidence)
+- Guard level: L0-L4, the strength of the evidence you actually had. The CLI COMPUTES this from your review method + the evidence (it is not self-declared); a cross-family L3 is shown "self-declared, unverified" because a local tool cannot verify the reviewer's family — L4 (that cross-family review AND a rerun reconciled to a recorded run) is the strongest LOCAL-trust level, not cryptographic proof.
+- Findings (each tied to a line, section, or missing evidence)
+- Required fixes (the smallest change each blocker needs)
+- Residual risk (what stays unverified and who must accept it; a pass_with_risk needs an explicit owner sign-off)
+- Recommended release wording (downgrade it if the first-run path is not runnable)
+Rules: work only from what I provided; if key evidence is missing, say so rather than assuming it passes; keep examples public-safe.
+\`\`\`
+## Expected output
+- A verdict: pass, reject, insufficient_evidence, or pass_with_risk, plus the guard level (L0-L4) for the evidence seen.
+- A findings list where each item points to a line, section, or a specific missing piece of evidence, not a vibe.
+- The exact first-run step where the experience breaks, if it does.
+- The smallest fixes required before the work can wear its completion label, and recommended release wording.
+## Failure handling
+- The reviewer just approves it. It is grading tone, not claims. Re-run and force step 1: every claim must be matched to specific evidence or marked unproven; "looks good" is not a finding.
+- The reviewer invents evidence or assumes the path works. Tell it to work only from what you pasted and to say "none provided" rather than assume. If it cannot see the evidence, that absence is itself the result.
+- Two reviewers disagree (one approves, one rejects). Do not average them. If the rejection points to a real, evidence-grounded defect, it wins; one concrete blocker beats fluent approval.
+- You only have the same tool the author used. Run it anyway in a fresh session, but treat the pass as weaker: same model family tends to miss the same things, so a clean result here is a reference, not a guarantee.
+## Privacy note
+Review the work, not your private data. Redact before pasting: replace real product names, customer or person names, file paths, and internal numbers with placeholders. Do not paste a private profile, raw private chat logs, or non-public paths into an external AI for review. The review works on a redacted artifact plus its evidence; it does not need the private original.
+## Next step
+- Use the full two-layer review behind this on higher-stakes artifacts: \`../mechanisms/dual-guard/README.md\`.
+- See the dedicated mechanism for the docs-outrun-runtime pattern: \`../mechanisms/half-product-review/README.md\`.
+- After a reject, package the exact remaining work for whoever fixes it: \`../mechanisms/handoff-abc/README.md\`.
+`;
+}
+export function cookbookBridgeSecondFamily() {
+  return `# Bridge to a Second Family
+A do-it recipe: set up the second, different-model-family AI that the cross-family guard needs, and route a review across it. The rest of the system keeps telling you "when a second, different model family is available, you can upgrade to the cross-family double guard" — this recipe is the missing how. It does not redefine the guard: \`../mechanisms/dual-guard/README.md\` owns the judgment rules (L3 vs L4, binding vs reference, layered strictness over majority vote, the pass and reject bars). This recipe only covers the part those rules assume you already did: pick a second family, get your material across to it safely, and keep evidence that the second family actually ran.
+There are two tracks. The manual bridge (copy-paste between two AIs) is the main path: it works with any two tools, needs no setup, and never breaks. The auto bridge (a tool that dispatches the review to another family for you) is an optional convenience. Start manual; reach for auto only if a tool you already use offers it.
+## When to use this
+- A completion claim is about to be trusted by another session, tool, or person, and you want the cross-family binding gate the dual-guard mechanism describes — but you only have one tool wired up so far.
+- You keep stopping at "upgrade to a second model family" and do not know how to actually stand one up.
+- You have run \`single-tool-guard\` and want to move a result above its L2 ceiling with a genuine cross-family pass.
+Skip it for low-stakes, easily reversible work a human will fully re-check anyway. A single tool's own adversarial pass (\`../mechanisms/single-tool-guard/README.md\`) is the right tool there; bridging to a second family is the upgrade for work that will propagate.
+## Prerequisites
+- A redacted version of the artifact under review (swap private names, paths, and numbers for placeholders). Nothing private needs to leave your machine; a redacted copy is enough.
+- Its acceptance card or one-line "done" claim, and the evidence that supposedly backs it (command output, test result, a reproduced result, or a clear note that none exists).
+- One AI tool you already use as your primary (the family that drafted the work, or any family you treat as home base).
+- A second AI that is a DIFFERENT model family from your primary. That is the whole point: a different family does not share your primary's blind spots. (How to choose one is Step 1 below — you do not need it set up before you start.)
+## Track A — the manual bridge (main path, works with any two tools)
+### Step 1. Choose a second family
+Pick any AI that is a different model family from your primary tool. The families differ; the move does not. Concrete examples (each is just an example — substitute freely):
+- Primary is a Claude-family tool? Use a GPT-family or a Gemini-family AI as the second.
+- Primary is a GPT-family tool? Use a Claude-family or a Gemini-family AI as the second.
+- Primary is a Gemini-family tool? Use a Claude-family or a GPT-family AI as the second.
+Any different-family pairing works — the names above are illustrations, not a required list. What matters is "different family", not which brands. Two tools that wrap the same underlying family (for example two products both built on the same model) do NOT count as a cross-family pair; the dual-guard mechanism treats that as same-family, capped below the cross-family gate. If you are unsure whether two tools share a family, treat them as same-family until you can confirm otherwise.
+### Step 2. Redact before it leaves your primary
+The second AI cannot read your disk; to review your material it has to be pasted in. So redact first, exactly as in \`connect-a-tool.md\`: replace real product names, customer or person names, file paths, and internal numbers with placeholders. Do not paste a private profile, raw private chat logs, internal numbers, or non-public paths into the second AI. The review works on a redacted artifact plus its evidence; it does not need the private original.
+### Step 3. Send the package across and run the cross-family review
+This is your move to make, not the second AI's — it cannot read your disk, so you fetch the file contents and paste them in. On your own machine, open \`../mechanisms/dual-guard/PROMPT.md\` and copy its "Copy-paste prompt" body. Then paste one combined message into the second (different-family) AI: the carrier wrapper below, the dual-guard body you just copied, and your redacted material (artifact, acceptance card, context boundary, evidence). The carrier is a thin wrapper that hands the pasted dual-guard body to the second AI as its instructions; it does not restate the guard's rules, because the dual-guard mechanism owns them.
+### Step 4. Collect the verdict and record binding evidence
+Read back the verdict using the dual-guard pass and reject bars (do not re-derive them here). Then record what makes the result trustworthy later: which family was the binding guard, the findings, the fixes, and the residual risk. A bridge to a second family reaches L3 (a structured evidence pack reviewed by a different family). To reach L4, the binding guard must independently re-run the key evidence and reconcile it to a recorded run — re-running the critical check yourself across the second family is what raises a cross-family L3 to L4 (the strongest LOCAL-trust level, not cryptographic proof).
+## Copy-paste block (manual bridge)
+Paste this into the second, different-family AI. It carries your material to the dual-guard prompt; it deliberately does not repeat the guard's judgment rules.
+\`\`\`text
+Run a cross-family review for me. You are the second, different-model-family guard.
+Your full instructions are the Dual Guard prompt body, which I have pasted directly below. Follow it exactly (process, output shape, pass bar, reject bar, guard-level rules). It is the source of truth; do not invent your own rubric. You cannot read my disk, so I am pasting the body in here rather than pointing you at a local file.
+--- Dual Guard PROMPT body (begin) ---
+[Paste the Dual Guard PROMPT.md "Copy-paste prompt" body here — open it locally and copy it in yourself; the second AI cannot read your disk.]
+--- Dual Guard PROMPT body (end) ---
+Then review this material under the Dual Guard prompt body pasted above:
+- Drafting model family (my primary): [name the family that produced the work]
+- Artifact under review (redacted, with line/section refs): [paste]
+- Acceptance card / definition of done: [paste]
+- Context boundary (goal, in-scope, non-goals): [paste]
+- Evidence provided: [paste command output / test result / reproduced result, or write "none provided"]
+Return exactly the dual-guard output shape (verdict, guard level, binding findings, required fixes, residual risk, next action). Work only from what I pasted; if key evidence is missing, say so rather than assume it passes. Keep examples public-safe.
+\`\`\`
+## Track B — the auto bridge (optional, point-to-point, depends on your tool)
+Some tools can dispatch the review to a second family for you, so you do not hand-carry the paste. The shape is the same cross-family pass; the tool just automates the hand-off.
+Concrete example (an example, not a requirement): a coding tool that supports a "rescue" or cross-model plugin can route a review to a different family — for instance a Claude-family coding tool with a plugin that sends the review to a GPT-family model. That auto-dispatched second model is your cross-family bridge.
+Two rules make an auto bridge safe to trust:
+- Read-only is mandatory. The auto-dispatched second AI must review only — it must NOT be allowed to edit your files. If the bridge lets the second AI change the work, it is no longer an independent reviewer of that work, and the cross-family independence the whole gate depends on is gone. Configure the dispatch as read-only and confirm it actually ran read-only before you trust the verdict.
+- It is convenience, not a requirement. The auto bridge depends on a specific tool and integration, and those change over time. The manual bridge in Track A always works. Treat the auto bridge as a way to save effort, never as the only way to reach a second family.
+Everything else — which verdicts are allowed, the L3/L4 boundary, binding vs reference — is unchanged and still lives in \`../mechanisms/dual-guard/README.md\`. The auto bridge changes how the material gets there, not what counts as a pass.
+## Expected output
+- A cross-family review of your artifact, returned in the dual-guard output shape (verdict, guard level, findings, fixes, residual risk, next action).
+- A recorded note of which family was the binding guard, so a later session can trust the result without re-litigating it.
+- Honest leveling: an L3 result from the second family's review of your evidence pack, or L4 only if the binding guard re-ran the key evidence and showed that output. A bare claim that "a second family looked at it" is not a pass — the evidence is.
+## Failure handling
+- Your two tools turn out to share a model family. Then this is a same-family reference pass, not a cross-family gate; under the dual-guard rules it cannot clear the binding gate or move you above L2. Find a genuinely different family for the binding pass.
+- The second family just says "looks good" with no specifics. It is grading tone, not claims. Make sure you pasted the dual-guard prompt body (not only the carrier wrapper) so its "each finding cites a line/section/missing evidence" rule is in force; an empty finding list is only valid if it can say what it checked.
+- You only claimed a second family but kept no evidence. Family can be faked — anyone can say "a different AI reviewed this". What is hard to fake is a rerun and a reconciliation: record the binding family, the findings, and, for L4, your own rerun output. If you cannot show the review happened, treat the result as single-tool (L2), not cross-family.
+- The auto bridge ran with write access. Discard the verdict and re-run it read-only. A reviewer that could edit the work is not independent of it.
+## Privacy note
+A second family means a second place your material gets pasted, so the privacy bar is the same as \`connect-a-tool.md\`, applied twice. Redact before pasting into either tool: replace real product names, customer or person names, file paths, and internal numbers with placeholders. Do not paste a private profile, raw private chat logs, internal numbers, or non-public paths into any external AI. For an auto bridge, confirm the dispatched second AI is read-only and does not exfiltrate or store your content beyond the review. The cross-family review works on a redacted artifact plus its evidence; it never needs the private original.
+## Next step
+- Read the judgment rules this recipe feeds into: \`../mechanisms/dual-guard/README.md\` (L3 vs L4, binding vs reference, pass and reject bars).
+- Coming from one tool? See the L2 front door you are upgrading from: \`../mechanisms/single-tool-guard/README.md\`.
+- Wire the second family in as a standing tool so the bridge is one trigger away: \`connect-a-tool.md\`.
+`;
+}
+/**
+ * Returns the Markdown scaffold for the local "Current State" file.
+ *
+ * The text is a fixed template: a title, a one-line usage note, and five
+ * empty section headings (current goal, active context package, active
+ * acceptance card, current mode, next action). Nothing is interpolated and
+ * the function takes no arguments.
+ *
+ * @returns {string} The Markdown text; the closing delimiter sits on its own
+ *   line, so the string ends with a newline.
+ */ export function stateCurrent() {
+  return `# Current State
+Use this file for local state only. Do not publish private task details.
+## Current goal
+## Active context package
+## Active acceptance card
+## Current mode
+## Next action
+`;
+}
+/**
+ * Returns the Markdown scaffold for the local task log.
+ *
+ * The template holds a title, a one-line purpose note, and a five-column
+ * table (Date, Task, Mode, Evidence, Next action) seeded with a single
+ * placeholder row whose date cell is the word "synthetic". Nothing is
+ * interpolated.
+ *
+ * @returns {string} The Markdown text, ending with a newline.
+ */ export function stateTaskLog() {
+  return `# Task Log
+Use this local log to keep AI work resumable.
+| Date | Task | Mode | Evidence | Next action |
+| --- | --- | --- | --- | --- |
+| synthetic | First loop | review | guard review exists | write handoff |
+`;
+}
+/**
+ * Returns the Markdown scaffold for the local decisions record.
+ *
+ * The template holds a title, a one-line purpose note, and a four-column
+ * table (Date, Decision, Evidence, Revisit condition) seeded with a single
+ * placeholder row whose date cell is the word "synthetic". Nothing is
+ * interpolated.
+ *
+ * @returns {string} The Markdown text, ending with a newline.
+ */ export function stateDecisions() {
+  return `# Decisions
+Record decisions that future sessions should not reopen without new evidence.
+| Date | Decision | Evidence | Revisit condition |
+| --- | --- | --- | --- |
+| synthetic | Keep examples synthetic | privacy boundary | public-safe replacement needed |
+`;
+}
+/**
+ * Returns the Markdown for the 10-minute "demo preview" walkthrough.
+ *
+ * The text walks the reader through five timed steps on the prepared
+ * TaskBoard example (open the case, set context and acceptance, read the
+ * first AI output, run the guard review, read the revised output), each
+ * followed by an "Expected" line, and closes with a completion check.
+ *
+ * The relative paths in the text are emitted as plain Markdown only; this
+ * function does not open or check any of them. Inline-code backticks are
+ * backslash-escaped so they do not terminate the template literal. Nothing
+ * is interpolated.
+ *
+ * NOTE(review): the text of the sibling walkthrough refers to this one as
+ * 10-minute.md, so the result is presumably written to a file of that name.
+ * Confirm at the call site, which is outside this chunk.
+ *
+ * @returns {string} The Markdown text, ending with a newline.
+ */ export function walkthrough10() {
+  return `# 10-Minute Walkthrough (Demo preview)
+This is the demo preview: it runs the loop on a prepared case so you can see the flow without pasting anything of your own. To run the same loop on your own real task, use \`10-minute-your-task.md\` instead (that is the recommended first run). Pick this preview if your task feels too sensitive to paste right now, or you just want to watch the shape of the loop first.
+Goal: walk one AI collaboration loop end to end on the prepared TaskBoard case, and watch a guard catch a false completion claim that a single agent would have accepted.
+The case: a user asks an AI to add task reordering to a TaskBoard. The AI says it added mouse and keyboard reorder with tests. The guard proves the keyboard part was never implemented. You will see context, acceptance, first output, guard review, revised output, handoff, and harvest.
+Everything is local-first and synthetic. You only read and copy files; nothing is uploaded.
+## Step 1 (1 min) - Open the case
+Open \`../examples/ai-coding-long-task/CASE.md\` and read "Confusing raw input" and "Likely single-agent failure". This is the messy request and the answer a raw chat usually gives.
+Expected: you can say in one line why "I will refactor, add drag, keyboard, polish, and tests" is unsafe (it mixes scope and defines no pass standard).
+## Step 2 (2 min) - Set context and acceptance
+Open \`../examples/ai-coding-long-task/artifacts/context-package.md\`, then \`acceptance-card.md\`. Copy both into your AI tool together with \`../adapters/SHARED_CORE_CONTRACT.md\`.
+Expected: your tool now has five checkable acceptance criteria (AC1 mouse, AC2 keyboard, AC3 tests for both, AC4 data preserved, AC5 visual polish out of scope).
+## Step 3 (2 min) - Read the first AI output
+Open \`../examples/ai-coding-long-task/artifacts/first-ai-output.md\`. Read the completion claim, then the \`TaskBoard.tsx\` code block.
+Expected: you can point to the defect yourself. The claim says arrow-key reorder works, but \`onKeyDown\` (lines 27-30 of that code block) only logs the key and never calls \`moveTask\`, and the test block has no keyboard test.
+## Step 4 (2 min) - Run the guard review
+Open \`../examples/ai-coding-long-task/artifacts/guard-review.md\`. Optionally paste \`first-ai-output.md\` plus \`../guard/PROMPT.md\` into a second AI tool and ask it to review against the acceptance card.
+Expected: the guard returns a cause-and-effect chain, not a one-line verdict. It cites \`first-ai-output.md\` lines 27-30 (stub handler) and the missing keyboard test, maps them to AC2 and AC3, and returns reject. This is the line the guard checks.
+## Step 5 (2 min) - Read the revised output and close the loop
+Open \`../examples/ai-coding-long-task/artifacts/revised-output.md\`, then \`handoff-note.md\`, then \`harvest-seed.md\`.
+Expected: \`onKeyDown\` now calls \`moveTask\` for ArrowUp/ArrowDown, a keyboard test was added that fails on the old stub and passes on the fix, the handoff separates done / pending / unverified (visual polish), and the harvest seed is the reusable artifact you keep: verify completion claims with code and test evidence, do not trust a fluent "done".
+## Completion check
+You have walked context -> acceptance -> first output -> guard -> revised -> handoff -> harvest on one case, you can name the exact line the guard pointed to, and you leave with one reusable artifact (\`harvest-seed.md\`) you can apply to your own next task.
+`;
+}
+/**
+ * Returns the Markdown for the 10-minute walkthrough that the reader runs on
+ * a task of their own.
+ *
+ * Layout of the returned text, in order:
+ *   - intro, goal, privacy/redaction note, and prerequisites;
+ *   - Step 1 (define done), Step 2 (do the slice and produce an Evidence
+ *     Pack), Step 3 (independent re-check), Step 4 (close the loop), each with
+ *     a copy-paste prompt inside a fenced text block and an "Expected" line;
+ *   - the profile-candidate state list (proposed, confirmed, edited, dropped);
+ *   - an optional two-track comparison, pointers to the cookbook, and a
+ *     completion check.
+ *
+ * Square-bracket placeholders and the angle-bracket repo argument are literal
+ * text for the reader to fill in; the template contains no interpolation.
+ * Inline-code and code-fence backticks are backslash-escaped so they do not
+ * terminate the template literal. The commands and relative paths mentioned
+ * are emitted as text only; this function runs and reads nothing.
+ *
+ * NOTE(review): the text of the demo-preview walkthrough refers to this one
+ * as 10-minute-your-task.md, so the result is presumably written to a file of
+ * that name. Confirm at the call site, which is outside this chunk.
+ *
+ * @returns {string} The Markdown text, ending with a newline.
+ */ export function walkthrough10YourTask() {
+  return `# 10-Minute Walkthrough (Your own task)
+This is the recommended first run. You run the whole collaboration loop on one real task of your own, instead of a prepared example, and feel the value on work you actually care about. If you would rather watch the flow on a prepared case first, use \`10-minute.md\` (the demo preview) and then come back here.
+Goal: take one messy task of yours and, in three short rounds, force the AI to (1) define "done" before it acts, (2) do only that, and (3) get re-checked by an independent AI that hunts for a thin "done" - then spend two minutes closing the loop into reusable cards so the next task starts ahead.
+Everything stays local-first. You paste a redacted description into the AI tools you already use; nothing is uploaded by this workspace. Redact before you paste: replace any real name, path, customer, or internal number with a placeholder. The loop works on a redacted description; it does not need the private original.
+What you need: one real task that is a bit messy, and one AI tool you can paste into. A second tool of a different model family (a different AI brand) makes Step 3 much stronger, but you can run all three rounds in one tool if that is all you have.
+Want the AI to prompt you for these steps on its own - to ping you to review every time it says "done", instead of you remembering to paste Step 3? Install the adapter into your tool's always-on instructions with \`node bin/ai-collab.js adapters install --target <repo>\`; it turns on the coaching reminders, and if you only have one tool it routes the completion-claim check through \`single-tool-guard\` (a fresh adversarial pass in the same tool).
+## Step 1 (2 min) - Define done before any work
+Paste this into your AI tool, with your own task in the brackets:
+\`\`\`text
+I have a task in front of me that is a bit messy. Do NOT write any implementation yet.
+Task (redacted): [describe your task in plain language; replace any private name, path, or number with a placeholder]
+Return two things:
+1) Boundary card: this run does only this one small slice; explicitly list what is NOT in scope.
+2) Acceptance card: a numbered list of hard, checkable standards (AC1, AC2, ...). Mark anything that would be out of scope.
+\`\`\`
+Expected: a boundary card and an acceptance card. You now have a written definition of "done" for your own task, before a line of work exists. This is the step people skip and then regret.
+## Step 2 (3 min) - Do only the accepted slice, then produce an Evidence Pack
+Paste this next, so the AI builds only what the acceptance card described and hands back a structured **Evidence Pack** the next round can actually check - not a prose "it's done":
+\`\`\`text
+Do only the work the acceptance card describes. Do not expand scope.
+When you are done, produce an "Evidence Pack" in exactly this shape (it is the artifact the re-check will judge):
+1) Changed files / diff: the list of files you changed, with the key diff hunks (or the full patch). If you changed nothing, say so.
+2) Commands run: the exact commands you ran to verify the work (tests, build, lint, a manual reproduction). If you ran none, write "none".
+3) Command output summary: the real output of each command (paste it, do not paraphrase), trimmed to the relevant lines.
+4) Exit codes: the exit code of each command (0 = passed). If a command failed, keep its non-zero code and error visible - do NOT hide it.
+5) Acceptance mapping: for each acceptance criterion (AC1, AC2, ...), say PASS / FAIL / NOT-VERIFIED and point to the evidence above that backs it.
+6) Not verified: everything you could NOT prove (edge cases, things you skipped, criteria with no command behind them).
+Do not claim "done" for anything that does not have evidence in this pack.
+\`\`\`
+Expected: an Evidence Pack with the six numbered parts above (changed files/diff, commands run, output summary, exit codes, acceptance mapping, not-verified). Keep this whole pack - it is exactly what the next round pressure-tests, and a missing or empty pack is itself a finding in Step 3.
+## Step 3 (3 min, the aha moment) - Independent re-check
+Open a fresh chat. Ideally use a different AI brand than the one that did Step 2 - a different model family is the pass most likely to catch what the first one missed. Paste this:
+\`\`\`text
+You are an independent reviewer. The work below claims to be done. Assume it is NOT done and prove it from the evidence, not the tone.
+Acceptance card: [paste your Step 1 acceptance card]
+Evidence Pack under review: [paste the Step 2 Evidence Pack: changed files/diff, commands run, output summary, exit codes, acceptance mapping, not-verified]
+Do this, in order:
+1) First check the Evidence Pack itself. If there is no Evidence Pack, or it is missing real command output / exit codes, or a claimed PASS has no command behind it, you CANNOT pass the work: return the verdict INSUFFICIENT_EVIDENCE and list exactly what evidence is missing. A confident "done" with no evidence is INSUFFICIENT_EVIDENCE, not pass.
+2) For each acceptance criterion, point to the exact line/output in the Evidence Pack that backs it, or say there is no evidence for it.
+3) Walk it the way a stranger would actually use it and say exactly where it breaks.
+4) List defects by severity, each pinned to a specific location.
+5) Pick the verdict: REJECT if an evidence-grounded hard defect exists; INSUFFICIENT_EVIDENCE if the pack cannot support a pass; pass only if every criterion is backed by real evidence.
+Return: verdict (pass / REJECT / INSUFFICIENT_EVIDENCE) + defect or missing-evidence list (with locations) + the smallest fix for each + what is still unverified.
+\`\`\`
+Expected (the aha): the independent reviewer first weighs your Evidence Pack. If Step 2 handed over a fluent "done" with no real evidence, it returns \`INSUFFICIENT_EVIDENCE\` and names what is missing; if the evidence exists but a criterion is not actually met, it returns \`REJECT\` with the defect pinned to a location - on your own task, not a tutorial's. Either way, that is the gap a single fluent chat would have hidden from you: no evidence pack means no pass.
+## Step 4 (2 min) - Close the loop so it compounds
+The re-check is the safety net; this step is where the loop starts paying you back. Keep it light - three short cards, not a report. Paste this:
+\`\`\`text
+Close out this task in three short cards. Keep each card to a few lines - do NOT write a long report.
+1) Handoff card (so the next session or tool resumes without re-explaining), three columns:
+   - Done: what is finished and evidence-backed.
+   - To do: what is left.
+   - Not verified: what was claimed but not proven (carry over anything the re-check flagged).
+2) Harvest card: one reusable lesson from this task, as a single sentence I could apply to a future task.
+3) Profile candidate (only if one applies): if a stable preference about how I want you to work showed up more than once, propose it as one line, with status \`proposed\`. Do NOT add it to my long-term profile yet. If nothing stable showed up, say "no profile candidate this time".
+\`\`\`
+Expected: a three-column handoff, a one-line harvest lesson, and either one \`proposed\` profile candidate or an explicit "none". Save the handoff and harvest cards into your workspace (\`../handoff/\` and \`../harvest/\`). A profile candidate does NOT go straight into your long-term profile - it lands in \`../profile/CANDIDATES.md\` as \`proposed\` first. It only moves into \`profile/EXAMPLE.synthetic.md\` (or your real profile) after you review it: mark it \`confirmed\` (use as-is), \`edited\` (reword first), or \`dropped\` (discard) in CANDIDATES.md, and only \`confirmed\`/\`edited\` ones graduate. That buffer is why one task makes the next one start ahead without an unreviewed guess hardening into a standing rule - you walk away with a re-checked result *and* something reusable, but nothing edits your profile behind your back.
+### Profile-candidate buffer (the state machine)
+A profile candidate is a guess about a standing preference. An unreviewed guess must not silently become a rule future sessions obey, so candidates move through four states in \`../profile/CANDIDATES.md\`:
+- \`proposed\` — the AI suggested it this loop; not yet trusted, not in your profile.
+- \`confirmed\` — you reviewed it and it is correct as written; it may now graduate into your profile.
+- \`edited\` — correct after you reword it; the edited line graduates, the original does not.
+- \`dropped\` — you reviewed it and it does not belong; it stays recorded as dropped so it is not re-proposed every loop.
+Rule: only \`confirmed\` and \`edited\` candidates graduate into your long-term profile, and only after you say so. \`proposed\` and \`dropped\` never edit your profile. Open \`../profile/CANDIDATES.md\` for the table and how to use it.
+Prefer to let the tool track this for you instead of hand-editing a table? The same four states are available as commands: \`ai-collab learning add --type profile --content "..."\` records the candidate (and \`--type harvest\` records the one-line lesson from card 2), then \`ai-collab learning confirm\` / \`learning edit\` / \`learning drop\` keep, reword, or discard it. Next time you run \`ai-collab status\`, it echoes back the one preference you most recently confirmed - so the next task literally starts with "still working the way you confirmed last time." Use the table or the commands, whichever you like; they share the same states, so you are never maintaining two systems.
+## Two-track comparison (optional, makes the point undeniable)
+Run your task once with no discipline first, then with the loop, and compare:
+1. Track A (no discipline): in a fresh chat, paste your messy task with no structure and just ask the AI to do it. Save the smooth "Sure, I will do X, Y, Z" reply. That smooth line is your real before-evidence, generated on your own task.
+2. Track B (the loop): the three steps above.
+3. Side by side: ask the AI to put both tracks into one table with four rows - scope, definition of done, completion claim, and what would have been missed. The messy half is real evidence from your own task, not something the tutorial invented.
+## Want the why behind each step
+This walkthrough is the operation card. For the reasoning behind each move and a longer copy-paste sequence to adapt, open \`../cookbook/run-a-first-loop.md\` (it runs this same loop on your own task and explains why each step exists). To turn Step 3 into a reusable habit on higher-stakes work, see \`../cookbook/review-a-half-product.md\` and \`../mechanisms/dual-guard/README.md\`.
+## Completion check
+You defined "done" before the work, had the AI do only that, had an independent AI re-check it against evidence, and closed the loop into a handoff card, a one-line harvest lesson, and (if one applied) a profile candidate - all on a real task of your own. You can name the exact place the re-check pointed to, and you leave with a re-checked result, reusable cards, and a habit (define done, do only that, get re-checked, then capture what is reusable) that makes your next task start ahead instead of from scratch.
+`;
+}
+/**
+ * Returns the Markdown for the 30-minute walkthrough.
+ *
+ * The text states a goal (adapt one layer to a real task) followed by four
+ * sections: Input, Steps (four numbered steps), Expected output file, and
+ * Completion check. The relative paths to the context template and the
+ * adapters directory are emitted as text only; nothing is read or
+ * interpolated.
+ *
+ * @returns {string} The Markdown text, ending with a newline.
+ */ export function walkthrough30() {
+  return `# 30-Minute Walkthrough
+Goal: adapt one layer to a real task.
+## Input
+Choose one current task and redact private identifiers.
+## Steps
+1. Open \`../context/TEMPLATE.md\`.
+2. Fill goal, current state, constraints, facts, assumptions, risks, and open questions.
+3. Open the adapter for your tool in \`../adapters/\`.
+4. Ask the tool to produce one acceptance card or review note from your context.
+## Expected output file
+One completed context package or acceptance card.
+## Completion check
+Another session can tell what the task is, what is out of scope, and what evidence is still missing.
+`;
+}
+/**
+ * Returns the Markdown for the 60-minute walkthrough.
+ *
+ * The text states a goal (run one complete collaboration loop), lists seven
+ * numbered steps from filling a profile through extracting a harvest seed,
+ * names the seven artifacts those steps should leave behind, and closes with
+ * a completion check. The template is fixed; nothing is interpolated.
+ *
+ * @returns {string} The Markdown text, ending with a newline.
+ */ export function walkthrough60() {
+  return `# 60-Minute Walkthrough
+Goal: run one complete AI collaboration loop.
+## Steps
+1. Fill a light profile.
+2. Package task context.
+3. Define acceptance.
+4. Run one execution prompt.
+5. Challenge the result with guard review.
+6. Write a handoff note.
+7. Extract one harvest seed.
+## Expected output files
+- profile card
+- context package
+- acceptance card
+- execution artifact
+- guard review
+- handoff note
+- harvest seed
+## Completion check
+The next AI session can resume without asking what happened, and the useful lesson is saved for future reuse.
+`;
+}
+/**
+ * Returns a Markdown transcript of one complete loop, filled in from the
+ * first entry of caseDefinitions.
+ *
+ * Fixed "Goal" and "Expected output" sections are followed by eight sections
+ * whose bodies are interpolated from that entry: User (messy), Context
+ * package (profileContext), Acceptance card (acceptance), Execution request
+ * (executionPrompt), Guard review result (guardReview), Handoff note
+ * (handoff), Harvest seed (harvest), and Difference from raw chat
+ * (comparison). The entry's id is interpolated into the opening sentence.
+ *
+ * NOTE(review): caseDefinitions is declared outside this chunk; presumably an
+ * array of case objects whose fields are Markdown strings. Verify against
+ * its definition. If the array is empty, the first property access below
+ * throws a TypeError.
+ *
+ * @returns {string} The Markdown text, ending with a newline.
+ */ export function syntheticTranscript() {
+  const item = caseDefinitions[0]; // only the first case definition is used
+  return `# Synthetic Loop Transcript
+This transcript demonstrates one complete loop using \`${item.id}\`.
+## Goal
+Show that one user can move from a messy starting point to context, acceptance, execution, guard review, handoff, and harvest without relying on a raw chat memory.
+## Expected output
+A complete artifact chain: context package, acceptance card, execution request, guard review result, handoff note, harvest seed, and a short comparison against single raw AI chat.
+## User
+${item.messy}
+## Context package
+${item.profileContext}
+## Acceptance card
+${item.acceptance}
+## Execution request
+${item.executionPrompt}
+## Guard review result
+${item.guardReview}
+## Handoff note
+${item.handoff}
+## Harvest seed
+${item.harvest}
+## Difference from raw chat
+${item.comparison}
+`;
+}