npm - cclaw-cli - Versions diffs - 0.8.0 → 0.9.0 - Mend

cclaw-cli 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/content/examples.d.ts +16 -0
package/dist/content/examples.js +212 -4
package/dist/content/harness-tool-refs.d.ts +20 -0
package/dist/content/harness-tool-refs.js +240 -0
package/dist/content/meta-skill.js +72 -4
package/dist/content/skills.js +63 -41
package/dist/content/stage-schema.js +29 -3
package/dist/content/templates.js +13 -3
package/dist/doctor.js +77 -0
package/dist/install.js +19 -0
package/package.json +1 -1

package/dist/content/examples.d.ts CHANGED Viewed

@@ -1,3 +1,19 @@
 import type { FlowStage } from "../types.js";
 export declare function stageGoodBadExamples(stage: FlowStage): string;
+export declare const STAGE_EXAMPLES_REFERENCE_DIR = "references/stages";
+export declare function stageExamplesReferencePath(stage: FlowStage): string;
+/**
+ * Returns the full example artifact body as a standalone reference markdown
+ * file. Materialized under .cclaw/references/stages/<stage>-examples.md so
+ * the always-rendered skill body can link instead of inlining.
+ */
+export declare function stageExamplesReferenceMarkdown(stage: FlowStage): string | null;
+/**
+ * Returns the short inline pointer rendered directly inside the stage skill.
+ * Replaces the previous always-inline ~50-100 line fenced block and
+ * delivers true progressive disclosure: the full example lives in a
+ * reference file loaded on demand.
+ */
 export declare function stageExamples(stage: FlowStage): string;
+export type ExampleDomain = "web" | "cli" | "library" | "data-pipeline";
+export declare function stageDomainExamples(stage: FlowStage): string;

package/dist/content/examples.js CHANGED Viewed

@@ -495,14 +495,29 @@ export function stageGoodBadExamples(stage) {
         ""
     ].join("\n");
 }
-export function stageExamples(stage) {
+export const STAGE_EXAMPLES_REFERENCE_DIR = "references/stages";
+export function stageExamplesReferencePath(stage) {
+    return `.cclaw/${STAGE_EXAMPLES_REFERENCE_DIR}/${stage}-examples.md`;
+}
+/**
+ * Returns the full example artifact body as a standalone reference markdown
+ * file. Materialized under .cclaw/references/stages/<stage>-examples.md so
+ * the always-rendered skill body can link instead of inlining.
+ */
+export function stageExamplesReferenceMarkdown(stage) {
     const examples = STAGE_EXAMPLES[stage];
     if (!examples)
-        return "";
+        return null;
     return [
-        "## Examples",
+        `---`,
+        `stage: ${stage}`,
+        `name: ${stage}-stage-examples`,
+        `description: "Full sample artifact for the ${stage} stage. Loaded only when an agent explicitly needs a complete example; the stage skill links here rather than inlining."`,
+        `---`,
         "",
-        "Concrete artifact samples. These mirror the exact heading levels agents must use when authoring the stage artifact (all H2 `##` sections), so they are presented inside a markdown fence to avoid collapsing into the SKILL outline.",
+        `# ${stage} stage — full artifact sample`,
+        "",
+        `This file is linked from \`.cclaw/skills/<${stage}-stage>/SKILL.md\` under **Examples → See also**. The sample uses H2 headings that mirror the artifact a cclaw session must produce, so the markdown is wrapped in a fence to avoid collapsing into the outline.`,
         "",
         "```markdown",
         examples,
@@ -510,3 +525,196 @@ export function stageExamples(stage) {
         ""
     ].join("\n");
 }
+/**
+ * Returns the short inline pointer rendered directly inside the stage skill.
+ * Replaces the previous always-inline ~50-100 line fenced block and
+ * delivers true progressive disclosure: the full example lives in a
+ * reference file loaded on demand.
+ */
+export function stageExamples(stage) {
+    const examples = STAGE_EXAMPLES[stage];
+    if (!examples)
+        return "";
+    return [
+        "## Examples",
+        "",
+        `Full artifact sample for this stage lives at \`${stageExamplesReferencePath(stage)}\`. Open it when you need a complete reference; do NOT paste the example into the artifact verbatim — it is a shape guide, not a template.`,
+        "",
+        "Summary of what the reference covers:",
+        ...exampleSummaryBullets(stage),
+        ""
+    ].join("\n");
+}
+function exampleSummaryBullets(stage) {
+    const headings = STAGE_EXAMPLE_SECTION_HEADINGS[stage] ?? [];
+    if (headings.length === 0)
+        return ["- Full artifact structure."];
+    return headings.map((heading) => `- ${heading}`);
+}
+// Kept in sync with STAGE_EXAMPLES above so the inline summary matches the
+// reference file without duplicating the heavy text. Update whenever the
+// sample in STAGE_EXAMPLES gains or loses a top-level section.
+const STAGE_EXAMPLE_SECTION_HEADINGS = {
+    brainstorm: [
+        "Problem framing (problem, success, constraints)",
+        "Candidate approaches with trade-offs",
+        "Recommended direction + open questions",
+        "Clarification log and decision record"
+    ],
+    scope: [
+        "In-scope / out-of-scope / deferred lists with concrete capabilities",
+        "Requirements table with stable R# IDs",
+        "Boundary stress-tests and non-negotiables",
+        "Decision record for premise challenges"
+    ],
+    design: [
+        "Blast-radius file list",
+        "Mandatory architecture diagram (Mermaid)",
+        "Failure-mode table with detection + mitigation",
+        "Test strategy + performance budget",
+        "Completion dashboard + unresolved decisions"
+    ],
+    spec: [
+        "Acceptance-criteria table (observable, measurable, falsifiable)",
+        "Requirement-ref column tying each AC back to an R# from scope",
+        "Verification-approach column",
+        "Approval block"
+    ],
+    plan: [
+        "Dependency graph + dependency waves",
+        "Task list with effort + minutes estimate per task",
+        "Acceptance mapping (every AC → task IDs)",
+        "No-Placeholder scan row + WAIT_FOR_CONFIRM marker"
+    ],
+    tdd: [
+        "RED evidence per slice (failing test output)",
+        "Acceptance mapping per slice",
+        "GREEN evidence (full-suite pass)",
+        "REFACTOR notes with behavior-preservation confirmation",
+        "Test-pyramid shape + prove-it reproduction when applicable"
+    ],
+    review: [
+        "Spec-compliance findings (Layer 1)",
+        "Code-quality findings (Layer 2)",
+        "Severity, evidence, and status per finding",
+        "Go / no-go verdict"
+    ],
+    ship: [
+        "Release checklist (version, changelog, tag, artifacts)",
+        "Rollback plan with trigger, steps, verification",
+        "Runbook (how to verify the release post-deploy)",
+        "Sign-off block"
+    ]
+};
+const DOMAIN_LABELS = {
+    web: "Web app (full-stack)",
+    cli: "CLI tool",
+    library: "Library / SDK",
+    "data-pipeline": "Data pipeline / ETL"
+};
+const STAGE_DOMAIN_SAMPLES = {
+    spec: [
+        {
+            domain: "web",
+            label: "AC",
+            body: "AC-W1: Given a signed-in admin viewing `/dashboard/orders`, when an order's status changes server-side, the row updates within 2s without a full navigation (assert via `pnpm playwright test orders-live.spec.ts`)."
+        },
+        {
+            domain: "cli",
+            label: "AC",
+            body: "AC-C1: Given `cclaw init --claude` run in an empty directory, exit code is `0`, `.cclaw/config.yaml` is created with `harnesses: [claude]`, and stderr contains no warnings (asserted by `tests/integration/init-sync-doctor.test.ts`)."
+        },
+        {
+            domain: "library",
+            label: "AC",
+            body: "AC-L1: `validateHookDocument(obj)` returns `{ ok: true }` for every fixture under `tests/fixtures/valid-hooks/` and `{ ok: false, errors: [...] }` with at least one message for every fixture under `tests/fixtures/invalid-hooks/`."
+        },
+        {
+            domain: "data-pipeline",
+            label: "AC",
+            body: "AC-D1: For any `orders.csv` input, the pipeline emits exactly one row per `(order_id, event_ts)` pair to `warehouse.fact_orders`; running the job twice on the same input is idempotent (row count unchanged, verified by `dbt test --select fact_orders`)."
+        }
+    ],
+    plan: [
+        {
+            domain: "web",
+            label: "Task",
+            body: "T-W-3 `[~4m]`: Wire SSE endpoint `/api/orders/stream` into `useOrderFeed` hook. AC-W1. Verify: `pnpm playwright test orders-live.spec.ts`. Depends on: T-W-2."
+        },
+        {
+            domain: "cli",
+            label: "Task",
+            body: "T-C-2 `[~3m]`: Add `--dry-run` flag to `cclaw archive` that prints the would-be-archived run IDs to stdout and exits 0. AC-C3. Verify: `node dist/cli.js archive --dry-run` + `tests/unit/cli-parse.test.ts`."
+        },
+        {
+            domain: "library",
+            label: "Task",
+            body: "T-L-1 `[~5m]`: Expose `validateHookDocument` from the package root and re-export its types. AC-L1. Verify: `pnpm build && node -e \"console.log(require('./dist').validateHookDocument)\"`."
+        },
+        {
+            domain: "data-pipeline",
+            label: "Task",
+            body: "T-D-2 `[~5m]`: Add dedup step keyed on `(order_id, event_ts)` between `raw.orders` and `fact_orders`. AC-D1. Verify: `dbt run --select fact_orders+ && dbt test --select fact_orders`."
+        }
+    ],
+    tdd: [
+        {
+            domain: "web",
+            label: "RED→GREEN→REFACTOR",
+            body: "RED: `pnpm playwright test orders-live.spec.ts` → timeout waiting for row update. GREEN: wired SSE event → row rerenders via `useOrderFeed`. REFACTOR: extracted `applyOrderEvent(row, event)` pure helper; 87/87 tests still pass."
+        },
+        {
+            domain: "cli",
+            label: "RED→GREEN→REFACTOR",
+            body: "RED: `tests/unit/cli-parse.test.ts` expects `--dry-run` flag → `unknown option` error. GREEN: added to the Zod parser; 19/19 pass. REFACTOR: hoisted the dry-run formatter into `src/cli/format.ts` shared with `status`."
+        },
+        {
+            domain: "library",
+            label: "RED→GREEN→REFACTOR",
+            body: "RED: `tests/unit/hook-schema.test.ts` imports `validateHookDocument` from package root → `export not found`. GREEN: added re-export + types. REFACTOR: renamed internal `__validate` to `validateHookDocument` so the export name matches the source."
+        },
+        {
+            domain: "data-pipeline",
+            label: "RED→GREEN→REFACTOR",
+            body: "RED: `dbt test --select fact_orders` → `unique test on (order_id, event_ts)` fails on re-run. GREEN: added `row_number()` dedup in the staging model. REFACTOR: extracted the dedup CTE into `int_orders_deduped` for reuse by `fact_returns`."
+        }
+    ],
+    ship: [
+        {
+            domain: "web",
+            label: "Rollback",
+            body: "Trigger: error rate on `/api/orders/stream` > 2% for 5 minutes, or p95 latency > 1.5s for 10 minutes. Steps: `vercel rollback <deployment>`; run `2026_04_14_revert_orders_stream.sql` before traffic returns. Verify: error rate returns to baseline within 10 minutes on the `orders-live` dashboard."
+        },
+        {
+            domain: "cli",
+            label: "Rollback",
+            body: "Trigger: `cclaw init --claude` exits non-zero on a fresh tmp dir, OR `cclaw doctor` regresses (FAIL count increases) on the smoke matrix. Steps: `npm unpublish cclaw-cli@<version>` (within the 72h window) or `npm deprecate cclaw-cli@<version> '<reason>'`; publish the previous patch. Verify: `npx cclaw-cli@latest --version` prints the previous version."
+        },
+        {
+            domain: "library",
+            label: "Rollback",
+            body: "Trigger: any consumer reports `validateHookDocument` no longer exported, OR the CI `dual-package-check` job fails. Steps: `npm deprecate cclaw-cli@<version> 'broken package export — use <prev>'`; publish the previous minor with a patch bump; emit changelog `## Rollback` entry. Verify: a smoke consumer project `pnpm add cclaw-cli@latest` imports cleanly."
+        },
+        {
+            domain: "data-pipeline",
+            label: "Rollback",
+            body: "Trigger: `dbt test --select fact_orders` fails on production run, OR downstream dashboard MAU count drops >10% week-over-week. Steps: disable the new model via `dbt_project.yml` + `dbt run --select state:modified` with the previous git SHA; rerun backfill `dagster asset materialize fact_orders --partition <yesterday>`. Verify: `fact_orders` row count within ±1% of the previous week's baseline."
+        }
+    ]
+};
+export function stageDomainExamples(stage) {
+    const samples = STAGE_DOMAIN_SAMPLES[stage];
+    if (!samples || samples.length === 0)
+        return "";
+    const lines = [
+        "## Living Examples by Domain",
+        "",
+        "Use the row matching your project shape to calibrate voice, specificity, and command choice. The rows are deliberately terse — copy the **shape**, not the text.",
+        ""
+    ];
+    for (const sample of samples) {
+        lines.push(`**${DOMAIN_LABELS[sample.domain]} — ${sample.label}:** ${sample.body}`);
+        lines.push("");
+    }
+    return lines.join("\n");
+}

package/dist/content/harness-tool-refs.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+/**
+ * Per-harness tool-mapping reference files.
+ *
+ * Addresses A.1#4: the four supported harnesses (claude, cursor, opencode, codex)
+ * expose different primitive names for the same capabilities (ask-user,
+ * delegate/Task, web fetch, file edit, code execution, ...). cclaw's stage skills
+ * need to pick the right name at runtime without bloating every stage with per-harness
+ * if/else ladders.
+ *
+ * Each file below is short (one table per capability), authoritative, and materialised
+ * at `.cclaw/references/harness-tools/<harness>.md`. Stage skills and the meta-skill
+ * cite the folder instead of duplicating the mappings inline.
+ *
+ * When a new harness is added (or an existing one renames a tool), update the
+ * corresponding entry here — do NOT scatter tool names across skill text.
+ */
+import type { HarnessId } from "../types.js";
+export declare const HARNESS_TOOL_REFS_DIR = "references/harness-tools";
+export declare function harnessToolRefMarkdown(harness: HarnessId): string;
+export declare const HARNESS_TOOL_REFS_INDEX_MD = "---\nname: Harness tool maps\ndescription: \"Index file. One reference per supported harness \u2014 cite the per-harness file instead of hardcoding tool names in stage skills.\"\n---\n\n# Harness Tool Maps\n\ncclaw supports four harnesses; each exposes different primitive names for the same capabilities. Stage skills and utility skills cite the file matching the currently active harness and fall back to plain-text equivalents for capabilities that the harness lacks.\n\n| Harness | File | Notes |\n|---|---|---|\n| Claude Code | `.cclaw/references/harness-tools/claude.md` | Richest tool surface (AskUserQuestion, Task, WebFetch, WebSearch, MCP, \u2026). |\n| Cursor | `.cclaw/references/harness-tools/cursor.md` | Near-parity with Claude; uses `AskQuestion` instead of `AskUserQuestion`. |\n| OpenCode | `.cclaw/references/harness-tools/opencode.md` | No native ask-user / dispatch; more plain-text fallbacks. |\n| Codex | `.cclaw/references/harness-tools/codex.md` | No native ask-user / dispatch; shell + file I/O only by default. |\n\nWhen a new harness is added or an existing one renames a tool, update the corresponding file (and this index) \u2014 do NOT scatter tool names across skill text.\n";

package/dist/content/harness-tool-refs.js ADDED Viewed

@@ -0,0 +1,240 @@
+/**
+ * Per-harness tool-mapping reference files.
+ *
+ * Addresses A.1#4: the four supported harnesses (claude, cursor, opencode, codex)
+ * expose different primitive names for the same capabilities (ask-user,
+ * delegate/Task, web fetch, file edit, code execution, ...). cclaw's stage skills
+ * need to pick the right name at runtime without bloating every stage with per-harness
+ * if/else ladders.
+ *
+ * Each file below is short (one table per capability), authoritative, and materialised
+ * at `.cclaw/references/harness-tools/<harness>.md`. Stage skills and the meta-skill
+ * cite the folder instead of duplicating the mappings inline.
+ *
+ * When a new harness is added (or an existing one renames a tool), update the
+ * corresponding entry here — do NOT scatter tool names across skill text.
+ */
+export const HARNESS_TOOL_REFS_DIR = "references/harness-tools";
+const CLAUDE_TOOLS_MD = `---
+harness: claude
+name: Claude Code tool map
+description: "Canonical mapping of cclaw capability names → Claude Code tool names. Cited by stage skills; do not duplicate in per-stage text."
+---
+# Claude Code — Tool Map
+Use this file as the single source of truth for which Claude Code tool to call when a cclaw skill references a generic capability.
+## Core capabilities
+| cclaw capability | Claude Code tool | Notes |
+|---|---|---|
+| Ask user a structured question | \`AskUserQuestion\` | Max 4 options; lettered labels ≤12 chars. Fall back to plain-text lettered list on schema error. |
+| Dispatch a subagent (read-only or write) | \`Task\` with \`subagent_type\` | \`explore\` = read-only; \`generalPurpose\` = read-write. Background via \`run_in_background: true\`. |
+| Read file | \`Read\` | Prefer this over \`cat\` / \`head\` / \`tail\`. |
+| Edit file | \`Edit\` (exact match) or \`Write\` (overwrite) | Always \`Read\` before editing; avoid \`sed\`/\`awk\` unless asked. |
+| Create file | \`Write\` | Reject if the task can be solved by editing an existing file. |
+| Search file contents | \`Grep\` (ripgrep-backed) | Use \`output_mode: files_with_matches\` for file lists. |
+| Find files by name / glob | \`Glob\` | Matching paths are returned sorted by modification time. |
+| Shell command | \`Bash\` | Background long-running jobs with \`run_in_background: true\`; poll with \`BashOutput\`. |
+| Fetch URL | \`WebFetch\` | Returns markdown. No auth, no binaries. |
+| Web search | \`WebSearch\` | Use for docs, real-time info, version lookups. |
+| Semantic code search | \`SemanticSearch\` | One directory per call; whole-repo via \`[]\`. |
+| Todo tracking | \`TodoWrite\` | Use \`merge: true\` to update; keep one task \`in_progress\`. |
+| Ask tool (multi-question) | \`AskQuestion\` (Cursor-only, unavailable in Claude) | NOT available in Claude — use \`AskUserQuestion\` instead. |
+| MCP tool call | \`CallMcpTool\` | Always read the tool's schema descriptor first. |
+## Decision-protocol mapping
+When a stage skill says "ask the user a structured question", in Claude Code that means:
+\`\`\`
+AskUserQuestion({
+  questions: [{
+    id: "...",
+    prompt: "One-sentence decision, plain English",
+    options: [
+      { id: "a", label: "Short label" },   // ≤12 chars
+      { id: "b", label: "Alt label" },
+      { id: "c", label: "Recommended" }
+    ]
+  }]
+})
+\`\`\`
+One question per call. Never batch.
+## Escalation / fall-back
+If a tool returns a schema error twice in a row (see the meta-skill's Error / Retry Budget), switch to plain-text equivalents:
+- \`AskUserQuestion\` → write a lettered list (A, B, C…) in the response, then wait for the user's reply.
+- \`Task\` (dispatch) → inline the work in the current turn.
+- \`WebFetch\` → ask the user for the URL's content.
+`;
+const CURSOR_TOOLS_MD = `---
+harness: cursor
+name: Cursor tool map
+description: "Canonical mapping of cclaw capability names → Cursor agent tool names. Cited by stage skills; do not duplicate in per-stage text."
+---
+# Cursor — Tool Map
+Use this file as the single source of truth for which Cursor agent tool to call when a cclaw skill references a generic capability.
+## Core capabilities
+| cclaw capability | Cursor tool | Notes |
+|---|---|---|
+| Ask user a structured question | \`AskQuestion\` | \`questions\` is an array; each question has \`id\`, \`prompt\`, \`options\`, optional \`allow_multiple\`. |
+| Dispatch a subagent | \`Task\` with \`subagent_type\` | Available types: \`generalPurpose\`, \`explore\` (readonly), \`shell\`, \`browser-use\`, \`best-of-n-runner\`. |
+| Read file | \`Read\` | Line-numbered output; avoid \`cat\` / \`head\` / \`tail\`. |
+| Edit file | \`StrReplace\` | Unique \`old_string\` required; use \`replace_all: true\` for bulk renames. |
+| Create file | \`Write\` | Prefer editing existing files. |
+| Search file contents | \`Grep\` (ripgrep-backed) | Output modes: \`content\`, \`files_with_matches\`, \`count\`. |
+| Find files by name / glob | \`Glob\` | Auto-prepends \`**/\` when pattern does not start with it. |
+| Shell command | \`Shell\` | Long-running jobs go to background via \`block_until_ms: 0\`; poll with \`Await\`. |
+| Fetch URL | \`WebFetch\` | Markdown output. |
+| Web search | \`WebSearch\` | Use for real-time info, framework docs, news. |
+| Semantic code search | \`SemanticSearch\` | Prefer for exploratory "how does X work?" queries. |
+| Todo tracking | \`TodoWrite\` | Supports \`merge: true\` for partial updates. |
+| Generate image | \`GenerateImage\` | Only on explicit user request. |
+| Ask structured questions (Claude-style) | \`AskUserQuestion\` | NOT available in Cursor — use \`AskQuestion\`. |
+| MCP tool call | \`CallMcpTool\` | Cursor exposes MCP tools via this wrapper; read the descriptor first. |
+| Jupyter notebook edit | \`EditNotebook\` | Use for \`.ipynb\` only; cell-granular edits. |
+| Mode switching | \`SwitchMode\` | Propose plan/agent mode changes when task character shifts. |
+## Decision-protocol mapping
+In Cursor, structured asks look like:
+\`\`\`
+AskQuestion({
+  questions: [{
+    id: "...",
+    prompt: "One-sentence decision",
+    options: [
+      { id: "a", label: "Option A" },
+      { id: "b", label: "Option B" }
+    ]
+  }]
+})
+\`\`\`
+## Escalation / fall-back
+On repeated tool errors, fall back to plain-text equivalents just like Claude — see the meta-skill's Error / Retry Budget.
+`;
+const OPENCODE_TOOLS_MD = `---
+harness: opencode
+name: OpenCode tool map
+description: "Canonical mapping of cclaw capability names → OpenCode primitives. Cited by stage skills; do not duplicate in per-stage text."
+---
+# OpenCode — Tool Map
+OpenCode exposes a leaner tool surface than Claude Code / Cursor. When a cclaw skill describes a capability that OpenCode lacks, fall back to the plain-text equivalent listed below.
+## Core capabilities
+| cclaw capability | OpenCode primitive | Notes |
+|---|---|---|
+| Ask user a structured question | **Not available as a tool.** | Emit a plain-text lettered list: \`A) ... B) ... C) (recommended) ...\`. Wait for the user's letter. |
+| Dispatch a subagent | **Not available as a tool.** | Inline the work in the current turn, or split across multiple turns with the user driving. |
+| Read file | file-read primitive | Same role as \`Read\`. |
+| Edit file | file-edit primitive | Same role as \`StrReplace\`; confirm diff before writing. |
+| Create file | file-write primitive | Prefer editing existing files. |
+| Search file contents | \`rg\` via shell | Cite \`rg\` output verbatim as evidence when a skill requires a grep result. |
+| Find files by name / glob | \`fd\` or \`find\` via shell | Capture the command + output. |
+| Shell command | shell primitive | Long-running jobs require explicit background + polling — check the OpenCode docs for \`&\` semantics. |
+| Fetch URL | \`curl\` via shell | No markdown conversion; extract manually. |
+| Web search | **Not available.** | Ask the user to paste docs or provide a URL, then fetch via shell. |
+| Todo tracking | **Not available as a tool.** | Maintain a \`### TODO\` block inline in your response; keep one item in progress. |
+| MCP tool call | Depends on runtime config. | If MCP is enabled, use the documented invocation; otherwise treat as unavailable. |
+## Decision-protocol mapping
+\`\`\`
+Decision: <one sentence>.
+A) <label> — <trade-off>
+B) <label> — <trade-off>
+C) <label> — <trade-off>  (recommended, because <one-line reason>)
+Please reply with the letter.
+\`\`\`
+## Escalation / fall-back
+Because OpenCode lacks native ask-user and dispatch tools, more of cclaw's protocols degrade to plain text. This is expected — the flow gates and artifacts are identical; only the delivery channel changes.
+`;
+const CODEX_TOOLS_MD = `---
+harness: codex
+name: Codex tool map
+description: "Canonical mapping of cclaw capability names → Codex CLI primitives. Cited by stage skills; do not duplicate in per-stage text."
+---
+# Codex — Tool Map
+Codex (OpenAI Codex CLI) exposes roughly the same core surface as OpenCode: file I/O, shell, no native ask-user, no dispatch. Fall back to plain text for anything else.
+## Core capabilities
+| cclaw capability | Codex primitive | Notes |
+|---|---|---|
+| Ask user a structured question | **Not available as a tool.** | Emit a plain-text lettered list; wait for the user's reply. |
+| Dispatch a subagent | **Not available as a tool.** | Inline the work; split turns if needed. |
+| Read file | \`read\` / \`open\` primitive | Same role as \`Read\`. |
+| Edit file | \`edit\` / \`patch\` primitive | Same role as \`StrReplace\`. |
+| Create file | \`write\` primitive | Prefer editing existing files. |
+| Search file contents | \`rg\` via shell | Capture command + output verbatim. |
+| Find files by name / glob | \`fd\` / \`find\` / \`ls\` via shell | Capture command + output. |
+| Shell command | shell primitive | Codex CLI may restrict some binaries by default — check the effective permissions. |
+| Fetch URL | \`curl\` via shell | Extract markdown manually. |
+| Web search | **Not available.** | Ask user for docs / URL. |
+| Todo tracking | **Not available as a tool.** | Keep an inline \`### TODO\` section; update it as you progress. |
+| MCP tool call | Depends on runtime config. | If MCP is wired, cite the descriptor; otherwise treat as unavailable. |
+## Decision-protocol mapping
+\`\`\`
+Decision: <one sentence>.
+A) <label> — <trade-off>
+B) <label> — <trade-off>  (recommended, because <reason>)
+C) <label> — <trade-off>
+Please reply with the letter.
+\`\`\`
+## Escalation / fall-back
+Treat missing tools as "plain-text required", not "skip the step". The gate still has to pass; only the channel changes.
+`;
+const HARNESS_TOOL_REFS = {
+    claude: CLAUDE_TOOLS_MD,
+    cursor: CURSOR_TOOLS_MD,
+    opencode: OPENCODE_TOOLS_MD,
+    codex: CODEX_TOOLS_MD
+};
+export function harnessToolRefMarkdown(harness) {
+    return HARNESS_TOOL_REFS[harness];
+}
+export const HARNESS_TOOL_REFS_INDEX_MD = `---
+name: Harness tool maps
+description: "Index file. One reference per supported harness — cite the per-harness file instead of hardcoding tool names in stage skills."
+---
+# Harness Tool Maps
+cclaw supports four harnesses; each exposes different primitive names for the same capabilities. Stage skills and utility skills cite the file matching the currently active harness and fall back to plain-text equivalents for capabilities that the harness lacks.
+| Harness | File | Notes |
+|---|---|---|
+| Claude Code | \`.cclaw/${HARNESS_TOOL_REFS_DIR}/claude.md\` | Richest tool surface (AskUserQuestion, Task, WebFetch, WebSearch, MCP, …). |
+| Cursor | \`.cclaw/${HARNESS_TOOL_REFS_DIR}/cursor.md\` | Near-parity with Claude; uses \`AskQuestion\` instead of \`AskUserQuestion\`. |
+| OpenCode | \`.cclaw/${HARNESS_TOOL_REFS_DIR}/opencode.md\` | No native ask-user / dispatch; more plain-text fallbacks. |
+| Codex | \`.cclaw/${HARNESS_TOOL_REFS_DIR}/codex.md\` | No native ask-user / dispatch; shell + file I/O only by default. |
+When a new harness is added or an existing one renames a tool, update the corresponding file (and this index) — do NOT scatter tool names across skill text.
+`;

package/dist/content/meta-skill.js CHANGED Viewed

@@ -209,10 +209,7 @@ When a stage requires user input (approval, choice, direction):
 1. **State the decision** in one sentence.
 2. **Present options** as labeled choices (A, B, C...), one-line each, with trade-off / consequence.
 3. **Mark one option \`(recommended)\`** with a one-line reason. Do NOT use numeric "Completeness" rubrics — pick the option that best closes the decision with the smallest blast radius, lowest irreversible risk, and clearest evidence.
-4. **Use the harness ask-user tool when available:**
-   - Claude Code: \`AskUserQuestion\`
-   - Cursor: \`AskQuestion\` (options array)
-   - Codex/OpenCode: numbered list in plain text (no native ask tool).
+4. **Use the harness ask-user tool when available.** For the exact tool name and fallback, consult \`.cclaw/references/harness-tools/<harness>.md\` (one file per supported harness — claude, cursor, opencode, codex). Summary: Claude Code → \`AskUserQuestion\`; Cursor → \`AskQuestion\`; OpenCode / Codex → plain-text lettered list.
 5. **Wait for response.** Do not proceed until the user picks.
 6. **Commit to the choice.** Once decided, do not re-argue.
@@ -236,6 +233,43 @@ When a stage requires user input (approval, choice, direction):
 If the same approach fails three times in a row (same verification command, same review finding, same tool invocation), STOP and escalate: summarize what you tried, what evidence you have, what hypothesis you are now testing, and ask the user how to proceed. Do not invent a new angle silently on the fourth attempt.
+### Shared Stage Completion Protocol
+Every stage skill ends with a completion block parameterized by four values: \`next\` (next stage or \`done\`), \`gates\` (gate IDs to mark passed), \`artifact\` (file under \`.cclaw/artifacts/\`), and \`mandatory\` (agents required by delegation enforcement). Stage skills print their **Completion Parameters** and then defer to this procedure — do NOT re-print the full procedure per stage.
+When all required gates are satisfied and the artifact is written, execute **in this exact order**:
+0. **Delegation pre-flight** (BLOCKING, only when \`mandatory\` is non-empty).
+   - For each agent in \`mandatory\`: confirm it was dispatched (via Task/delegate) and completed, OR record an explicit waiver with reason in \`.cclaw/state/delegation-log.json\`.
+   - Write a JSON entry per agent: \`{ "stage": "<stage>", "agent": "<name>", "mode": "mandatory", "status": "completed"|"waived", "waiverReason": "<if waived>", "ts": "<ISO timestamp>" }\`.
+   - If the harness does not support delegation, record status \`"waived"\` with reason \`"harness_limitation"\`.
+   - **Do NOT proceed to step 1 until every mandatory agent has an entry in the delegation log.**
+1. **Update \`.cclaw/state/flow-state.json\`:**
+   - Set \`currentStage\` to \`next\` (or leave unchanged when \`next === "done"\`).
+   - Add the current stage to \`completedStages\`.
+   - Move every gate ID in \`gates\` into \`stageGateCatalog.<stage>.passed\`.
+   - Clear \`stageGateCatalog.<stage>.blocked\`.
+   - For each passed gate, add an entry to \`guardEvidence\`: \`"<gate_id>": "<artifact path or excerpt proving the gate>"\`. Do NOT leave \`guardEvidence\` empty.
+2. **Persist artifact** at \`.cclaw/artifacts/<artifact>\`. Do NOT manually copy into \`.cclaw/runs/\`; archival is handled by \`cclaw archive\`.
+3. **Doctor pre-flight** — run \`npx cclaw doctor\` (or the installed cclaw binary). If any check fails, resolve the issue (missing delegation entry, artifact section, gate evidence) and re-run until all checks pass. Do NOT proceed while doctor reports failures.
+4. **Tell the user** (verbatim when \`next\` is a stage; use the flow-complete variant when \`next === "done"\`):
+   > **Stage \`<stage>\` complete.** Next: **<next>** — <one-line next-stage description>.
+   >
+   > Run \`/cc-next\` to continue.
+   Flow-complete variant:
+   > **Flow complete.** All stages finished. The project is ready for release.
+5. **STOP.** Do not load the next stage skill yourself. The user will run \`/cc-next\` when ready (same session or new session).
+### Shared Resume Protocol
+When resuming a stage in a NEW session (artifact exists but gates are not all passed in \`flow-state.json\`):
+1. Read the existing artifact and mark every gate whose evidence is already present in the artifact.
+2. For each unverified gate, ask the user to confirm ONE gate at a time. Do NOT batch multiple gate confirmations in a single message.
+3. Update \`guardEvidence\` for each confirmed gate before proceeding to the next unverified gate.
 ## </EXTREMELY-IMPORTANT>
 ## Invocation Preamble (per turn, non-trivial tasks)
@@ -255,6 +289,40 @@ The preamble exists to prevent silent drift from the user's ask. If the preamble
 Do not re-emit the preamble on every subsequent tool call — once per user turn is sufficient. If the user message changes the goal mid-execution, emit a fresh preamble before acting on the new direction.
+## Engineering Ethos
+Three guardrails apply to every stage, every turn. Internalise them — they trump speed, cleverness, and novelty:
+### Search Before Building
+Before writing new code, a new skill, a new abstraction, or a new artifact section, spend 60–120 seconds checking whether the thing already exists. Order of search:
+1. **Project artifacts** — \`.cclaw/artifacts/**\`, \`docs/**\`, root-level \`README.md\` / \`SPEC.md\` / \`DESIGN.md\`.
+2. **Project knowledge** — \`.cclaw/knowledge.jsonl\` (lessons with matching \`domain\` / \`trigger\`).
+3. **Codebase** — \`rg\` / \`Grep\` for the symbol, function, test, or comment that describes what you're about to add.
+4. **Framework/library primitives** — prefer a stdlib or framework-native affordance over a handwritten helper.
+5. **Existing skill or stage rule** — \`.cclaw/skills/**/SKILL.md\` and \`.cclaw/commands/**/*.md\`.
+Only after all five turn up nothing do you build. Every duplicate helper, redefined type, parallel-but-incompatible artifact section, or re-discovered lesson is a tax on the next five sessions. Record the negative search result (what you looked for, where, and why nothing fit) in the turn's preamble or the stage artifact so future agents don't repeat the hunt.
+### Boil the Lake (scoped minimum-sweep rule)
+"Boil the lake" normally means wasteful, exhaustive work. **cclaw inverts the phrase**: within the current stage, you are expected to sweep *the defined surface exhaustively* — not to stop at the first plausible answer.
+- In \`brainstorm\` / \`scope\` — enumerate every viable approach in the defined option space; name the ones you rejected and why.
+- In \`design\` — trace every data-flow and failure edge across the chosen component boundary, not just the happy path.
+- In \`spec\` — list every acceptance criterion for the in-scope surface; "and similar" / "etc." is banned.
+- In \`tdd\` — exercise every branch / error path / boundary of the slice under test, not only the canonical case.
+- In \`review\` — audit every file touched in the diff, not just the files named in the spec.
+The sweep is bounded by the stage's declared surface. Expanding the surface is a Decision Protocol question, not a silent enlargement.
+### Do Less, Prove More
+When in doubt between adding code / scope / artifact sections and cutting them, cut. The flow already forces you to justify each stage's output — volume is never a proxy for quality. One acceptance criterion with captured evidence beats five without; one labeled architecture diagram beats three generic boxes-and-arrows; one REFACTOR note explaining a concrete trade-off beats a paragraph of filler.
+If a rule, template section, or agent feels ornamental, flag it in \`Operational Self-Improvement\` and propose removal — cclaw's invariant is that every section must pay its tokens back by preventing a specific failure mode.
 ## Operational Self-Improvement (auto-learn)
 cclaw treats **lived friction** as first-class knowledge. When you observe one of the triggers below during a session, append a single JSONL line to \`.cclaw/knowledge.jsonl\` via \`/cc-learn add\` (or queue it for the next \`/cc-learn\` call) — do NOT let the signal evaporate when the session ends.

package/dist/content/skills.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { RUNTIME_ROOT } from "../constants.js";
-import { stageExamples, stageGoodBadExamples } from "./examples.js";
+import { stageDomainExamples, stageExamples, stageGoodBadExamples } from "./examples.js";
 import { selfImprovementBlock } from "./learnings.js";
 import { stageAutoSubagentDispatch, stageSchema } from "./stage-schema.js";
 function rationalizationTable(stage) {
@@ -155,60 +155,81 @@ function waveExecutionModeBlock(stage) {
 After plan approval (**WAIT_FOR_CONFIRM** / \`plan_wait_for_confirm\` satisfied), process **all tasks in the current dependency wave** sequentially: **RED → GREEN → REFACTOR** per task, recording evidence per slice. **Stop** only on **BLOCKED**, a test failure that **requires user input**, or **wave completion** (every task in the wave has the required RED / GREEN / REFACTOR evidence per the plan artifact).
+### Walkthrough — Wave 1 with 3 tasks
+The example below is **illustrative only** — do not copy the command names blindly, match them to your stack.
+Assume Wave 1 from the plan artifact contains three tasks:
+| Task ID | Description | AC | Verification |
+|---|---|---|---|
+| T-1 \`[~3m]\` | Add \`User.emailNormalized\` column | AC-1 | \`npm test -- users/schema\` |
+| T-2 \`[~4m]\` | Normalize on write in \`UserRepo.save\` | AC-1 | \`npm test -- users/repo\` |
+| T-3 \`[~3m]\` | Reject duplicates in \`UserService.signup\` | AC-2 | \`npm test -- users/service\` |
+**Execution transcript** (one slice at a time, evidence captured per step):
+**T-1 — RED**
+> Run: \`npm test -- users/schema\` → **FAIL** (missing column: \`emailNormalized\`). Captured the failure stack as RED evidence. No production code touched yet.
+**T-1 — GREEN**
+> Added the column in the schema module. Re-ran \`npm test -- users/schema\` → **PASS**. Ran the full suite \`npm test\` → **PASS**. Captured both outputs as GREEN evidence.
+**T-1 — REFACTOR**
+> Extracted the column definition into a shared \`NormalizedEmail\` type used by T-2/T-3. Re-ran \`npm test\` → **PASS**. Captured REFACTOR note: "Extracted NormalizedEmail type to keep T-2/T-3 DRY; zero behavior change, all tests still green."
+**T-2 — RED / GREEN / REFACTOR**: same shape — write the repo test that expects normalised writes, watch it fail (RED), implement normalisation inside \`UserRepo.save\` only (GREEN), then refactor the normaliser out of the repo into a helper shared with T-3 (REFACTOR).
+**T-3 — RED / GREEN / REFACTOR**: write the service-level duplicate test that expects a rejection, watch it fail (RED), add the duplicate check in \`UserService.signup\` (GREEN), refactor the error message into a named constant (REFACTOR).
+**Wave gate check**
+After T-3 REFACTOR, before declaring Wave 1 done:
+1. Run the **full suite** (\`npm test\`) one final time → **PASS** captured as wave-exit evidence.
+2. Verify the TDD artifact contains RED, GREEN, and REFACTOR evidence for T-1, T-2, **and** T-3. No partial waves.
+3. Only now mark Wave 1 complete. Wave 2 cannot start until this step.
+**When to stop mid-wave (do NOT push through)**
+- A RED test fails for a reason you did not predict (e.g. an unrelated flaky test) → **pause**, diagnose, log an operational-self-improvement entry, and decide with the user before proceeding.
+- A GREEN step would require touching code outside the task's acceptance criterion → **pause**, the task is scoped wrong; adjust the plan or open a follow-up task.
+- The same RED failure reappears after a GREEN change → **escalate** per the 3-attempts rule; do not keep patching.
 `;
 }
 function stageCompletionProtocol(schema) {
     const stage = schema.stage;
     const gateIds = schema.requiredGates.map((g) => g.id);
     const gateList = gateIds.map((id) => `\`${id}\``).join(", ");
-    const nextStage = schema.next === "done" ? null : schema.next;
+    const nextStage = schema.next === "done" ? "done" : schema.next;
     const mandatory = schema.mandatoryDelegations;
-    const delegationLogRel = `${RUNTIME_ROOT}/state/delegation-log.json`;
-    const stateUpdate = nextStage
-        ? `   - Set \`currentStage\` to \`"${nextStage}"\`
-   - Add \`"${stage}"\` to \`completedStages\` array
-   - Move all gate IDs for this stage (${gateList}) into \`stageGateCatalog.${stage}.passed\`
-   - Clear \`stageGateCatalog.${stage}.blocked\``
-        : `   - Add \`"${stage}"\` to \`completedStages\` array
-   - Move all gate IDs for this stage (${gateList}) into \`stageGateCatalog.${stage}.passed\`
-   - Clear \`stageGateCatalog.${stage}.blocked\``;
-    const delegationBlock = mandatory.length > 0
-        ? `0. **Delegation pre-flight** (BLOCKING):
-   - Mandatory agents for this stage: ${mandatory.map((a) => `\`${a}\``).join(", ")}.
-   - For each mandatory agent: confirm it was dispatched (via Task/delegate) and completed, OR record an explicit waiver with reason in \`${delegationLogRel}\`.
-   - Write a JSON entry per agent: \`{ "stage": "${stage}", "agent": "<name>", "mode": "mandatory", "status": "completed"|"waived", "waiverReason": "<if waived>", "ts": "<ISO timestamp>" }\`.
-   - If the harness does not support delegation, record status \`"waived"\` with reason \`"harness_limitation"\`.
-   - **Do NOT proceed to step 1 until every mandatory agent has an entry in the delegation log.**
-`
-        : "";
-    let nextAction;
-    if (nextStage) {
-        const nextSchema = stageSchema(nextStage);
-        const nextDescription = nextSchema.skillDescription.charAt(0).toLowerCase() + nextSchema.skillDescription.slice(1);
-        nextAction = `4. Tell the user:\n\n   > **Stage \`${stage}\` complete.** Next: **${nextStage}** — ${nextDescription}\n   >\n   > Run \`/cc-next\` to continue.`;
-    }
-    else {
-        nextAction = `4. Tell the user:\n\n   > **Flow complete.** All stages finished. The project is ready for release.`;
-    }
+    const mandatoryList = mandatory.length > 0 ? mandatory.map((a) => `\`${a}\``).join(", ") : "none";
+    const nextDescription = schema.next === "done"
+        ? "flow complete — release cut and handoff signed off"
+        : (() => {
+            const nextSchema = stageSchema(schema.next);
+            return nextSchema.skillDescription.charAt(0).toLowerCase() + nextSchema.skillDescription.slice(1);
+        })();
     return `## Stage Completion Protocol
-When all required gates are satisfied and the artifact is written:
+Apply the **Shared Stage Completion Protocol** from \`.cclaw/skills/using-cclaw/SKILL.md\` with these parameters — do NOT re-derive the generic steps here.
-${delegationBlock}1. **Update \`${RUNTIME_ROOT}/state/flow-state.json\`:**
-${stateUpdate}
-   - For each passed gate, add an entry to \`guardEvidence\`: \`"<gate_id>": "<artifact path or excerpt proving the gate>"\`. Do NOT leave \`guardEvidence\` empty.
-2. **Persist artifact** at \`${RUNTIME_ROOT}/artifacts/${schema.artifactFile}\`. Do NOT manually copy into \`${RUNTIME_ROOT}/runs/\`; archival is handled by \`cclaw archive\`.
-3. **Doctor pre-flight** — Run \`npx cclaw doctor\` (or the installed cclaw binary). If any check fails, resolve the issue (missing delegation entry, artifact section, gate evidence) and re-run until all checks pass. Do NOT proceed to the next step while doctor reports failures.
-${nextAction}
+**Completion Parameters**
+- \`stage\` — \`${stage}\`
+- \`next\` — \`${nextStage}\` (${nextDescription})
+- \`gates\` — ${gateList}
+- \`artifact\` — \`${RUNTIME_ROOT}/artifacts/${schema.artifactFile}\`
+- \`mandatory\` — ${mandatoryList}
-**STOP.** Do not load the next stage skill yourself. The user will run \`/cc-next\` when ready (same session or new session).
+When all required gates are satisfied and the artifact is written, execute the shared procedure (delegation pre-flight → flow-state update → artifact persistence → \`npx cclaw doctor\` → user handoff → STOP) using the parameters above. If any check fails, resolve the issue and re-run before proceeding.
 ## Resume Protocol
-When resuming a stage in a NEW session (artifact exists but gates are not all passed in flow-state):
-1. Read the existing artifact and check which gates can be verified from artifact evidence.
-2. For each unverified gate, ask the user to confirm ONE gate at a time. Do NOT batch multiple gate confirmations in a single message.
-3. Update \`guardEvidence\` for each confirmed gate before proceeding.
+When resuming this stage in a NEW session (artifact exists but not all of ${gateList} are passed), follow the **Shared Resume Protocol** in \`.cclaw/skills/using-cclaw/SKILL.md\` — confirm one gate at a time, update \`guardEvidence\` for each, never batch confirmations.
 `;
 }
 function stageTransitionAutoAdvanceBlock(schema) {
@@ -364,6 +385,7 @@ You MUST complete these steps in order:
 ${checklistItems}
 ${stageGoodBadExamples(stage)}
+${stageDomainExamples(stage)}
 ${stageExamples(stage)}
 ${namedAntiPatternBlock(stage)}
 ${cognitivePatternsList(stage)}

package/dist/content/stage-schema.js CHANGED Viewed

@@ -865,6 +865,8 @@ const PLAN = {
     cognitivePatterns: [
         { name: "Vertical Slice Thinking", description: "Each task delivers one thin end-to-end slice of value. Horizontal layers (all models, then all controllers) create integration risk. Vertical slices (one feature through all layers) reduce it." },
         { name: "Two-Minute Smell Test", description: "If a competent engineer cannot understand and start a task in two minutes, the task is too large or too vague. Break it down further." },
+        { name: "Five-Minute Budget (hard)", description: "Every plan step MUST fit a 2-to-5-minute execution budget on a competent implementer. If a step plausibly takes longer, it is two steps pretending to be one — split it. Measure by 'keyboard minutes on this slice', not by wall clock. Write the estimated minutes next to each task (e.g. `[~3m]`); when a TDD slice later consumes >2× the estimate, log an operational-self-improvement entry so future plans calibrate better." },
+        { name: "No Placeholders", description: "Plan text must be copy-pasteable. Forbidden tokens anywhere in the artifact: `TODO`, `TBD`, `FIXME`, `<fill-in>`, `<your-*-here>`, `xxx`, `...` (as ellipsis for omitted content — real commands use real args). Every acceptance-criterion link, file path, test command, and verification command must be concrete and runnable as written. A placeholder is a deferred decision masquerading as a plan; decide it now or remove the task." },
         { name: "Make the Change Easy, Then Make the Easy Change", description: "Refactor first, implement second. Never structural + behavioral changes simultaneously. Sequence tasks accordingly." },
         { name: "Diagnose Before Fix", description: "Before decomposing work, understand the current state of the codebase. Read existing code, tests, and conventions. Tasks should reference what exists, not assume a blank slate." },
         { name: "Scrap Signals", description: "If a task description is vague, the acceptance criterion is missing, or the verification command is a placeholder — it is scrap. Either rewrite it or remove it. Half-specified tasks waste more time than no tasks." },
@@ -892,6 +894,16 @@ const PLAN = {
                 "Are there hidden dependencies between tasks in different waves?"
             ],
             stopGate: true
+        },
+        {
+            title: "Five-Minute Budget + No-Placeholders Audit",
+            evaluationPoints: [
+                "Does every task carry an explicit minutes estimate (e.g. `[~3m]`) and does every estimate fit the 2-to-5-minute budget? Estimates >5 minutes must be split.",
+                "Are all file paths, test commands, and verification commands copy-pasteable as written — no `TODO`, `TBD`, `FIXME`, `<fill-in>`, `<your-*-here>`, `xxx`, or ellipsis standing in for omitted args?",
+                "Does every acceptance-criterion reference resolve to a real R# / AC-### in the spec (not a blank link)?",
+                "If an estimate is genuinely uncertain (first-time integration, unfamiliar library), is the uncertainty named explicitly and scheduled as a spike task in wave 0, rather than hidden behind a large estimate?"
+            ],
+            stopGate: true
         }
     ],
     completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
@@ -903,11 +915,12 @@ const PLAN = {
     artifactValidation: [
         { section: "Dependency Graph", required: true, validationRule: "Ordering and parallel opportunities explicit. No circular dependencies." },
         { section: "Dependency Waves", required: true, validationRule: "Every task belongs to a wave. Each wave has an exit gate and dependency statement." },
-        { section: "Task List", required: true, validationRule: "Each task: ID, description, acceptance criterion link, verification command, and effort estimate (S/M/L)." },
+        { section: "Task List", required: true, validationRule: "Each task row includes ID, description, acceptance criterion, verification command, and effort estimate (S/M/L). Every task must also carry a minutes estimate within the 2-5 minute budget." },
         { section: "Acceptance Mapping", required: true, validationRule: "Every spec criterion is covered by at least one task." },
         { section: "Risk Assessment", required: false, validationRule: "If present: per-task or per-wave risk identification with likelihood, impact, and mitigation strategy." },
         { section: "Boundary Map", required: false, validationRule: "If present: per-wave or per-task interface contracts listing what each task produces (exports) and consumes (imports) from other tasks." },
-        { section: "WAIT_FOR_CONFIRM", required: true, validationRule: "Explicit marker present. Status: pending until user approves." }
+        { section: "WAIT_FOR_CONFIRM", required: true, validationRule: "Explicit marker present. Status: pending until user approves." },
+        { section: "No-Placeholder Scan", required: false, validationRule: "If present: confirmation that a text scan for `TODO`, `TBD`, `FIXME`, `<fill-in>`, `<your-*-here>`, `xxx`, or bare ellipses has zero hits in the task list. A placeholder is a deferred decision masquerading as a plan." }
     ],
     namedAntiPattern: {
         title: "Task Details Can Be Finalized During Coding",
@@ -1041,7 +1054,9 @@ const TDD = {
         { name: "Characterization First", description: "Before changing existing behavior, write characterization tests that capture current behavior as-is. These tests document what the system does today — even if that behavior is wrong. Only after the characterization suite is green do you add the new RED test for the desired change. This prevents accidental behavior destruction during refactoring." },
         { name: "Test Pyramid Shape", description: "Healthy test suites look like a pyramid: many small fast tests at the base, fewer medium integration tests in the middle, few large end-to-end tests at the top. Each layer catches a different class of bug; none of them substitutes for another. If your suite is top-heavy (mostly E2E) it is slow and flaky; if it is base-only it misses integration contracts. During TDD, default to the smallest layer that can prove the behavior." },
         { name: "Prove-It Pattern (bug fixes)", description: "For any reported regression or hotfix, the FIRST test is a reproduction — it must fail without your fix, pass with your fix, and fail again if the fix is reverted. This is the only way to prove you fixed the reported bug and not a superficially similar one. Skipping this step is how bugs come back two releases later wearing a different name." },
-        { name: "Test Size Model", description: "Size tests by scope, not by name: Small = pure logic, no I/O, <50ms; Medium = one process boundary, possibly filesystem or an in-memory DB; Large = multi-process / network / real external service. Small tests are the default; escalate to Medium only when a real boundary must be exercised, and to Large only for end-to-end user journeys. Record the size class in the TDD artifact so reviewers can sanity-check the pyramid shape." }
+        { name: "Test Size Model", description: "Size tests by scope, not by name: Small = pure logic, no I/O, <50ms; Medium = one process boundary, possibly filesystem or an in-memory DB; Large = multi-process / network / real external service. Small tests are the default; escalate to Medium only when a real boundary must be exercised, and to Large only for end-to-end user journeys. Record the size class in the TDD artifact so reviewers can sanity-check the pyramid shape." },
+        { name: "State Over Interaction", description: "Assert on observable outcomes (return values, state changes, persisted data, HTTP responses) — NOT on which helper methods were called, how many times, or in what order. Interaction-style assertions (`expect(mock.foo).toHaveBeenCalledWith(...)` without a state assertion) couple tests to implementation and shatter under harmless refactors. Use mocks only at trust boundaries (network, filesystem, time); for everything inside the module, let state do the asserting. If you cannot observe the outcome without a mock-spy, rework the seam before writing the test." },
+        { name: "Beyoncé Rule", description: "If you liked it, you should have put a test on it. Every surface that a caller can observe — public API, CLI flag, config key, exit code, persisted schema — is a contract, and every contract without a test is a silent regression waiting to happen. When a bug or production incident reveals an uncovered surface, the fix is never 'patch the code'; it is 'patch the code AND add the test that would have caught it'. Untested behavior does not exist for future refactors — it only exists until somebody accidentally removes it." }
     ],
     reviewSections: [
         {
@@ -1085,6 +1100,17 @@ const TDD = {
                 "Is there a note confirming the reproduction test fails again if the fix is reverted (or equivalent evidence that the test is actually pinned to this fix)?"
             ],
             stopGate: false
+        },
+        {
+            title: "State-over-Interaction + Beyoncé Coverage",
+            evaluationPoints: [
+                "Do assertions target observable state (return values, persisted data, HTTP responses, logs) rather than which internal helpers were called?",
+                "Are mocks/spies used only at true trust boundaries (network, filesystem, time, external services), not for module-internal collaborators?",
+                "For every public surface touched in this slice (exported API, CLI flag, config key, env var, exit code, schema field) — does at least one test observe it?",
+                "If a bug or review finding revealed an uncovered surface, was a test added alongside the fix, not just the code change?",
+                "Are interaction-style assertions (e.g. `toHaveBeenCalledWith` without a state assertion) justified by an explicit boundary comment, or flagged for follow-up?"
+            ],
+            stopGate: false
         }
     ],
     completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],

package/dist/content/templates.js CHANGED Viewed

@@ -278,9 +278,15 @@ export const ARTIFACT_TEMPLATES = {
 Execution rule: complete and verify each wave before starting the next wave.
 ## Task List
-| Task ID | Description | Acceptance criterion | Verification command | Effort |
-|---|---|---|---|---|
-| T-1 |  |  |  |  |
+**Rules (apply before writing rows):**
+- Every task fits the **2-5 minute budget**. If \`[~Nm]\` is >5, split the task.
+- **No placeholders.** Forbidden tokens anywhere in this table: \`TODO\`, \`TBD\`, \`FIXME\`, \`<fill-in>\`, \`<your-*-here>\`, \`xxx\`, bare ellipsis. Every file path, test, and verification command must be copy-pasteable as written.
+- If an estimate is genuinely uncertain (new library, unfamiliar subsystem), add a **spike task in wave 0** to de-risk — do NOT hide the uncertainty inside a large estimate.
+| Task ID | Description | Acceptance criterion | Verification command | Effort (S/M/L) | Minutes |
+|---|---|---|---|---|---|
+| T-1 |  |  |  |  | [~3m] |
 ## Acceptance Mapping
 | Criterion ID | Task IDs |
@@ -297,6 +303,10 @@ Execution rule: complete and verify each wave before starting the next wave.
 |---|---|---|
 |  |  |  |
+## No-Placeholder Scan
+- Scanned tokens: \`TODO\`, \`TBD\`, \`FIXME\`, \`<fill-in>\`, \`<your-*-here>\`, \`xxx\`, bare ellipsis in task rows.
+- Hits: 0 (required for WAIT_FOR_CONFIRM to resolve).
 ## WAIT_FOR_CONFIRM
 - Status: pending
 - Confirmed by:

package/dist/doctor.js CHANGED Viewed

@@ -258,13 +258,90 @@ export async function doctorChecks(projectRoot, options = {}) {
             const skillContent = await fs.readFile(skillPath, "utf8");
             const lineCount = skillContent.split("\n").length;
             const MIN_SKILL_LINES = 110;
+            const MAX_SKILL_LINES = 650;
             checks.push({
                 name: `skill:${stage}:min_lines`,
                 ok: lineCount >= MIN_SKILL_LINES,
                 details: `${skillPath} has ${lineCount} lines (minimum ${MIN_SKILL_LINES})`
             });
+            checks.push({
+                name: `skill:${stage}:max_lines`,
+                ok: lineCount <= MAX_SKILL_LINES,
+                details: `${skillPath} has ${lineCount} lines (soft max ${MAX_SKILL_LINES}; stage skills beyond this drift into unread bloat)`
+            });
+            const canonicalSections = [
+                { id: "frontmatter", pattern: /^---\nname: [\w-]+\ndescription: /m, label: "YAML frontmatter (name + description)" },
+                { id: "hard_gate", pattern: /^## HARD-GATE$/m, label: "## HARD-GATE" },
+                { id: "checklist", pattern: /^## Checklist$/m, label: "## Checklist" },
+                { id: "completion_protocol", pattern: /^## Stage Completion Protocol$/m, label: "## Stage Completion Protocol" },
+                { id: "handoff_menu", pattern: /^### Handoff Menu$/m, label: "### Handoff Menu" },
+                { id: "good_vs_bad", pattern: /Good vs Bad/i, label: "Good vs Bad examples" },
+                { id: "anti_patterns", pattern: /^## Anti-Patterns$/m, label: "## Anti-Patterns" }
+            ];
+            const missingSections = canonicalSections
+                .filter((section) => !section.pattern.test(skillContent))
+                .map((section) => section.label);
+            checks.push({
+                name: `skill:${stage}:canonical_sections`,
+                ok: missingSections.length === 0,
+                details: missingSections.length === 0
+                    ? `${skillPath} contains all canonical sections`
+                    : `${skillPath} missing sections: ${missingSections.join(", ")}`
+            });
         }
     }
+    // Meta-skill health — the using-cclaw routing brain must always contain the
+    // signals that stage skills reference. When one of these drifts, every stage
+    // citation breaks silently.
+    const metaSkillPath = path.join(projectRoot, RUNTIME_ROOT, "skills", "using-cclaw", "SKILL.md");
+    if (await exists(metaSkillPath)) {
+        const metaContent = await fs.readFile(metaSkillPath, "utf8");
+        const requiredSignals = [
+            { id: "instruction_priority", pattern: /Instruction Priority/i, label: "Instruction Priority" },
+            { id: "spawned_detection", pattern: /Spawned Subagent Detection/i, label: "Spawned Subagent Detection" },
+            { id: "shared_decision", pattern: /Shared Decision \+ Tool-Use Protocol/i, label: "Shared Decision + Tool-Use Protocol" },
+            { id: "shared_completion", pattern: /Shared Stage Completion Protocol/i, label: "Shared Stage Completion Protocol" },
+            { id: "escalation_rule", pattern: /Escalation Rule \(3 attempts\)/i, label: "Escalation Rule (3 attempts)" },
+            { id: "invocation_preamble", pattern: /Invocation Preamble/i, label: "Invocation Preamble" },
+            { id: "operational_self_improvement", pattern: /Operational Self-Improvement/i, label: "Operational Self-Improvement" },
+            { id: "engineering_ethos", pattern: /Engineering Ethos/i, label: "Engineering Ethos" },
+            { id: "task_classification", pattern: /Task Classification/i, label: "Task Classification" }
+        ];
+        const missingMeta = requiredSignals
+            .filter((signal) => !signal.pattern.test(metaContent))
+            .map((signal) => signal.label);
+        checks.push({
+            name: "skill:meta:signals",
+            ok: missingMeta.length === 0,
+            details: missingMeta.length === 0
+                ? `${metaSkillPath} contains all required routing signals`
+                : `${metaSkillPath} missing signals: ${missingMeta.join(", ")}`
+        });
+    }
+    // Harness tool-map references (A.1#4) must always be present — stage skills
+    // cite the paths by name.
+    const harnessRefDir = path.join(projectRoot, RUNTIME_ROOT, "references", "harness-tools");
+    const harnessRefFiles = ["README.md", "claude.md", "cursor.md", "opencode.md", "codex.md"];
+    for (const fileName of harnessRefFiles) {
+        const refPath = path.join(harnessRefDir, fileName);
+        checks.push({
+            name: `harness_tool_ref:${fileName.replace(/\.md$/, "")}`,
+            ok: await exists(refPath),
+            details: refPath
+        });
+    }
+    // Per-stage example references (A.2#8, progressive disclosure). Each stage
+    // skill's Examples section points here; the file MUST exist or the pointer
+    // is a dangling link.
+    const stageRefDir = path.join(projectRoot, RUNTIME_ROOT, "references", "stages");
+    for (const stage of COMMAND_FILE_ORDER) {
+        const refPath = path.join(stageRefDir, `${stage}-examples.md`);
+        checks.push({
+            name: `stage_examples_ref:${stage}`,
+            ok: await exists(refPath),
+            details: refPath
+        });
+    }
     checks.push({
         name: "gitignore:required_patterns",
         ok: await gitignoreHasRequiredPatterns(projectRoot),

package/dist/install.js CHANGED Viewed

@@ -17,7 +17,9 @@ import { contextMonitorScript, promptGuardScript, workflowGuardScript } from "./
 import { META_SKILL_NAME, usingCclawSkillMarkdown } from "./content/meta-skill.js";
 import { ARTIFACT_TEMPLATES, CURSOR_WORKFLOW_RULE_MDC, RULEBOOK_MARKDOWN, buildRulesJson } from "./content/templates.js";
 import { stageSkillFolder, stageSkillMarkdown } from "./content/skills.js";
+import { STAGE_EXAMPLES_REFERENCE_DIR, stageExamplesReferenceMarkdown } from "./content/examples.js";
 import { LANGUAGE_RULE_PACK_DIR, LANGUAGE_RULE_PACK_FILES, LANGUAGE_RULE_PACK_GENERATORS, LEGACY_LANGUAGE_RULE_PACK_FOLDERS, UTILITY_SKILL_FOLDERS, UTILITY_SKILL_MAP } from "./content/utility-skills.js";
+import { HARNESS_TOOL_REFS_DIR, HARNESS_TOOL_REFS_INDEX_MD, harnessToolRefMarkdown } from "./content/harness-tool-refs.js";
 import { createInitialFlowState } from "./flow-state.js";
 import { ensureDir, exists, writeFileSafe } from "./fs-utils.js";
 import { ensureGitignore, removeGitignorePatterns } from "./gitignore.js";
@@ -169,6 +171,14 @@ async function writeSkills(projectRoot, config) {
     for (const stage of COMMAND_FILE_ORDER) {
         const folder = stageSkillFolder(stage);
         await writeFileSafe(runtimePath(projectRoot, "skills", folder, "SKILL.md"), stageSkillMarkdown(stage));
+        // Progressive disclosure (A.2#8): materialize the full example artifact as
+        // a sibling reference file. The stage skill only links to it; agents load
+        // the reference on demand.
+        const referenceMarkdown = stageExamplesReferenceMarkdown(stage);
+        if (referenceMarkdown) {
+            const referenceDir = STAGE_EXAMPLES_REFERENCE_DIR.split("/");
+            await writeFileSafe(runtimePath(projectRoot, ...referenceDir, `${stage}-examples.md`), referenceMarkdown);
+        }
     }
     // Utility skills (not flow stages)
     await writeFileSafe(runtimePath(projectRoot, "skills", "learnings", "SKILL.md"), learnSkillMarkdown());
@@ -201,6 +211,15 @@ async function writeSkills(projectRoot, config) {
             await fs.rm(legacyPath, { recursive: true, force: true });
         }
     }
+    // Per-harness tool maps (A.1#4). One reference file per supported harness
+    // plus an index; stage/utility skills cite these instead of hardcoding
+    // tool names inline.
+    const harnessIds = ["claude", "cursor", "opencode", "codex"];
+    const harnessRefsDir = HARNESS_TOOL_REFS_DIR.split("/");
+    await writeFileSafe(runtimePath(projectRoot, ...harnessRefsDir, "README.md"), HARNESS_TOOL_REFS_INDEX_MD);
+    for (const harness of harnessIds) {
+        await writeFileSafe(runtimePath(projectRoot, ...harnessRefsDir, `${harness}.md`), harnessToolRefMarkdown(harness));
+    }
 }
 async function writeUtilityCommands(projectRoot) {
     await writeFileSafe(runtimePath(projectRoot, "commands", "learn.md"), learnCommandContract());

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "cclaw-cli",
-  "version": "0.8.0",
+  "version": "0.9.0",
   "description": "Installer-first flow toolkit for coding agents",
   "type": "module",
   "bin": {