npm - cclaw-cli - Versions diffs - 0.8.0 → 0.10.0 - Mend

cclaw-cli 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/dist/content/examples.d.ts +16 -0
package/dist/content/examples.js +364 -55
package/dist/content/harness-tool-refs.d.ts +20 -0
package/dist/content/harness-tool-refs.js +240 -0
package/dist/content/hooks.js +48 -2
package/dist/content/meta-skill.js +72 -4
package/dist/content/skills.d.ts +5 -0
package/dist/content/skills.js +118 -46
package/dist/content/stage-schema.d.ts +9 -3
package/dist/content/stage-schema.js +72 -22
package/dist/content/subagents.js +21 -0
package/dist/content/templates.js +13 -3
package/dist/doctor.js +82 -0
package/dist/harness-adapters.js +11 -3
package/dist/install.js +25 -1
package/dist/policy.js +1 -1
package/package.json +1 -1

package/dist/content/examples.d.ts CHANGED

@@ -1,3 +1,19 @@
 import type { FlowStage } from "../types.js";
 export declare function stageGoodBadExamples(stage: FlowStage): string;
+export declare const STAGE_EXAMPLES_REFERENCE_DIR = "references/stages";
+export declare function stageExamplesReferencePath(stage: FlowStage): string;
+/**
+ * Returns the full example artifact body as a standalone reference markdown
+ * file. Materialized under .cclaw/references/stages/<stage>-examples.md so
+ * the always-rendered skill body can link instead of inlining.
+ */
+export declare function stageExamplesReferenceMarkdown(stage: FlowStage): string | null;
+/**
+ * Returns the short inline pointer rendered directly inside the stage skill.
+ * Replaces the previous always-inline ~50-100 line fenced block and
+ * delivers true progressive disclosure: the full example lives in a
+ * reference file loaded on demand.
+ */
 export declare function stageExamples(stage: FlowStage): string;
+export type ExampleDomain = "web" | "cli" | "library" | "data-pipeline";
+export declare function stageDomainExamples(stage: FlowStage): string;

package/dist/content/examples.js CHANGED

@@ -433,68 +433,205 @@ Execution rule: complete and verify each wave before starting the next wave.
 - PR URL: https://github.com/example/repo/pull/42`,
 };
 const GOOD_BAD_EXAMPLES = {
-    brainstorm: {
-        good: "Problem: release checks are fragile and inconsistent between CI and local runs; invalid metadata sometimes reaches npm publish. Success: invalid release preconditions are caught before publish with explicit operator feedback, in both CI and local workflows. Constraints: no new runtime dependencies.",
-        bad: "Problem: releases are broken. Success: make them better. Constraints: be careful.",
-        lesson: "\"Make it better\" is not a success criterion — an agent cannot know when it is done. State the observable condition that proves success."
-    },
-    scope: {
-        good: "In scope: in-app notification feed, SSE delivery path, read/unread state, retry on transient failures. Out of scope: email/SMS/push providers, per-user preferences. Deferred: WebSocket channel, rich media, full-text search.",
-        bad: "In scope: notifications. Out of scope: stuff we are not doing. Deferred: v2.",
-        lesson: "Vague boundaries get relitigated in every subsequent stage. Enumerate concrete capabilities on each side — \"stuff we are not doing\" is not a decision."
-    },
-    design: {
-        good: "Failure: SSE connection drop. Trigger: network interruption. Detection: client heartbeat timeout (30s). Mitigation: auto-reconnect with exponential backoff + REST snapshot fallback. User impact: ≤10s delay, no data loss.",
-        bad: "Failure: network errors. Mitigation: retry and log. User impact: users may see issues sometimes.",
-        lesson: "A failure row without a detection signal and a bounded user impact is aspirational, not a design. Name the trigger, the detector, and the recovery behavior."
-    },
-    spec: {
-        good: "AC-1: Given a signed-in user with an active session, when the server publishes a new notification event for that user, the client feed shows the new item within 5 seconds without a full page reload.",
-        bad: "AC-1: Users should see their notifications quickly and reliably, with a good user experience.",
-        lesson: "Spec criteria must be observable, measurable, and falsifiable. \"Quickly\" is a feeling; \"within 5 seconds without a full page reload\" is a test."
-    },
-    plan: {
-        good: "T-2: Implement publisher + outbox write path. Acceptance: AC-1. Verification: `pnpm vitest run tests/integration/publisher.test.ts`. Depends on: T-1. Effort: M.",
-        bad: "T-2: Build the backend. Verify: manual testing. Effort: a few days.",
-        lesson: "A task without a single acceptance criterion and a reproducible verification command is a wish. If you cannot say how you will know it is done, you cannot ship it."
-    },
-    tdd: {
-        good: "RED: `pnpm vitest run tests/unit/dedupe-feed.test.ts` → `publishToOutbox is not a function`. GREEN (after minimal impl): same command, 47/47 pass, full suite. REFACTOR: extracted `mergeLatestByDedupeKey`; suite still 47/47.",
-        bad: "Wrote the publisher code. Tests pass now. Will add unit tests later when I have time.",
-        lesson: "Code written before a failing test is guessing validated after the fact. The RED failure IS the specification — without it, the GREEN pass proves nothing about the intended behavior."
-    },
-    review: {
-        good: "R-1 Critical: snapshot endpoint returns newest N rows but does not guarantee consistency with stream cursor — users can miss items between snapshot and subscribe. Evidence: integration test `notification-consistency.test.ts:22-58`. Status: open.",
-        bad: "Looks good overall. A few small things could be polished, maybe refactor the merge logic. LGTM.",
-        lesson: "\"LGTM\" is not a review — it is a signature on whatever the author shipped. Every finding needs a severity, a falsifiable description, evidence, and a status."
-    },
-    ship: {
-        good: "Rollback trigger: error rate on `/notifications/stream` >5% for 5 minutes, or p95 publish-to-visible lag >10s. Steps: `git revert <merge-sha> && git push origin main` then redeploy; run `2026_04_12_notifications_cursor_down.sql` before traffic. Verification: error rate returns to baseline within 10 minutes.",
-        bad: "Rollback plan: revert the commit if anything goes wrong.",
-        lesson: "\"Revert if anything goes wrong\" leaves the on-call engineer to invent the plan at 2 a.m. The rollback trigger is an operational contract: state the signal, the command, and the verification."
-    }
+    brainstorm: [
+        {
+            label: "Problem / success statement",
+            good: "Problem: release checks are fragile and inconsistent between CI and local runs; invalid metadata sometimes reaches npm publish. Success: invalid release preconditions are caught before publish with explicit operator feedback, in both CI and local workflows. Constraints: no new runtime dependencies.",
+            bad: "Problem: releases are broken. Success: make them better. Constraints: be careful.",
+            lesson: "\"Make it better\" is not a success criterion — an agent cannot know when it is done. State the observable condition that proves success."
+        },
+        {
+            label: "Alternative direction (one of 2–3)",
+            good: "Option B: Pre-publish verifier script invoked from \`release.yml\` and a \`pnpm release:check\` target. Pros: one enforcement surface; fails fast locally. Cons: adds a script to maintain; must stay in sync with \`package.json\`. Rejected alternative: relying on npm lifecycle hooks only — they run too late to block publish.",
+            bad: "We could also use a script, or hooks, or something in CI. We'll pick whichever is easier later.",
+            lesson: "Alternatives are only useful if they are concrete and comparable. Name each one, call out pros/cons, and say what was rejected — otherwise \"later\" becomes \"never\" and the choice is made by accident."
+        },
+        {
+            label: "Clarifying question",
+            good: "Before I lock direction: should a failed release:check block the CI job (hard failure) or only warn and continue? The former is safer but costs a revert cycle when the check itself is wrong; the latter preserves velocity but can let bad metadata through. Recommend A (block). Pick: A) Block  B) Warn-only  C) Block in CI, warn locally.",
+            bad: "Do you want it to fail or warn? Let me know.",
+            lesson: "A good question gives the user context, a recommendation, and lettered options they can answer with one keystroke. \"Let me know\" shifts the framing cost back to the user."
+        }
+    ],
+    scope: [
+        {
+            label: "In / out / deferred boundaries",
+            good: "In scope: in-app notification feed, SSE delivery path, read/unread state, retry on transient failures. Out of scope: email/SMS/push providers, per-user preferences. Deferred: WebSocket channel, rich media, full-text search.",
+            bad: "In scope: notifications. Out of scope: stuff we are not doing. Deferred: v2.",
+            lesson: "Vague boundaries get relitigated in every subsequent stage. Enumerate concrete capabilities on each side — \"stuff we are not doing\" is not a decision."
+        },
+        {
+            label: "Scope change trace",
+            good: "Scope delta at 2026-04-15: user asked to add per-user mute preferences. Decision: moved from Out-of-scope → In-scope; acknowledged cost (≈1 day, +1 schema migration); risk: touches settings surface. Recorded in \`03-design.md#scope-trace\`. Requires re-running scope review before design lock.",
+            bad: "Added mute preferences to scope.",
+            lesson: "Scope changes silently are how projects drift. Every in↔out move needs a timestamp, a cost estimate, and a link to the next review it invalidates."
+        }
+    ],
+    design: [
+        {
+            label: "Failure mode row",
+            good: "Failure: SSE connection drop. Trigger: network interruption. Detection: client heartbeat timeout (30s). Mitigation: auto-reconnect with exponential backoff + REST snapshot fallback. User impact: ≤10s delay, no data loss.",
+            bad: "Failure: network errors. Mitigation: retry and log. User impact: users may see issues sometimes.",
+            lesson: "A failure row without a detection signal and a bounded user impact is aspirational, not a design. Name the trigger, the detector, and the recovery behavior."
+        },
+        {
+            label: "Rejected design alternative",
+            good: "Considered WebSocket instead of SSE. Rejected because: (1) our proxy layer strips upgrade headers; (2) one-way push fits the \"notification feed\" semantics; (3) SSE plays nicer with HTTP/2 fan-out. Trade-off accepted: no client→server channel; we will fall back to REST for the tiny set of acks.",
+            bad: "We chose SSE. WebSocket could also work.",
+            lesson: "A design without a rejected alternative reads like a requirement, not a decision. The rejection is the part that survives review — it tells future readers what trade-off was taken."
+        },
+        {
+            label: "Diagram caption",
+            good: "Figure 1 — Notification pipeline (sequence diagram): producer → outbox(durable) → relay → SSE stream → client. Label on relay shows \"at-least-once; dedupe by event_id\"; label on client shows \"merge by dedupe_key before render\".",
+            bad: "Figure 1: notification flow.",
+            lesson: "An unlabeled diagram is decoration. Every arrow needs a delivery guarantee, every box needs an action verb — otherwise the diagram contradicts the prose without anyone noticing."
+        }
+    ],
+    spec: [
+        {
+            label: "Observable acceptance criterion",
+            good: "AC-1: Given a signed-in user with an active session, when the server publishes a new notification event for that user, the client feed shows the new item within 5 seconds without a full page reload.",
+            bad: "AC-1: Users should see their notifications quickly and reliably, with a good user experience.",
+            lesson: "Spec criteria must be observable, measurable, and falsifiable. \"Quickly\" is a feeling; \"within 5 seconds without a full page reload\" is a test."
+        },
+        {
+            label: "Negative / error-path criterion",
+            good: "AC-4: Given the SSE connection drops mid-session, when the client detects no heartbeat for 30 seconds, the UI shows a \"Reconnecting…\" badge and automatically re-subscribes; missed events delivered since the last ACKed id are replayed exactly once.",
+            bad: "AC-4: Handle errors gracefully.",
+            lesson: "Error-path criteria are where most bugs hide. Write them with the same \"given/when/then\" rigor as happy-path — otherwise QA ends up inventing them at release time."
+        },
+        {
+            label: "Non-functional budget",
+            good: "NFR-2: p95 end-to-end publish-to-visible latency ≤5s under 1k concurrent subscribers on a 2-vCPU pod; CPU headroom ≥30% at steady state. Measurement: \`k6 run tests/load/notifications.js\`, report median + p95 + p99.",
+            bad: "NFR-2: Performance should be good.",
+            lesson: "Non-functional goals without numbers + a measurement command are aspirational. Pin the percentile, the load shape, and the script that produces the evidence."
+        }
+    ],
+    plan: [
+        {
+            label: "Single task row",
+            good: "T-2: Implement publisher + outbox write path. Acceptance: AC-1. Verification: \`pnpm vitest run tests/integration/publisher.test.ts\`. Depends on: T-1. Effort: M (≈4 min).",
+            bad: "T-2: Build the backend. Verify: manual testing. Effort: a few days.",
+            lesson: "A task without a single acceptance criterion and a reproducible verification command is a wish. If you cannot say how you will know it is done, you cannot ship it."
+        },
+        {
+            label: "Dependency graph entry",
+            good: "T-5 (consume SSE client) depends on T-3 (stream endpoint) and T-4 (auth cookie forwarding). Parallelizable with T-6 (read-state persistence). Blocks T-8 (end-to-end happy-path e2e).",
+            bad: "T-5 depends on other tasks.",
+            lesson: "The value of a dependency graph is mechanical scheduling. \"Depends on other tasks\" is a shrug — list the IDs so the execution order is unambiguous."
+        }
+    ],
+    tdd: [
+        {
+            label: "RED → GREEN → REFACTOR slice",
+            good: "RED: \`pnpm vitest run tests/unit/dedupe-feed.test.ts\` → \`publishToOutbox is not a function\`. GREEN (after minimal impl): same command, 47/47 pass, full suite. REFACTOR: extracted \`mergeLatestByDedupeKey\`; suite still 47/47.",
+            bad: "Wrote the publisher code. Tests pass now. Will add unit tests later when I have time.",
+            lesson: "Code written before a failing test is guessing validated after the fact. The RED failure IS the specification — without it, the GREEN pass proves nothing about the intended behavior."
+        },
+        {
+            label: "Bug-fix reproduction test",
+            good: "Bug B-17: dedup fails when two events arrive in the same ms. Prove-It RED: added \`tests/unit/dedupe-feed.test.ts > dedupes when timestamps collide\`; run → \`expected 1 item, received 2\`. Fix applied; same test passes; full suite still 47/47.",
+            bad: "Fixed the duplicate rendering issue.",
+            lesson: "A bug without a reproducing test is a bug that comes back. Ship the RED test as part of the fix — it is the contract that prevents regression."
+        },
+        {
+            label: "Refactor-only slice (state-based)",
+            good: "Refactor: moved heartbeat logic into \`useHeartbeat()\` hook. No behavior change intended. Evidence: no new tests; existing state-based tests \`feed-state.test.ts\` (42 assertions) still pass; coverage unchanged at 94%.",
+            bad: "Refactored the component. Added some interaction mocks to check the new hook is called.",
+            lesson: "A refactor should assert on state, not on call shape. If you had to rewrite your mocks, it was not a refactor — it was a redesign dressed as one."
+        }
+    ],
+    review: [
+        {
+            label: "Critical finding",
+            good: "R-1 Critical: snapshot endpoint returns newest N rows but does not guarantee consistency with stream cursor — users can miss items between snapshot and subscribe. Evidence: integration test \`notification-consistency.test.ts:22-58\`. Status: open.",
+            bad: "Looks good overall. A few small things could be polished, maybe refactor the merge logic. LGTM.",
+            lesson: "\"LGTM\" is not a review — it is a signature on whatever the author shipped. Every finding needs a severity, a falsifiable description, evidence, and a status."
+        },
+        {
+            label: "Security review row",
+            good: "R-4 High (sec): SSE endpoint accepts any user_id in the query string; a logged-in attacker can subscribe to another user's stream. Evidence: \`curl\` repro in \`docs/notes/sec-r4.md\`. Fix: require auth cookie, filter events by session.user.id server-side. Status: fix in T-11; verified in \`notifications-auth.test.ts\`.",
+            bad: "Might want to double-check auth on the SSE endpoint.",
+            lesson: "Security findings without a reproduction step and a tied fix-task are suggestions, not reviews. Attach the curl (or equivalent), the fix task ID, and the verification test."
+        }
+    ],
+    ship: [
+        {
+            label: "Rollback contract",
+            good: "Rollback trigger: error rate on \`/notifications/stream\` >5% for 5 minutes, or p95 publish-to-visible lag >10s. Steps: \`git revert <merge-sha> && git push origin main\` then redeploy; run \`2026_04_12_notifications_cursor_down.sql\` before traffic. Verification: error rate returns to baseline within 10 minutes.",
+            bad: "Rollback plan: revert the commit if anything goes wrong.",
+            lesson: "\"Revert if anything goes wrong\" leaves the on-call engineer to invent the plan at 2 a.m. The rollback trigger is an operational contract: state the signal, the command, and the verification."
+        },
+        {
+            label: "Preflight check",
+            good: "Preflight: \`pnpm release:check\` ✅ (package metadata ok, changeset captured), \`pnpm test\` ✅ 195/195, \`pnpm build\` ✅, CI green on feat/notifications @ \`abc1234\`, rollback plan captured, migration reviewed. Finalization mode: Merge via squash.",
+            bad: "All good, shipping it.",
+            lesson: "A preflight is a checklist that names each gate and the command that proved it. \"All good\" is a vibe — it cannot be audited after the fact when the deploy misbehaves."
+        }
+    ]
 };
 export function stageGoodBadExamples(stage) {
-    const sample = GOOD_BAD_EXAMPLES[stage];
-    if (!sample)
+    const samples = GOOD_BAD_EXAMPLES[stage];
+    if (!samples || samples.length === 0)
         return "";
-    return [
+    const blocks = [
         "## Good vs Bad (at-a-glance)",
         "",
-        "Contrasting samples to calibrate the quality bar for this stage. Read before writing the artifact — mirror the **Good** shape, avoid the **Bad** shape.",
-        "",
-        "**Good**",
-        "",
-        "> " + sample.good,
+        "Contrasting samples to calibrate the quality bar for this stage. Read before writing the artifact — mirror the **Good** shape, avoid the **Bad** shape. Each block targets a different axis of the stage so you can spot-check more than one dimension of your draft.",
+        ""
+    ];
+    samples.forEach((sample, index) => {
+        blocks.push(`### ${index + 1}. ${sample.label}`);
+        blocks.push("");
+        blocks.push("**Good**");
+        blocks.push("");
+        blocks.push("> " + sample.good);
+        blocks.push("");
+        blocks.push("**Bad**");
+        blocks.push("");
+        blocks.push("> " + sample.bad);
+        blocks.push("");
+        blocks.push("**Why it matters:** " + sample.lesson);
+        blocks.push("");
+    });
+    return blocks.join("\n");
+}
+export const STAGE_EXAMPLES_REFERENCE_DIR = "references/stages";
+export function stageExamplesReferencePath(stage) {
+    return `.cclaw/${STAGE_EXAMPLES_REFERENCE_DIR}/${stage}-examples.md`;
+}
+/**
+ * Returns the full example artifact body as a standalone reference markdown
+ * file. Materialized under .cclaw/references/stages/<stage>-examples.md so
+ * the always-rendered skill body can link instead of inlining.
+ */
+export function stageExamplesReferenceMarkdown(stage) {
+    const examples = STAGE_EXAMPLES[stage];
+    if (!examples)
+        return null;
+    return [
+        `---`,
+        `stage: ${stage}`,
+        `name: ${stage}-stage-examples`,
+        `description: "Full sample artifact for the ${stage} stage. Loaded only when an agent explicitly needs a complete example; the stage skill links here rather than inlining."`,
+        `---`,
         "",
-        "**Bad**",
+        `# ${stage} stage — full artifact sample`,
         "",
-        "> " + sample.bad,
+        `This file is linked from \`.cclaw/skills/<${stage}-stage>/SKILL.md\` under **Examples → See also**. The sample uses H2 headings that mirror the artifact a cclaw session must produce, so the markdown is wrapped in a fence to avoid collapsing into the outline.`,
         "",
-        "**Why it matters:** " + sample.lesson,
+        "```markdown",
+        examples,
+        "```",
         ""
     ].join("\n");
 }
+/**
+ * Returns the short inline pointer rendered directly inside the stage skill.
+ * Replaces the previous always-inline ~50-100 line fenced block and
+ * delivers true progressive disclosure: the full example lives in a
+ * reference file loaded on demand.
+ */
 export function stageExamples(stage) {
     const examples = STAGE_EXAMPLES[stage];
     if (!examples)
@@ -502,11 +639,183 @@ export function stageExamples(stage) {
     return [
         "## Examples",
         "",
-        "Concrete artifact samples. These mirror the exact heading levels agents must use when authoring the stage artifact (all H2 `##` sections), so they are presented inside a markdown fence to avoid collapsing into the SKILL outline.",
+        `Full artifact sample for this stage lives at \`${stageExamplesReferencePath(stage)}\`. Open it when you need a complete reference; do NOT paste the example into the artifact verbatim — it is a shape guide, not a template.`,
         "",
-        "```markdown",
-        examples,
-        "```",
+        "Summary of what the reference covers:",
+        ...exampleSummaryBullets(stage),
         ""
     ].join("\n");
 }
+function exampleSummaryBullets(stage) {
+    const headings = STAGE_EXAMPLE_SECTION_HEADINGS[stage] ?? [];
+    if (headings.length === 0)
+        return ["- Full artifact structure."];
+    return headings.map((heading) => `- ${heading}`);
+}
+// Kept in sync with STAGE_EXAMPLES above so the inline summary matches the
+// reference file without duplicating the heavy text. Update whenever the
+// sample in STAGE_EXAMPLES gains or loses a top-level section.
+const STAGE_EXAMPLE_SECTION_HEADINGS = {
+    brainstorm: [
+        "Problem framing (problem, success, constraints)",
+        "Candidate approaches with trade-offs",
+        "Recommended direction + open questions",
+        "Clarification log and decision record"
+    ],
+    scope: [
+        "In-scope / out-of-scope / deferred lists with concrete capabilities",
+        "Requirements table with stable R# IDs",
+        "Boundary stress-tests and non-negotiables",
+        "Decision record for premise challenges"
+    ],
+    design: [
+        "Blast-radius file list",
+        "Mandatory architecture diagram (Mermaid)",
+        "Failure-mode table with detection + mitigation",
+        "Test strategy + performance budget",
+        "Completion dashboard + unresolved decisions"
+    ],
+    spec: [
+        "Acceptance-criteria table (observable, measurable, falsifiable)",
+        "Requirement-ref column tying each AC back to an R# from scope",
+        "Verification-approach column",
+        "Approval block"
+    ],
+    plan: [
+        "Dependency graph + dependency waves",
+        "Task list with effort + minutes estimate per task",
+        "Acceptance mapping (every AC → task IDs)",
+        "No-Placeholder scan row + WAIT_FOR_CONFIRM marker"
+    ],
+    tdd: [
+        "RED evidence per slice (failing test output)",
+        "Acceptance mapping per slice",
+        "GREEN evidence (full-suite pass)",
+        "REFACTOR notes with behavior-preservation confirmation",
+        "Test-pyramid shape + prove-it reproduction when applicable"
+    ],
+    review: [
+        "Spec-compliance findings (Layer 1)",
+        "Code-quality findings (Layer 2)",
+        "Severity, evidence, and status per finding",
+        "Go / no-go verdict"
+    ],
+    ship: [
+        "Release checklist (version, changelog, tag, artifacts)",
+        "Rollback plan with trigger, steps, verification",
+        "Runbook (how to verify the release post-deploy)",
+        "Sign-off block"
+    ]
+};
+const DOMAIN_LABELS = {
+    web: "Web app (full-stack)",
+    cli: "CLI tool",
+    library: "Library / SDK",
+    "data-pipeline": "Data pipeline / ETL"
+};
+const STAGE_DOMAIN_SAMPLES = {
+    spec: [
+        {
+            domain: "web",
+            label: "AC",
+            body: "AC-W1: Given a signed-in admin viewing `/dashboard/orders`, when an order's status changes server-side, the row updates within 2s without a full navigation (assert via `pnpm playwright test orders-live.spec.ts`)."
+        },
+        {
+            domain: "cli",
+            label: "AC",
+            body: "AC-C1: Given `cclaw init --claude` run in an empty directory, exit code is `0`, `.cclaw/config.yaml` is created with `harnesses: [claude]`, and stderr contains no warnings (asserted by `tests/integration/init-sync-doctor.test.ts`)."
+        },
+        {
+            domain: "library",
+            label: "AC",
+            body: "AC-L1: `validateHookDocument(obj)` returns `{ ok: true }` for every fixture under `tests/fixtures/valid-hooks/` and `{ ok: false, errors: [...] }` with at least one message for every fixture under `tests/fixtures/invalid-hooks/`."
+        },
+        {
+            domain: "data-pipeline",
+            label: "AC",
+            body: "AC-D1: For any `orders.csv` input, the pipeline emits exactly one row per `(order_id, event_ts)` pair to `warehouse.fact_orders`; running the job twice on the same input is idempotent (row count unchanged, verified by `dbt test --select fact_orders`)."
+        }
+    ],
+    plan: [
+        {
+            domain: "web",
+            label: "Task",
+            body: "T-W-3 `[~4m]`: Wire SSE endpoint `/api/orders/stream` into `useOrderFeed` hook. AC-W1. Verify: `pnpm playwright test orders-live.spec.ts`. Depends on: T-W-2."
+        },
+        {
+            domain: "cli",
+            label: "Task",
+            body: "T-C-2 `[~3m]`: Add `--dry-run` flag to `cclaw archive` that prints the would-be-archived run IDs to stdout and exits 0. AC-C3. Verify: `node dist/cli.js archive --dry-run` + `tests/unit/cli-parse.test.ts`."
+        },
+        {
+            domain: "library",
+            label: "Task",
+            body: "T-L-1 `[~5m]`: Expose `validateHookDocument` from the package root and re-export its types. AC-L1. Verify: `pnpm build && node -e \"console.log(require('./dist').validateHookDocument)\"`."
+        },
+        {
+            domain: "data-pipeline",
+            label: "Task",
+            body: "T-D-2 `[~5m]`: Add dedup step keyed on `(order_id, event_ts)` between `raw.orders` and `fact_orders`. AC-D1. Verify: `dbt run --select fact_orders+ && dbt test --select fact_orders`."
+        }
+    ],
+    tdd: [
+        {
+            domain: "web",
+            label: "RED→GREEN→REFACTOR",
+            body: "RED: `pnpm playwright test orders-live.spec.ts` → timeout waiting for row update. GREEN: wired SSE event → row rerenders via `useOrderFeed`. REFACTOR: extracted `applyOrderEvent(row, event)` pure helper; 87/87 tests still pass."
+        },
+        {
+            domain: "cli",
+            label: "RED→GREEN→REFACTOR",
+            body: "RED: `tests/unit/cli-parse.test.ts` expects `--dry-run` flag → `unknown option` error. GREEN: added to the Zod parser; 19/19 pass. REFACTOR: hoisted the dry-run formatter into `src/cli/format.ts` shared with `status`."
+        },
+        {
+            domain: "library",
+            label: "RED→GREEN→REFACTOR",
+            body: "RED: `tests/unit/hook-schema.test.ts` imports `validateHookDocument` from package root → `export not found`. GREEN: added re-export + types. REFACTOR: renamed internal `__validate` to `validateHookDocument` so the export name matches the source."
+        },
+        {
+            domain: "data-pipeline",
+            label: "RED→GREEN→REFACTOR",
+            body: "RED: `dbt test --select fact_orders` → `unique test on (order_id, event_ts)` fails on re-run. GREEN: added `row_number()` dedup in the staging model. REFACTOR: extracted the dedup CTE into `int_orders_deduped` for reuse by `fact_returns`."
+        }
+    ],
+    ship: [
+        {
+            domain: "web",
+            label: "Rollback",
+            body: "Trigger: error rate on `/api/orders/stream` > 2% for 5 minutes, or p95 latency > 1.5s for 10 minutes. Steps: `vercel rollback <deployment>`; run `2026_04_14_revert_orders_stream.sql` before traffic returns. Verify: error rate returns to baseline within 10 minutes on the `orders-live` dashboard."
+        },
+        {
+            domain: "cli",
+            label: "Rollback",
+            body: "Trigger: `cclaw init --claude` exits non-zero on a fresh tmp dir, OR `cclaw doctor` regresses (FAIL count increases) on the smoke matrix. Steps: `npm unpublish cclaw-cli@<version>` (within the 72h window) or `npm deprecate cclaw-cli@<version> '<reason>'`; publish the previous patch. Verify: `npx cclaw-cli@latest --version` prints the previous version."
+        },
+        {
+            domain: "library",
+            label: "Rollback",
+            body: "Trigger: any consumer reports `validateHookDocument` no longer exported, OR the CI `dual-package-check` job fails. Steps: `npm deprecate cclaw-cli@<version> 'broken package export — use <prev>'`; publish the previous minor with a patch bump; emit changelog `## Rollback` entry. Verify: a smoke consumer project `pnpm add cclaw-cli@latest` imports cleanly."
+        },
+        {
+            domain: "data-pipeline",
+            label: "Rollback",
+            body: "Trigger: `dbt test --select fact_orders` fails on production run, OR downstream dashboard MAU count drops >10% week-over-week. Steps: disable the new model via `dbt_project.yml` + `dbt run --select state:modified` with the previous git SHA; rerun backfill `dagster asset materialize fact_orders --partition <yesterday>`. Verify: `fact_orders` row count within ±1% of the previous week's baseline."
+        }
+    ]
+};
+export function stageDomainExamples(stage) {
+    const samples = STAGE_DOMAIN_SAMPLES[stage];
+    if (!samples || samples.length === 0)
+        return "";
+    const lines = [
+        "## Living Examples by Domain",
+        "",
+        "Use the row matching your project shape to calibrate voice, specificity, and command choice. The rows are deliberately terse — copy the **shape**, not the text.",
+        ""
+    ];
+    for (const sample of samples) {
+        lines.push(`**${DOMAIN_LABELS[sample.domain]} — ${sample.label}:** ${sample.body}`);
+        lines.push("");
+    }
+    return lines.join("\n");
+}

package/dist/content/harness-tool-refs.d.ts ADDED

@@ -0,0 +1,20 @@
+/**
+ * Per-harness tool-mapping reference files.
+ *
+ * Addresses A.1#4: the four supported harnesses (claude, cursor, opencode, codex)
+ * expose different primitive names for the same capabilities (ask-user,
+ * delegate/Task, web fetch, file edit, code execution, ...). cclaw's stage skills
+ * need to pick the right name at runtime without bloating every stage with per-harness
+ * if/else ladders.
+ *
+ * Each file below is short (one table per capability), authoritative, and materialised
+ * at `.cclaw/references/harness-tools/<harness>.md`. Stage skills and the meta-skill
+ * cite the folder instead of duplicating the mappings inline.
+ *
+ * When a new harness is added (or an existing one renames a tool), update the
+ * corresponding entry here — do NOT scatter tool names across skill text.
+ */
+import type { HarnessId } from "../types.js";
+export declare const HARNESS_TOOL_REFS_DIR = "references/harness-tools";
+export declare function harnessToolRefMarkdown(harness: HarnessId): string;
+export declare const HARNESS_TOOL_REFS_INDEX_MD = "---\nname: Harness tool maps\ndescription: \"Index file. One reference per supported harness \u2014 cite the per-harness file instead of hardcoding tool names in stage skills.\"\n---\n\n# Harness Tool Maps\n\ncclaw supports four harnesses; each exposes different primitive names for the same capabilities. Stage skills and utility skills cite the file matching the currently active harness and fall back to plain-text equivalents for capabilities that the harness lacks.\n\n| Harness | File | Notes |\n|---|---|---|\n| Claude Code | `.cclaw/references/harness-tools/claude.md` | Richest tool surface (AskUserQuestion, Task, WebFetch, WebSearch, MCP, \u2026). |\n| Cursor | `.cclaw/references/harness-tools/cursor.md` | Near-parity with Claude; uses `AskQuestion` instead of `AskUserQuestion`. |\n| OpenCode | `.cclaw/references/harness-tools/opencode.md` | No native ask-user / dispatch; more plain-text fallbacks. |\n| Codex | `.cclaw/references/harness-tools/codex.md` | No native ask-user / dispatch; shell + file I/O only by default. |\n\nWhen a new harness is added or an existing one renames a tool, update the corresponding file (and this index) \u2014 do NOT scatter tool names across skill text.\n";