npm - cclaw-cli - Versions diffs - 0.7.1 → 0.9.0 - Mend

cclaw-cli 0.7.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/dist/content/agents.d.ts +9 -0
package/dist/content/agents.js +177 -6
package/dist/content/examples.d.ts +17 -0
package/dist/content/examples.js +275 -4
package/dist/content/harness-tool-refs.d.ts +20 -0
package/dist/content/harness-tool-refs.js +240 -0
package/dist/content/meta-skill.js +203 -33
package/dist/content/skills.js +106 -49
package/dist/content/stage-schema.js +63 -11
package/dist/content/start-command.js +63 -17
package/dist/content/subagents.js +169 -0
package/dist/content/templates.js +44 -6
package/dist/content/utility-skills.d.ts +2 -1
package/dist/content/utility-skills.js +141 -2
package/dist/doctor.js +77 -0
package/dist/harness-adapters.js +55 -16
package/dist/install.js +19 -0
package/package.json +1 -1

package/dist/content/agents.d.ts CHANGED Viewed

@@ -33,6 +33,15 @@ export declare function agentMarkdown(agent: AgentDefinition): string;
  * Markdown table mapping Cclaw stage entry points to specialist agents.
  */
 export declare function agentRoutingTable(): string;
+/**
+ * Cost tier routing: keep heavy reasoning on the `deep` tier (planner, a
+ * single post-review reconciliation), push read-only research and narrow
+ * machine-only checks to the `fast` tier, and default review to `balanced`.
+ * This table is emitted into AGENTS.md so harness users understand why
+ * certain specialists can be fanned out in parallel without blowing the
+ * context budget.
+ *
+ * @returns A markdown table with one row per cost tier.
+ */
+export declare function agentCostTierTable(): string;
 /**
  * AGENTS.md-ready section describing Cclaw’s specialist delegation model.
  */

package/dist/content/agents.js CHANGED Viewed

@@ -170,6 +170,157 @@ export const CCLAW_AGENTS = [
             "**Scope control:** only update what needs updating — **do not rewrite** docs that remain correct.",
         ].join("\n"),
     },
+    {
+        name: "repo-research-analyst",
+        description: "PROACTIVE at the start of brainstorm/scope/design: delegates deep codebase exploration — existing modules, ownership boundaries, duplication, and reuse candidates — so the primary agent can plan from a grounded map instead of guesses.",
+        tools: ["Read", "Grep", "Glob"],
+        model: "fast",
+        activation: "proactive",
+        relatedStages: ["brainstorm", "scope", "design"],
+        body: [
+            "You are a **repo research analyst**.",
+            "",
+            "Scan the codebase for existing modules, helpers, patterns, and ownership boundaries relevant to the current task. Deliver a grounded map the primary agent can plan against.",
+            "",
+            "**Process:**",
+            "",
+            "1. Identify the task domain keywords (nouns, verbs, known file/module names).",
+            "2. Glob for obvious homes (by convention: `src/**`, `packages/**`, `apps/**`, etc.).",
+            "3. Grep for existing implementations of the same capability.",
+            "4. Enumerate adjacent tests/fixtures that already cover the area.",
+            "5. Flag duplication, near-duplicates, and reuse candidates with file:line.",
+            "",
+            "**Output schema:**",
+            "",
+            "- `Relevant modules:` bulleted list with `path — 1-line purpose`.",
+            "- `Reuse candidates:` bulleted list with `file:line — why this could absorb the change`.",
+            "- `Ownership hints:` any CODEOWNERS / README / comment signals.",
+            "- `Gaps:` what does NOT yet exist that the task would need.",
+            "",
+            "**Role boundary:** read-only. Do NOT edit files. Cite `file:line` for every claim; never guess paths.",
+        ].join("\n"),
+    },
+    {
+        name: "learnings-researcher",
+        description: "PROACTIVE before every non-trivial stage: streams `.cclaw/knowledge.jsonl` and surfaces the entries (rules, patterns, lessons, compounds) most relevant to the current task before the primary agent commits to a direction.",
+        tools: ["Read", "Grep", "Glob"],
+        model: "fast",
+        activation: "proactive",
+        relatedStages: ["brainstorm", "scope", "design", "spec", "plan", "tdd", "review", "ship"],
+        body: [
+            "You are a **project learnings researcher**.",
+            "",
+            "Stream `.cclaw/knowledge.jsonl` and surface the entries most relevant to the current task. The goal is to prevent the primary agent from re-learning things the project already wrote down.",
+            "",
+            "**Process:**",
+            "",
+            "1. Parse `.cclaw/knowledge.jsonl` (one JSON object per line, strict schema).",
+            "2. Match entries by `domain`, `stage`, and substring overlap with the current task description.",
+            "3. Rank by `confidence` then recency (`created`).",
+            "4. Group by `type` (rule, pattern, lesson, compound).",
+            "5. Return the top 10 entries verbatim with a one-line reason each.",
+            "",
+            "**Output schema:**",
+            "",
+            "- `Matched rules:` list of `trigger → action (confidence)`.",
+            "- `Matched patterns:` list of `trigger → action (confidence)`.",
+            "- `Matched lessons:` list of `trigger → action (confidence)`.",
+            "- `Matched compounds:` list of `trigger → action (confidence)`.",
+            "- `No-match note:` if nothing relevant exists, say so explicitly.",
+            "",
+            "**Role boundary:** read-only. Never rewrite or delete entries — corrections are appended by the primary agent via `/cc-learn add`.",
+        ].join("\n"),
+    },
+    {
+        name: "framework-docs-researcher",
+        description: "PROACTIVE during design/spec/tdd for tasks that touch a specific framework, library, or SDK: fetches authoritative, version-aware documentation (via context7 when available) so implementation matches the live API, not training priors.",
+        tools: ["Read", "Grep", "Glob", "WebSearch", "WebFetch"],
+        model: "fast",
+        activation: "on-demand",
+        relatedStages: ["design", "spec", "tdd", "review"],
+        body: [
+            "You are a **framework documentation researcher**.",
+            "",
+            "Fetch authoritative, version-aware docs for any library/framework/SDK/CLI the current task depends on. The goal is to replace model priors with live API references.",
+            "",
+            "**Process:**",
+            "",
+            "1. Identify the exact library + version from the repo (package.json, pyproject, go.mod, etc.).",
+            "2. If context7 MCP is available, use it first — it returns docs keyed to the installed version.",
+            "3. Otherwise WebSearch / WebFetch for the official docs site or the tagged release changelog.",
+            "4. Capture: public API signatures, breaking changes since a major version back, migration notes, and any deprecated paths relevant to the task.",
+            "",
+            "**Output schema:**",
+            "",
+            "- `Library + version:` name and resolved version.",
+            "- `Key APIs:` bullet list of signatures the task will touch.",
+            "- `Breaking changes:` notable deltas relevant to the task.",
+            "- `Gotchas:` footguns, deprecated paths, version-gated flags.",
+            "- `Source:` URL(s) or MCP reference used.",
+            "",
+            "**Role boundary:** never invent APIs. If docs are unclear, say `UNKNOWN` and surface the gap instead of guessing.",
+        ].join("\n"),
+    },
+    {
+        name: "best-practices-researcher",
+        description: "PROACTIVE during design/spec when the task touches a well-known domain (auth, caching, rate limiting, observability, accessibility, etc.): delivers a short, opinionated best-practice summary grounded in citable sources.",
+        tools: ["Read", "Grep", "Glob", "WebSearch", "WebFetch"],
+        model: "fast",
+        activation: "on-demand",
+        relatedStages: ["brainstorm", "scope", "design", "spec", "review"],
+        body: [
+            "You are a **best-practices researcher**.",
+            "",
+            "For a named domain (auth, caching, rate limiting, observability, accessibility, etc.), deliver a short, opinionated best-practice summary that is citable and current.",
+            "",
+            "**Process:**",
+            "",
+            "1. Restate the domain + narrow it to the sub-problem the task is solving.",
+            "2. Gather 3–5 authoritative sources (official docs, IETF / W3C / OWASP references, well-known community standards).",
+            "3. Surface the 5–8 practices most relevant to the task, each with one-line rationale + source.",
+            "4. Flag practices that look common but are anti-patterns today.",
+            "",
+            "**Output schema:**",
+            "",
+            "- `Domain + sub-problem:` one sentence.",
+            "- `Recommended practices:` list of `practice — rationale — source`.",
+            "- `Common traps:` list of `trap — why it fails — source`.",
+            "- `Decision hooks:` 1–3 explicit questions the primary agent must answer before moving on.",
+            "",
+            "**Role boundary:** never prescribe a choice without citing a source. If the domain has no authoritative answer, say so.",
+        ].join("\n"),
+    },
+    {
+        name: "git-history-analyzer",
+        description: "PROACTIVE when a task touches an existing module: reads git log/blame/diff to surface prior changes, failed attempts, revert patterns, and code owners that bias the current plan.",
+        tools: ["Read", "Grep", "Glob", "Bash"],
+        model: "fast",
+        activation: "on-demand",
+        relatedStages: ["scope", "design", "plan", "review"],
+        body: [
+            "You are a **git history analyzer**.",
+            "",
+            "Read commit history, blame, and recent diffs for files the current task touches. The goal is to expose prior context (attempts, reverts, owners, flaky surfaces) the primary agent would otherwise miss.",
+            "",
+            "**Process:**",
+            "",
+            "1. For each impacted path: `git log --follow -n 20 -- <path>` and note the themes.",
+            "2. `git blame` the hot lines to surface current owners.",
+            "3. Look for `Revert ...`, `Reopen ...`, or repeated regressions in the last 90 days.",
+            "4. Check CODEOWNERS / committer frequency for ownership signal.",
+            "5. Flag any recent refactors or migrations in-flight that this task might collide with.",
+            "",
+            "**Output schema:**",
+            "",
+            "- `Impacted paths:` list.",
+            "- `Recent themes:` 3–5 bullets summarizing what changed lately in those paths.",
+            "- `Revert/regression signals:` list with commit SHAs.",
+            "- `Owners:` best-guess owners with supporting evidence.",
+            "- `Collision risks:` in-flight branches/migrations that overlap.",
+            "",
+            "**Role boundary:** read-only; never amend history, never `git push`. Use `git` commands only.",
+        ].join("\n"),
+    },
 ];
 import { enhancedAgentBody } from "./subagents.js";
 /**
@@ -213,13 +364,29 @@ ${taskDelegation}
 export function agentRoutingTable() {
     return `| Stage Entry | Primary Agent | Supporting Agents |
 |---|---|---|
-| Brainstorm (start with \`/cc <idea>\`) | planner | — |
-| Scope / Design / Spec / Plan (advance via \`/cc-next\`) | planner | security-reviewer on design, spec-reviewer on spec |
-| TDD (via \`/cc-next\`) | test-author | doc-updater |
-| Review (via \`/cc-next\`) | spec-reviewer, code-reviewer, security-reviewer | — |
+| Brainstorm (start with \`/cc <idea>\`) | planner | repo-research-analyst, learnings-researcher, best-practices-researcher |
+| Scope / Design / Spec / Plan (advance via \`/cc-next\`) | planner | security-reviewer on design, spec-reviewer on spec, framework-docs-researcher + git-history-analyzer on design/plan |
+| TDD (via \`/cc-next\`) | test-author | doc-updater, framework-docs-researcher |
+| Review (via \`/cc-next\`) | spec-reviewer, code-reviewer, security-reviewer | best-practices-researcher, git-history-analyzer |
 | Ship (via \`/cc-next\`) | — | doc-updater |
 `;
 }
+/**
+ * Cost tier routing: keep heavy reasoning on the `deep` tier (planner, a
+ * single post-review reconciliation), push read-only research and narrow
+ * machine-only checks to the `fast` tier, and default review to `balanced`.
+ * This table is emitted into AGENTS.md so harness users understand why
+ * certain specialists can be fanned out in parallel without blowing the
+ * context budget.
+ *
+ * @returns A markdown table (header, separator, one row per tier:
+ *   `deep`, `balanced`, `fast`) terminated by a trailing newline.
+ */
+export function agentCostTierTable() {
+    return `| Tier | Use for | Example agents |
+|---|---|---|
+| \`deep\` | one heavy plan or one final reconciliation per stage | planner |
+| \`balanced\` | spec compliance and code/security review with enough context | spec-reviewer, code-reviewer, security-reviewer, test-author |
+| \`fast\` | read-only research / narrow machine checks / docs updates; safe to fan out 3-5× in parallel | repo-research-analyst, learnings-researcher, framework-docs-researcher, best-practices-researcher, git-history-analyzer, doc-updater |
+`;
+}
 /**
  * AGENTS.md-ready section describing Cclaw’s specialist delegation model.
  */
@@ -232,8 +399,12 @@ ${agentRoutingTable()}
 **Activation modes:**
 - **Mandatory:** MUST be used when the related stage runs (spec-reviewer, code-reviewer, and security-reviewer during review; planner during scope and design; test-author during tdd; doc-updater during ship). Even if a change has no trust-boundary impact, security-reviewer produces an explicit no-change attestation.
-- **Proactive:** Should be used automatically when context matches (planner for complex features, security-reviewer escalations outside review, doc-updater on behavior changes)
-- **On-demand:** Invoked only when explicitly requested
+- **Proactive:** Should be used automatically when context matches (planner for complex features, repo-research-analyst / learnings-researcher at the start of brainstorm/scope/design, security-reviewer escalations outside review, doc-updater on behavior changes).
+- **On-demand:** Invoked only when explicitly requested, but strongly suggested in the matching contexts (framework-docs-researcher when the task touches a specific library/SDK, best-practices-researcher when the task touches a well-known domain, git-history-analyzer when the task touches existing code).
+### Cost-aware routing
+${agentCostTierTable()}
 **Agent files:** \`.cclaw/agents/{name}.md\` — each contains YAML frontmatter with tools and model tier.
 `;

package/dist/content/examples.d.ts CHANGED Viewed

@@ -1,2 +1,19 @@
 import type { FlowStage } from "../types.js";
+/**
+ * Returns a short "Good vs Bad" markdown section contrasting one good and
+ * one bad sample for the given stage, or an empty string when the stage has
+ * no such sample.
+ */
+export declare function stageGoodBadExamples(stage: FlowStage): string;
+/** Directory, relative to `.cclaw/`, that holds the per-stage example reference files. */
+export declare const STAGE_EXAMPLES_REFERENCE_DIR = "references/stages";
+/**
+ * Returns the path of the example reference file for a stage, in the form
+ * `.cclaw/references/stages/<stage>-examples.md`.
+ */
+export declare function stageExamplesReferencePath(stage: FlowStage): string;
+/**
+ * Returns the full example artifact body as a standalone reference markdown
+ * file. Materialized under .cclaw/references/stages/<stage>-examples.md so
+ * the always-rendered skill body can link instead of inlining.
+ */
+export declare function stageExamplesReferenceMarkdown(stage: FlowStage): string | null;
+/**
+ * Returns the short inline pointer rendered directly inside the stage skill.
+ * Replaces the previous always-inline ~50-100 line fenced block and
+ * delivers true progressive disclosure: the full example lives in a
+ * reference file loaded on demand.
+ */
 export declare function stageExamples(stage: FlowStage): string;
+/** Project shapes for which per-domain example snippets are provided. */
+export type ExampleDomain = "web" | "cli" | "library" | "data-pipeline";
+/**
+ * Returns a "Living Examples by Domain" markdown section with one terse
+ * sample per domain for the given stage, or an empty string when the stage
+ * has no domain samples.
+ */
+export declare function stageDomainExamples(stage: FlowStage): string;

package/dist/content/examples.js CHANGED Viewed

@@ -432,14 +432,92 @@ Execution rule: complete and verify each wave before starting the next wave.
 - Execution result: PR #42 created via \`gh pr create\`; CI passed; squash-merged to main.
 - PR URL: https://github.com/example/repo/pull/42`,
 };
-export function stageExamples(stage) {
+// One contrasting sample pair per stage, keyed by stage name. Each entry
+// carries a `good` sample, a `bad` sample, and a `lesson` explaining why the
+// difference matters; stageGoodBadExamples() renders them as markdown.
+const GOOD_BAD_EXAMPLES = {
+    brainstorm: {
+        good: "Problem: release checks are fragile and inconsistent between CI and local runs; invalid metadata sometimes reaches npm publish. Success: invalid release preconditions are caught before publish with explicit operator feedback, in both CI and local workflows. Constraints: no new runtime dependencies.",
+        bad: "Problem: releases are broken. Success: make them better. Constraints: be careful.",
+        lesson: "\"Make it better\" is not a success criterion — an agent cannot know when it is done. State the observable condition that proves success."
+    },
+    scope: {
+        good: "In scope: in-app notification feed, SSE delivery path, read/unread state, retry on transient failures. Out of scope: email/SMS/push providers, per-user preferences. Deferred: WebSocket channel, rich media, full-text search.",
+        bad: "In scope: notifications. Out of scope: stuff we are not doing. Deferred: v2.",
+        lesson: "Vague boundaries get relitigated in every subsequent stage. Enumerate concrete capabilities on each side — \"stuff we are not doing\" is not a decision."
+    },
+    design: {
+        good: "Failure: SSE connection drop. Trigger: network interruption. Detection: client heartbeat timeout (30s). Mitigation: auto-reconnect with exponential backoff + REST snapshot fallback. User impact: ≤10s delay, no data loss.",
+        bad: "Failure: network errors. Mitigation: retry and log. User impact: users may see issues sometimes.",
+        lesson: "A failure row without a detection signal and a bounded user impact is aspirational, not a design. Name the trigger, the detector, and the recovery behavior."
+    },
+    spec: {
+        good: "AC-1: Given a signed-in user with an active session, when the server publishes a new notification event for that user, the client feed shows the new item within 5 seconds without a full page reload.",
+        bad: "AC-1: Users should see their notifications quickly and reliably, with a good user experience.",
+        lesson: "Spec criteria must be observable, measurable, and falsifiable. \"Quickly\" is a feeling; \"within 5 seconds without a full page reload\" is a test."
+    },
+    plan: {
+        good: "T-2: Implement publisher + outbox write path. Acceptance: AC-1. Verification: `pnpm vitest run tests/integration/publisher.test.ts`. Depends on: T-1. Effort: M.",
+        bad: "T-2: Build the backend. Verify: manual testing. Effort: a few days.",
+        lesson: "A task without a single acceptance criterion and a reproducible verification command is a wish. If you cannot say how you will know it is done, you cannot ship it."
+    },
+    tdd: {
+        good: "RED: `pnpm vitest run tests/unit/dedupe-feed.test.ts` → `publishToOutbox is not a function`. GREEN (after minimal impl): same command, 47/47 pass, full suite. REFACTOR: extracted `mergeLatestByDedupeKey`; suite still 47/47.",
+        bad: "Wrote the publisher code. Tests pass now. Will add unit tests later when I have time.",
+        lesson: "Code written before a failing test is guessing validated after the fact. The RED failure IS the specification — without it, the GREEN pass proves nothing about the intended behavior."
+    },
+    review: {
+        good: "R-1 Critical: snapshot endpoint returns newest N rows but does not guarantee consistency with stream cursor — users can miss items between snapshot and subscribe. Evidence: integration test `notification-consistency.test.ts:22-58`. Status: open.",
+        bad: "Looks good overall. A few small things could be polished, maybe refactor the merge logic. LGTM.",
+        lesson: "\"LGTM\" is not a review — it is a signature on whatever the author shipped. Every finding needs a severity, a falsifiable description, evidence, and a status."
+    },
+    ship: {
+        good: "Rollback trigger: error rate on `/notifications/stream` >5% for 5 minutes, or p95 publish-to-visible lag >10s. Steps: `git revert <merge-sha> && git push origin main` then redeploy; run `2026_04_12_notifications_cursor_down.sql` before traffic. Verification: error rate returns to baseline within 10 minutes.",
+        bad: "Rollback plan: revert the commit if anything goes wrong.",
+        lesson: "\"Revert if anything goes wrong\" leaves the on-call engineer to invent the plan at 2 a.m. The rollback trigger is an operational contract: state the signal, the command, and the verification."
+    }
+};
+/**
+ * Renders the "Good vs Bad (at-a-glance)" markdown section for a stage.
+ *
+ * @param stage Stage name used as the key into GOOD_BAD_EXAMPLES.
+ * @returns The section as newline-joined markdown (good sample and bad
+ *   sample as blockquotes, followed by the lesson), or an empty string when
+ *   no sample exists for the stage.
+ */
+export function stageGoodBadExamples(stage) {
+    const sample = GOOD_BAD_EXAMPLES[stage];
+    // No sample for this stage: emit nothing so callers can concatenate unconditionally.
+    if (!sample)
+        return "";
+    return [
+        "## Good vs Bad (at-a-glance)",
+        "",
+        "Contrasting samples to calibrate the quality bar for this stage. Read before writing the artifact — mirror the **Good** shape, avoid the **Bad** shape.",
+        "",
+        "**Good**",
+        "",
+        "> " + sample.good,
+        "",
+        "**Bad**",
+        "",
+        "> " + sample.bad,
+        "",
+        "**Why it matters:** " + sample.lesson,
+        ""
+    ].join("\n");
+}
+/** Directory, relative to `.cclaw/`, that holds the per-stage example reference files. */
+export const STAGE_EXAMPLES_REFERENCE_DIR = "references/stages";
+/**
+ * Builds the path of the example reference file for a stage.
+ *
+ * @param stage Stage name interpolated into the file name.
+ * @returns `.cclaw/<STAGE_EXAMPLES_REFERENCE_DIR>/<stage>-examples.md`.
+ */
+export function stageExamplesReferencePath(stage) {
+    return `.cclaw/${STAGE_EXAMPLES_REFERENCE_DIR}/${stage}-examples.md`;
+}
+/**
+ * Returns the full example artifact body as a standalone reference markdown
+ * file. Materialized under .cclaw/references/stages/<stage>-examples.md so
+ * the always-rendered skill body can link instead of inlining.
+ */
+export function stageExamplesReferenceMarkdown(stage) {
     const examples = STAGE_EXAMPLES[stage];
     if (!examples)
-        return "";
+        return null;
     return [
-        "## Examples",
+        `---`,
+        `stage: ${stage}`,
+        `name: ${stage}-stage-examples`,
+        `description: "Full sample artifact for the ${stage} stage. Loaded only when an agent explicitly needs a complete example; the stage skill links here rather than inlining."`,
+        `---`,
         "",
-        "Concrete artifact samples. These mirror the exact heading levels agents must use when authoring the stage artifact (all H2 `##` sections), so they are presented inside a markdown fence to avoid collapsing into the SKILL outline.",
+        `# ${stage} stage — full artifact sample`,
+        "",
+        `This file is linked from \`.cclaw/skills/<${stage}-stage>/SKILL.md\` under **Examples → See also**. The sample uses H2 headings that mirror the artifact a cclaw session must produce, so the markdown is wrapped in a fence to avoid collapsing into the outline.`,
         "",
         "```markdown",
         examples,
@@ -447,3 +525,196 @@ export function stageExamples(stage) {
         ""
     ].join("\n");
 }
+/**
+ * Returns the short inline pointer rendered directly inside the stage skill.
+ * Replaces the previous always-inline ~50-100 line fenced block and
+ * delivers true progressive disclosure: the full example lives in a
+ * reference file loaded on demand.
+ *
+ * @param stage Stage name used to look up STAGE_EXAMPLES and to build the
+ *   reference path.
+ * @returns An "## Examples" markdown section that names the reference file
+ *   and lists summary bullets, or an empty string when STAGE_EXAMPLES has no
+ *   entry for the stage.
+ */
+export function stageExamples(stage) {
+    // The sample text itself is not inlined; it is only checked for existence.
+    const examples = STAGE_EXAMPLES[stage];
+    if (!examples)
+        return "";
+    return [
+        "## Examples",
+        "",
+        `Full artifact sample for this stage lives at \`${stageExamplesReferencePath(stage)}\`. Open it when you need a complete reference; do NOT paste the example into the artifact verbatim — it is a shape guide, not a template.`,
+        "",
+        "Summary of what the reference covers:",
+        ...exampleSummaryBullets(stage),
+        ""
+    ].join("\n");
+}
+/**
+ * Builds the markdown bullet lines summarising a stage's example reference.
+ *
+ * @param stage Stage name used as the key into STAGE_EXAMPLE_SECTION_HEADINGS.
+ * @returns One `- <heading>` line per configured heading, or a single
+ *   generic bullet when the stage has no headings configured.
+ */
+function exampleSummaryBullets(stage) {
+    const headings = STAGE_EXAMPLE_SECTION_HEADINGS[stage] ?? [];
+    // Fall back to a generic bullet so the "Summary" list is never empty.
+    if (headings.length === 0)
+        return ["- Full artifact structure."];
+    return headings.map((heading) => `- ${heading}`);
+}
+// Per-stage list of summary headings shown as bullets by stageExamples().
+// Maintained by hand: keep it in sync with STAGE_EXAMPLES so the inline
+// summary matches the reference file without duplicating the heavy text.
+// Update whenever the sample in STAGE_EXAMPLES gains or loses a top-level
+// section.
+const STAGE_EXAMPLE_SECTION_HEADINGS = {
+    brainstorm: [
+        "Problem framing (problem, success, constraints)",
+        "Candidate approaches with trade-offs",
+        "Recommended direction + open questions",
+        "Clarification log and decision record"
+    ],
+    scope: [
+        "In-scope / out-of-scope / deferred lists with concrete capabilities",
+        "Requirements table with stable R# IDs",
+        "Boundary stress-tests and non-negotiables",
+        "Decision record for premise challenges"
+    ],
+    design: [
+        "Blast-radius file list",
+        "Mandatory architecture diagram (Mermaid)",
+        "Failure-mode table with detection + mitigation",
+        "Test strategy + performance budget",
+        "Completion dashboard + unresolved decisions"
+    ],
+    spec: [
+        "Acceptance-criteria table (observable, measurable, falsifiable)",
+        "Requirement-ref column tying each AC back to an R# from scope",
+        "Verification-approach column",
+        "Approval block"
+    ],
+    plan: [
+        "Dependency graph + dependency waves",
+        "Task list with effort + minutes estimate per task",
+        "Acceptance mapping (every AC → task IDs)",
+        "No-Placeholder scan row + WAIT_FOR_CONFIRM marker"
+    ],
+    tdd: [
+        "RED evidence per slice (failing test output)",
+        "Acceptance mapping per slice",
+        "GREEN evidence (full-suite pass)",
+        "REFACTOR notes with behavior-preservation confirmation",
+        "Test-pyramid shape + prove-it reproduction when applicable"
+    ],
+    review: [
+        "Spec-compliance findings (Layer 1)",
+        "Code-quality findings (Layer 2)",
+        "Severity, evidence, and status per finding",
+        "Go / no-go verdict"
+    ],
+    ship: [
+        "Release checklist (version, changelog, tag, artifacts)",
+        "Rollback plan with trigger, steps, verification",
+        "Runbook (how to verify the release post-deploy)",
+        "Sign-off block"
+    ]
+};
+// Human-readable label for each example domain key; used as the bold prefix
+// of every row rendered by stageDomainExamples().
+const DOMAIN_LABELS = {
+    web: "Web app (full-stack)",
+    cli: "CLI tool",
+    library: "Library / SDK",
+    "data-pipeline": "Data pipeline / ETL"
+};
+// Terse per-domain samples for the stages that have them (spec, plan, tdd,
+// ship). Each sample names its `domain` (a DOMAIN_LABELS key), a short
+// `label` for the kind of snippet, and the sample `body` text.
+const STAGE_DOMAIN_SAMPLES = {
+    spec: [
+        {
+            domain: "web",
+            label: "AC",
+            body: "AC-W1: Given a signed-in admin viewing `/dashboard/orders`, when an order's status changes server-side, the row updates within 2s without a full navigation (assert via `pnpm playwright test orders-live.spec.ts`)."
+        },
+        {
+            domain: "cli",
+            label: "AC",
+            body: "AC-C1: Given `cclaw init --claude` run in an empty directory, exit code is `0`, `.cclaw/config.yaml` is created with `harnesses: [claude]`, and stderr contains no warnings (asserted by `tests/integration/init-sync-doctor.test.ts`)."
+        },
+        {
+            domain: "library",
+            label: "AC",
+            body: "AC-L1: `validateHookDocument(obj)` returns `{ ok: true }` for every fixture under `tests/fixtures/valid-hooks/` and `{ ok: false, errors: [...] }` with at least one message for every fixture under `tests/fixtures/invalid-hooks/`."
+        },
+        {
+            domain: "data-pipeline",
+            label: "AC",
+            body: "AC-D1: For any `orders.csv` input, the pipeline emits exactly one row per `(order_id, event_ts)` pair to `warehouse.fact_orders`; running the job twice on the same input is idempotent (row count unchanged, verified by `dbt test --select fact_orders`)."
+        }
+    ],
+    plan: [
+        {
+            domain: "web",
+            label: "Task",
+            body: "T-W-3 `[~4m]`: Wire SSE endpoint `/api/orders/stream` into `useOrderFeed` hook. AC-W1. Verify: `pnpm playwright test orders-live.spec.ts`. Depends on: T-W-2."
+        },
+        {
+            domain: "cli",
+            label: "Task",
+            body: "T-C-2 `[~3m]`: Add `--dry-run` flag to `cclaw archive` that prints the would-be-archived run IDs to stdout and exits 0. AC-C3. Verify: `node dist/cli.js archive --dry-run` + `tests/unit/cli-parse.test.ts`."
+        },
+        {
+            domain: "library",
+            label: "Task",
+            body: "T-L-1 `[~5m]`: Expose `validateHookDocument` from the package root and re-export its types. AC-L1. Verify: `pnpm build && node -e \"console.log(require('./dist').validateHookDocument)\"`."
+        },
+        {
+            domain: "data-pipeline",
+            label: "Task",
+            body: "T-D-2 `[~5m]`: Add dedup step keyed on `(order_id, event_ts)` between `raw.orders` and `fact_orders`. AC-D1. Verify: `dbt run --select fact_orders+ && dbt test --select fact_orders`."
+        }
+    ],
+    tdd: [
+        {
+            domain: "web",
+            label: "RED→GREEN→REFACTOR",
+            body: "RED: `pnpm playwright test orders-live.spec.ts` → timeout waiting for row update. GREEN: wired SSE event → row rerenders via `useOrderFeed`. REFACTOR: extracted `applyOrderEvent(row, event)` pure helper; 87/87 tests still pass."
+        },
+        {
+            domain: "cli",
+            label: "RED→GREEN→REFACTOR",
+            body: "RED: `tests/unit/cli-parse.test.ts` expects `--dry-run` flag → `unknown option` error. GREEN: added to the Zod parser; 19/19 pass. REFACTOR: hoisted the dry-run formatter into `src/cli/format.ts` shared with `status`."
+        },
+        {
+            domain: "library",
+            label: "RED→GREEN→REFACTOR",
+            body: "RED: `tests/unit/hook-schema.test.ts` imports `validateHookDocument` from package root → `export not found`. GREEN: added re-export + types. REFACTOR: renamed internal `__validate` to `validateHookDocument` so the export name matches the source."
+        },
+        {
+            domain: "data-pipeline",
+            label: "RED→GREEN→REFACTOR",
+            body: "RED: `dbt test --select fact_orders` → `unique test on (order_id, event_ts)` fails on re-run. GREEN: added `row_number()` dedup in the staging model. REFACTOR: extracted the dedup CTE into `int_orders_deduped` for reuse by `fact_returns`."
+        }
+    ],
+    ship: [
+        {
+            domain: "web",
+            label: "Rollback",
+            body: "Trigger: error rate on `/api/orders/stream` > 2% for 5 minutes, or p95 latency > 1.5s for 10 minutes. Steps: `vercel rollback <deployment>`; run `2026_04_14_revert_orders_stream.sql` before traffic returns. Verify: error rate returns to baseline within 10 minutes on the `orders-live` dashboard."
+        },
+        {
+            domain: "cli",
+            label: "Rollback",
+            body: "Trigger: `cclaw init --claude` exits non-zero on a fresh tmp dir, OR `cclaw doctor` regresses (FAIL count increases) on the smoke matrix. Steps: `npm unpublish cclaw-cli@<version>` (within the 72h window) or `npm deprecate cclaw-cli@<version> '<reason>'`; publish the previous patch. Verify: `npx cclaw-cli@latest --version` prints the previous version."
+        },
+        {
+            domain: "library",
+            label: "Rollback",
+            body: "Trigger: any consumer reports `validateHookDocument` no longer exported, OR the CI `dual-package-check` job fails. Steps: `npm deprecate cclaw-cli@<version> 'broken package export — use <prev>'`; publish the previous minor with a patch bump; emit changelog `## Rollback` entry. Verify: a smoke consumer project `pnpm add cclaw-cli@latest` imports cleanly."
+        },
+        {
+            domain: "data-pipeline",
+            label: "Rollback",
+            body: "Trigger: `dbt test --select fact_orders` fails on production run, OR downstream dashboard MAU count drops >10% week-over-week. Steps: disable the new model via `dbt_project.yml` + `dbt run --select state:modified` with the previous git SHA; rerun backfill `dagster asset materialize fact_orders --partition <yesterday>`. Verify: `fact_orders` row count within ±1% of the previous week's baseline."
+        }
+    ]
+};
+/**
+ * Renders the "Living Examples by Domain" markdown section for a stage.
+ *
+ * @param stage Stage name used as the key into STAGE_DOMAIN_SAMPLES.
+ * @returns Newline-joined markdown with one bold-prefixed paragraph per
+ *   domain sample, or an empty string when the stage has no samples.
+ */
+export function stageDomainExamples(stage) {
+    const samples = STAGE_DOMAIN_SAMPLES[stage];
+    // Stages without domain samples contribute no section at all.
+    if (!samples || samples.length === 0)
+        return "";
+    const lines = [
+        "## Living Examples by Domain",
+        "",
+        "Use the row matching your project shape to calibrate voice, specificity, and command choice. The rows are deliberately terse — copy the **shape**, not the text.",
+        ""
+    ];
+    for (const sample of samples) {
+        // Each sample becomes "**<domain label> — <label>:** <body>" followed by a blank line.
+        lines.push(`**${DOMAIN_LABELS[sample.domain]} — ${sample.label}:** ${sample.body}`);
+        lines.push("");
+    }
+    return lines.join("\n");
+}

package/dist/content/harness-tool-refs.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+/**
+ * Per-harness tool-mapping reference files.
+ *
+ * Addresses A.1#4: the four supported harnesses (claude, cursor, opencode, codex)
+ * expose different primitive names for the same capabilities (ask-user,
+ * delegate/Task, web fetch, file edit, code execution, ...). cclaw's stage skills
+ * need to pick the right name at runtime without bloating every stage with per-harness
+ * if/else ladders.
+ *
+ * Each file below is short (one table per capability), authoritative, and materialised
+ * at `.cclaw/references/harness-tools/<harness>.md`. Stage skills and the meta-skill
+ * cite the folder instead of duplicating the mappings inline.
+ *
+ * When a new harness is added (or an existing one renames a tool), update the
+ * corresponding entry here — do NOT scatter tool names across skill text.
+ */
+import type { HarnessId } from "../types.js";
+/** Directory, relative to `.cclaw/`, where the per-harness tool-mapping files are materialised. */
+export declare const HARNESS_TOOL_REFS_DIR = "references/harness-tools";
+/**
+ * Returns the tool-mapping reference for one harness as a string.
+ * NOTE(review): presumably the markdown body written to
+ * `.cclaw/references/harness-tools/<harness>.md` — confirm against the
+ * implementation, which is not part of this declaration file.
+ */
+export declare function harnessToolRefMarkdown(harness: HarnessId): string;
+/**
+ * Index markdown (with YAML frontmatter) listing one tool-map reference file
+ * per supported harness: Claude Code, Cursor, OpenCode, and Codex.
+ */
+export declare const HARNESS_TOOL_REFS_INDEX_MD = "---\nname: Harness tool maps\ndescription: \"Index file. One reference per supported harness \u2014 cite the per-harness file instead of hardcoding tool names in stage skills.\"\n---\n\n# Harness Tool Maps\n\ncclaw supports four harnesses; each exposes different primitive names for the same capabilities. Stage skills and utility skills cite the file matching the currently active harness and fall back to plain-text equivalents for capabilities that the harness lacks.\n\n| Harness | File | Notes |\n|---|---|---|\n| Claude Code | `.cclaw/references/harness-tools/claude.md` | Richest tool surface (AskUserQuestion, Task, WebFetch, WebSearch, MCP, \u2026). |\n| Cursor | `.cclaw/references/harness-tools/cursor.md` | Near-parity with Claude; uses `AskQuestion` instead of `AskUserQuestion`. |\n| OpenCode | `.cclaw/references/harness-tools/opencode.md` | No native ask-user / dispatch; more plain-text fallbacks. |\n| Codex | `.cclaw/references/harness-tools/codex.md` | No native ask-user / dispatch; shell + file I/O only by default. |\n\nWhen a new harness is added or an existing one renames a tool, update the corresponding file (and this index) \u2014 do NOT scatter tool names across skill text.\n";