npm - cclaw-cli - Versions diffs - 0.7.0 → 0.8.0 - Mend

cclaw-cli 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/dist/content/agents.d.ts +9 -0
package/dist/content/agents.js +177 -6
package/dist/content/contracts.js +1 -1
package/dist/content/examples.d.ts +1 -0
package/dist/content/examples.js +63 -0
package/dist/content/hooks.js +6 -6
package/dist/content/learnings.d.ts +5 -0
package/dist/content/learnings.js +88 -104
package/dist/content/meta-skill.js +155 -44
package/dist/content/session-hooks.js +2 -2
package/dist/content/skills.js +46 -11
package/dist/content/stage-schema.js +36 -10
package/dist/content/start-command.js +63 -17
package/dist/content/status-command.js +2 -2
package/dist/content/subagents.js +169 -0
package/dist/content/templates.js +32 -4
package/dist/content/utility-skills.d.ts +23 -5
package/dist/content/utility-skills.js +204 -42
package/dist/doctor.js +33 -9
package/dist/harness-adapters.js +55 -16
package/dist/install.js +21 -7
package/dist/policy.js +3 -2
package/dist/runs.d.ts +4 -5
package/dist/runs.js +19 -11
package/dist/types.d.ts +4 -4
package/package.json +1 -1

package/dist/content/skills.js CHANGED

@@ -1,7 +1,7 @@
 import { RUNTIME_ROOT } from "../constants.js";
-import { stageExamples } from "./examples.js";
+import { stageExamples, stageGoodBadExamples } from "./examples.js";
 import { selfImprovementBlock } from "./learnings.js";
-import { QUESTION_FORMAT_SPEC, ERROR_BUDGET_SPEC, stageAutoSubagentDispatch, stageSchema } from "./stage-schema.js";
+import { stageAutoSubagentDispatch, stageSchema } from "./stage-schema.js";
 function rationalizationTable(stage) {
     const schema = stageSchema(stage);
     return `| Rationalization | Reality |
@@ -67,6 +67,25 @@ function decisionRecordBlock(stage) {
         return "";
     return `## Decision Record Template\n\nUse this format for every non-trivial architecture or scope decision made during this stage:\n\n\`\`\`\n${fmt}\n\`\`\`\n`;
 }
+function visualCommunicationBlock(stage) {
+    if (stage !== "design")
+        return "";
+    return `## Visual Communication Rules
+Diagrams are load-bearing artifacts in the design stage, not decoration. A diagram that encodes structure wrongly (or hides structure behind generic labels) misleads every downstream reader. Apply these rules to **every** diagram in the design artifact:
+1. **Concrete names, never generic.** "Service A → Service B" is not a diagram; it is a shape. Every node must name a real component the team will build or touch (\`NotificationPublisher\`, \`FeedReadModel\`, \`Stripe webhook handler\`). If you cannot name it concretely, the design is not ready.
+2. **Every arrow is labeled.** Label with the message, action, or protocol it carries (\`publishEvent(user_id, payload)\`, \`GET /snapshot\`, \`dedupe-key upsert\`). Unlabeled arrows silently lose the contract between components.
+3. **Direction is explicit.** Use arrowheads, not bare lines; draw the flow of *data* (not "dependency") unless the diagram type is explicitly a dependency graph, in which case say so in a one-line caption.
+4. **Distinguish sync vs async.** Use a convention and state it once in a legend: e.g. solid arrow = synchronous request/response, dashed arrow = async message via queue/bus, double arrow = two-way. Async edges always name the queue or topic.
+5. **Show at least one failure edge.** Every non-trivial diagram needs one branch that represents the degraded or error path (timeout, reconnect, fallback to cache, poison-message routing). A diagram with only the happy path hides the interesting half of the design.
+6. **One level of detail per diagram.** Do not mix "service-level" and "class-level" on the same canvas. If you need both, produce two diagrams — one at the system boundary, one at the internal module — and cross-reference them.
+7. **Caption, not decoration.** Each diagram gets a one-sentence caption below it stating what the reader should take away ("*Publish path with idempotent outbox; SSE stream reads the projection, not the bus directly*"). If you cannot write the caption in one sentence, the diagram is doing two things at once.
+8. **Prefer text-based formats** (Mermaid, ASCII) over binary images in \`.cclaw/artifacts/\` so diffs stay reviewable. Binary/SVG is allowed when the diagram is already the source of truth elsewhere (e.g. \`docs/architecture/\`) and the artifact embeds a link plus a text-based summary.
+If a diagram cannot satisfy rules 1–5, do NOT include it — a missing diagram is honest; a misleading diagram is worse. Surface the gap in **Unresolved Decisions** and proceed without the diagram until the decisions that would populate it are locked.
+`;
+}
 function contextLoadingBlock(stage) {
     const trace = stageSchema(stage).crossStageTrace;
     const readLines = trace.readsFrom.length > 0
@@ -81,7 +100,7 @@ Before starting stage execution:
 2. Resolve active artifact root: \`.cclaw/artifacts/\`.
 3. Load upstream artifacts required by this stage:
 ${readLines}
-4. Read \`.cclaw/knowledge.md\` and apply relevant entries before making decisions.
+4. Stream \`.cclaw/knowledge.jsonl\` (strict-JSONL knowledge store) and apply relevant entries before making decisions.
 `;
 }
 function whenNotToUseBlock(stage) {
@@ -236,7 +255,7 @@ function progressiveDisclosureBlock(stage) {
 - Primary stage procedure (this file): \`.cclaw/skills/${schema.skillFolder}/SKILL.md\`
 - Orchestrator contract (gate language and handoff): \`.cclaw/commands/${stage}.md\`
 - Artifact structure baseline: \`.cclaw/templates/${schema.artifactFile}\`
-- Runtime state truth source: \`.cclaw/state/flow-state.json\` + \`.cclaw/artifacts/\` + \`.cclaw/knowledge.md\`
+- Runtime state truth source: \`.cclaw/state/flow-state.json\` + \`.cclaw/artifacts/\` + \`.cclaw/knowledge.jsonl\`
 ### See also
 - Meta routing and activation rules: \`.cclaw/skills/using-cclaw/SKILL.md\`
@@ -344,15 +363,14 @@ You MUST complete these steps in order:
 ${checklistItems}
+${stageGoodBadExamples(stage)}
 ${stageExamples(stage)}
 ${namedAntiPatternBlock(stage)}
 ${cognitivePatternsList(stage)}
 ## Interaction Protocol
 ${schema.interactionProtocol.map((item, i) => `${i + 1}. ${item}`).join("\n")}
-${QUESTION_FORMAT_SPEC}
-${ERROR_BUDGET_SPEC}
+**See \`.cclaw/skills/using-cclaw/SKILL.md\` "Shared Decision + Tool-Use Protocol"** for the full AskUserQuestion format, error/retry budget, and the 3-attempt escalation rule. Do not duplicate those rules here — apply them verbatim.
 ${waveExecutionModeBlock(stage)}
 ## Required Gates
@@ -368,15 +386,13 @@ ${reviewSectionsBlock(stage)}
 ${verificationBlock(stage)}
 ${crossStageTraceBlock(stage)}
 ${artifactValidationBlock(stage)}
+${visualCommunicationBlock(stage)}
 ${decisionRecordBlock(stage)}
 ## Common Rationalizations
 ${rationalizationTable(stage)}
-## Blockers
-${schema.blockers.length > 0 ? schema.blockers.map((item) => `- ${item}`).join("\n") : "- None — stage can always proceed"}
 ## Anti-Patterns
-${schema.antiPatterns.map((item) => `- ${item}`).join("\n")}
+${[...schema.antiPatterns, ...schema.blockers].map((item) => `- ${item}`).join("\n")}
 ## Red Flags
 ${schema.redFlags.map((item) => `- ${item}`).join("\n")}
@@ -389,6 +405,25 @@ ${stageTransitionAutoAdvanceBlock(schema)}
 ${progressiveDisclosureBlock(stage)}
 ${selfImprovementBlock(stage)}
 ## Handoff
+Before closing the stage, announce the handoff explicitly so the user can steer. Use the **Handoff Menu** below; never auto-advance silently, even when \`/cc-next\` is available.
+### Handoff Menu
+Offer the user a lettered choice at the end of the stage (use \`AskUserQuestion\` / \`AskQuestion\` when the harness supports it, otherwise plain lettered text):
+- **A) Advance** — run \`/cc-next\` and continue to the next stage. Default when all gates are satisfied and there are no open concerns.
+- **B) Revise this stage** — stay on the current stage; apply the user's feedback, then re-ask for handoff.
+- **C) Pause / park** — save state; stop here. Useful when the user wants to share the artifact with a human reviewer before continuing.
+- **D) Rewind** — move to a prior stage (user names which). Use when downstream work revealed that an earlier stage was wrong.
+- **E) Abandon** — mark the flow as cancelled; no further stages will run. Artifacts remain on disk.
+Recommendation rules:
+- If all required gates are satisfied AND the stage's completion status is \`DONE\`, recommend **A (Advance)**.
+- If completion status is \`DONE_WITH_CONCERNS\`, recommend **B (Revise)** and name the concern.
+- If completion status is \`BLOCKED\`, recommend **B (Revise)** or **C (Pause)** depending on whether the blocker is internal or external.
+Reference data for the user:
 - Next command: \`/cc-next\` (loads whatever stage is current in flow-state)
 - Required artifact: \`.cclaw/artifacts/${schema.artifactFile}\`
 - Stage stays blocked if any required gate is unsatisfied

package/dist/content/stage-schema.js CHANGED

@@ -195,7 +195,7 @@ const SCOPE = {
         "**Error and Rescue Registry** — For each capability: what breaks, how detected, what fallback."
     ],
     interactionProtocol: [
-        "For scope mode selection: use the Decision Protocol — present expand/selective/hold/reduce as labeled options with trade-offs and mark one as (recommended). **Score each option `Completeness: X/10`** (10 = covers every prime-directive failure mode, four data-flow paths, observability, and deferred handling for the in-scope set; subtract for each gap). Recommend the highest-scoring option; if scores tie, pick the lowest blast radius. Base your recommendation on default heuristics: greenfield -> expand, enhancement -> selective, bugfix/hotfix/refactor -> hold, broad blast radius -> reduce. If AskQuestion/AskUserQuestion is available, send exactly ONE question per call, validate fields against runtime schema, and on schema error immediately fall back to plain-text question instead of retrying guessed payloads.",
+        "For scope mode selection: use the Decision Protocol — present expand/selective/hold/reduce as labeled options with trade-offs and mark one as (recommended). Do NOT use a numeric Completeness rubric; recommend the option that best covers the prime-directive failure modes, four data-flow paths, observability, and deferred handling for the in-scope set with the smallest blast radius. Base your recommendation on default heuristics: greenfield -> expand, enhancement -> selective, bugfix/hotfix/refactor -> hold, broad blast radius -> reduce. If AskQuestion/AskUserQuestion is available, send exactly ONE question per call, validate fields against runtime schema, and on schema error immediately fall back to plain-text question instead of retrying guessed payloads.",
         "Walk through the scope checklist interactively. Each checklist item that surfaces a decision should be presented to the user as a question, not as a monologue. Do not dump all items at once.",
         "Challenge premise and verify the problem framing before anything else.",
         "Take a position on every scope decision. Avoid hedging phrases like 'this could work' or 'there are many ways'; state your recommendation and one concrete condition that would change it.",
@@ -350,6 +350,7 @@ const SCOPE = {
     artifactValidation: [
         { section: "Prime Directives", required: true, validationRule: "For each scoped capability: named failure modes, explicit error surface, four data-flow paths, interaction edge cases, observability expectations, and deferred-item handling." },
         { section: "Premise Challenge", required: true, validationRule: "Must contain explicit answers to: right problem? direct path? what if nothing?" },
+        { section: "Requirements", required: true, validationRule: "Table of stable requirement IDs (R1, R2, R3…) one per row with observable outcome, priority, and source. IDs are assigned once and never renumbered across scope/design/spec/plan/review; dropped requirements stay with Priority `DROPPED`." },
         { section: "Implementation Alternatives", required: true, validationRule: "2-3 options with Name, Summary, Effort, Risk, Pros, Cons, and Reuses. Must include minimal viable and ideal architecture options." },
         { section: "Scope Mode", required: true, validationRule: "Must state selected mode and rationale with default heuristic justification." },
         { section: "Mode-Specific Analysis", required: true, validationRule: "Must document the analysis matching the selected scope mode: EXPAND (10x and delight opportunities), SELECTIVE (hold-scope baseline then cherry-picked expansions), HOLD (minimum-change-set hardening), REDUCE (ruthless cuts and follow-up split)." },
@@ -393,7 +394,7 @@ const DESIGN = {
         "Codebase Investigation — Before any design decision, read the actual code in the blast radius. List every file that will be touched, its current responsibilities, and existing patterns (error handling, naming, test style). Design must conform to discovered patterns, not impose new ones without justification.",
         "Step 0: Scope Challenge — what existing code solves sub-problems? Minimum change set? Complexity check: 8+ files or 2+ new services = complexity smell → flag for possible scope reduction.",
         "Search Before Building — For each technical choice (library, pattern, architecture), search for existing solutions. Label findings: Layer 1 (exact match), Layer 2 (partial match, needs adaptation), Layer 3 (inspiration only), EUREKA (unexpected perfect solution). Default to existing before custom.",
-        "Architecture Review — system design, component boundaries, data flow, scaling, security architecture. For each new codepath: one realistic production failure scenario. **Mandatory:** produce at least one architecture diagram (ASCII, Mermaid, or tool-generated) showing component boundaries and data flow direction.",
+        "Architecture Review — system design, component boundaries, data flow, scaling, security architecture. For each new codepath: one realistic production failure scenario. **Mandatory:** produce at least one architecture diagram (ASCII, Mermaid, or tool-generated) showing component boundaries and data flow direction. Apply the **Visual Communication rules** (see below) — an unlabeled or generic diagram is worse than no diagram, because it pretends to encode decisions it does not.",
         "Code Quality Review — code organization, DRY violations, error handling patterns, over/under-engineering assessment.",
         "Test Review — diagram every new flow, data path, error path. For each: what test type covers it? Does one exist? What is the gap? Produce test plan artifact.",
         "Performance Review — N+1 queries, memory concerns, caching opportunities, slow code paths. What breaks at 10x load? At 100x?",
@@ -405,7 +406,7 @@ const DESIGN = {
     interactionProtocol: [
         "Review architecture decisions section-by-section.",
         "For EACH issue found in a review section, present it ONE AT A TIME. Do NOT batch multiple issues.",
-        "For each issue: use the Decision Protocol — describe concretely with file/line references, present labeled options (A/B/C) with trade-offs, effort estimate (S/M/L/XL), risk level (Low/Med/High), **`Completeness: X/10` per option** (10 = fully addresses architecture/data-flow/failure-modes/test+perf review concerns for the issue, subtract for each unaddressed dimension), and mark one as (recommended). Prefer the highest-scoring option; if scores tie, prefer the lower-risk one. If AskQuestion/AskUserQuestion is available, send exactly ONE question per call, validate fields against runtime schema, and on schema error immediately fall back to plain-text question instead of retrying guessed payloads.",
+        "For each issue: use the Decision Protocol — describe concretely with file/line references, present labeled options (A/B/C) with trade-offs, effort estimate (S/M/L/XL), risk level (Low/Med/High), and mark one as (recommended). Do NOT use a numeric Completeness rubric; recommend the option that best covers architecture, data-flow, failure-modes, test, and perf review concerns for the issue with the lowest risk. If AskQuestion/AskUserQuestion is available, send exactly ONE question per call, validate fields against runtime schema, and on schema error immediately fall back to plain-text question instead of retrying guessed payloads.",
         "Only proceed to the next review section after ALL issues in the current section are resolved.",
         "If a section has no issues, say 'No issues found' and move on.",
         "Do not skip failure-mode mapping.",
@@ -583,7 +584,7 @@ const DESIGN = {
         { section: "Codebase Investigation", required: true, validationRule: "Must list blast-radius files with current responsibilities and discovered patterns." },
         { section: "Search Before Building", required: true, validationRule: "For each technical choice: Layer 1 (exact match), Layer 2 (partial match), Layer 3 (inspiration), EUREKA labels with reuse-first default." },
         { section: "Architecture Boundaries", required: true, validationRule: "Must list component boundaries with ownership." },
-        { section: "Architecture Diagram", required: true, validationRule: "At least one diagram (ASCII, Mermaid, or image) showing component boundaries and data flow direction." },
+        { section: "Architecture Diagram", required: true, validationRule: "At least one diagram (ASCII, Mermaid, or image) showing component boundaries and data flow direction. Diagram must: (1) label every node with a concrete component name (no generic 'Service A/B'), (2) label every arrow with the action or message (no unlabeled arrows), (3) mark direction of data flow explicitly, (4) distinguish synchronous from asynchronous edges (e.g. solid vs dashed, or `sync:` / `async:` prefix), (5) show at least one failure edge or degraded-mode branch when the system has one." },
         { section: "Data Flow", required: true, validationRule: "Must include happy path, nil input, empty input, upstream error paths." },
         { section: "Failure Mode Table", required: true, validationRule: "Each failure mode has: trigger, detection, mitigation, user impact." },
         { section: "Test Strategy", required: true, validationRule: "Must define unit/integration/e2e expectations with coverage targets." },
@@ -748,7 +749,7 @@ const SPEC = {
         traceabilityRule: "Every acceptance criterion must trace to a design decision. Every downstream plan task must trace to a spec criterion."
     },
     artifactValidation: [
-        { section: "Acceptance Criteria", required: true, validationRule: "Each criterion is observable, measurable, and falsifiable. Table should include a Design Decision Ref column tracing back to design artifact." },
+        { section: "Acceptance Criteria", required: true, validationRule: "Each criterion is observable, measurable, and falsifiable. Table must include a Requirement Ref column linking to R# IDs in 02-scope.md and a Design Decision Ref column tracing back to design artifact. AC IDs (AC-1, AC-2…) are stable across revisions — dropped ACs stay with Priority `DROPPED`." },
         { section: "Edge Cases", required: true, validationRule: "At least one boundary and one error condition per criterion." },
         { section: "Constraints and Assumptions", required: true, validationRule: "All implicit assumptions surfaced. Constraints have sources." },
         { section: "Testability Map", required: true, validationRule: "Each criterion maps to a concrete test description with verification approach (unit, integration, e2e, manual) and command or manual steps." },
@@ -1037,7 +1038,10 @@ const TDD = {
         { name: "Regression Paranoia", description: "Assume every change breaks something until the full suite proves otherwise. Partial test runs are lies of omission." },
         { name: "Refactor-as-Hygiene", description: "Refactoring is not optional cleanup — it is the third leg of TDD. GREEN without REFACTOR accumulates mess. REFACTOR without GREEN breaks things." },
         { name: "Evidence Over Anecdote", description: "Every claim about test state must be backed by captured output. 'It passed' without terminal evidence is not evidence. 'I saw it fail' without the failure output is not RED. Capture commands, outputs, and results — not summaries from memory." },
-        { name: "Characterization First", description: "Before changing existing behavior, write characterization tests that capture current behavior as-is. These tests document what the system does today — even if that behavior is wrong. Only after the characterization suite is green do you add the new RED test for the desired change. This prevents accidental behavior destruction during refactoring." }
+        { name: "Characterization First", description: "Before changing existing behavior, write characterization tests that capture current behavior as-is. These tests document what the system does today — even if that behavior is wrong. Only after the characterization suite is green do you add the new RED test for the desired change. This prevents accidental behavior destruction during refactoring." },
+        { name: "Test Pyramid Shape", description: "Healthy test suites look like a pyramid: many small fast tests at the base, fewer medium integration tests in the middle, few large end-to-end tests at the top. Each layer catches a different class of bug; none of them substitutes for another. If your suite is top-heavy (mostly E2E) it is slow and flaky; if it is base-only it misses integration contracts. During TDD, default to the smallest layer that can prove the behavior." },
+        { name: "Prove-It Pattern (bug fixes)", description: "For any reported regression or hotfix, the FIRST test is a reproduction — it must fail without your fix, pass with your fix, and fail again if the fix is reverted. This is the only way to prove you fixed the reported bug and not a superficially similar one. Skipping this step is how bugs come back two releases later wearing a different name." },
+        { name: "Test Size Model", description: "Size tests by scope, not by name: Small = pure logic, no I/O, <50ms; Medium = one process boundary, possibly filesystem or an in-memory DB; Large = multi-process / network / real external service. Small tests are the default; escalate to Medium only when a real boundary must be exercised, and to Large only for end-to-end user journeys. Record the size class in the TDD artifact so reviewers can sanity-check the pyramid shape." }
     ],
     reviewSections: [
         {
@@ -1061,6 +1065,26 @@ const TDD = {
                 "Is traceability complete: every change links to plan task ID and spec criterion?"
             ],
             stopGate: true
+        },
+        {
+            title: "Test Pyramid + Size Audit",
+            evaluationPoints: [
+                "Is the tests-added count skewed toward Small (unit) tests, with Medium and Large used only when a real boundary justifies the cost?",
+                "Does every newly added test declare a size class (Small / Medium / Large) — either inline in the test file or in the TDD artifact table?",
+                "Are Large tests reserved for genuine end-to-end user journeys (not substitutes for unit coverage)?",
+                "Has the slice avoided using Medium/Large tests to paper over testability problems that should be fixed at the design layer?"
+            ],
+            stopGate: false
+        },
+        {
+            title: "Prove-It Reproduction (bug-fix slices)",
+            evaluationPoints: [
+                "Does the artifact identify this slice as a bug fix, and if so, include a reproduction test checked in alongside the fix?",
+                "Is there captured RED evidence from running the reproduction WITHOUT the fix applied?",
+                "Is there captured GREEN evidence from the same reproduction AFTER the fix was applied?",
+                "Is there a note confirming the reproduction test fails again if the fix is reverted (or equivalent evidence that the test is actually pinned to this fix)?"
+            ],
+            stopGate: false
         }
     ],
     completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
@@ -1077,7 +1101,9 @@ const TDD = {
         { section: "REFACTOR Notes", required: true, validationRule: "What changed, why, behavior preservation confirmed." },
         { section: "Traceability", required: true, validationRule: "Plan task ID and spec criterion linked." },
         { section: "Verification Ladder", required: false, validationRule: "If present: per-slice verification tier (static, command, behavioral, human) with evidence for highest tier reached." },
-        { section: "Coverage Targets", required: false, validationRule: "If present: per-module or per-code-type coverage thresholds with current values and measurement commands." }
+        { section: "Coverage Targets", required: false, validationRule: "If present: per-module or per-code-type coverage thresholds with current values and measurement commands." },
+        { section: "Test Pyramid Shape", required: false, validationRule: "If present: per-slice count of Small/Medium/Large tests added, to let reviewers verify the suite is not drifting top-heavy." },
+        { section: "Prove-It Reproduction", required: false, validationRule: "Required for bug-fix slices: original failing reproduction test (RED without fix), passing output with fix (GREEN), and a note confirming the test fails again if the fix is reverted." }
     ],
     namedAntiPattern: {
         title: "Code Before Failing Test",
@@ -1125,7 +1151,7 @@ const REVIEW = {
         "Run Layer 1 (spec compliance) completely before starting Layer 2.",
         "In each review section, present findings ONE AT A TIME. Do NOT batch.",
         "Classify every finding as Critical, Important, or Suggestion.",
-        "For each Critical finding: use the Decision Protocol — present resolution options (A/B/C) with trade-offs, **score each option `Completeness: X/10`** (10 = fully closes the finding with no carry-over risk; subtract for partial fixes, deferred follow-ups, or new risk introduced), and mark one as (recommended). Prefer the highest-scoring option; if scores tie, prefer the option with the smallest blast radius. If AskQuestion/AskUserQuestion is available, send exactly ONE question per call, validate fields against runtime schema, and on schema error immediately fall back to plain-text question instead of retrying guessed payloads.",
+        "For each Critical finding: use the Decision Protocol — present resolution options (A/B/C) with trade-offs, and mark one as (recommended). Do NOT use a numeric Completeness rubric; recommend the option that fully closes the finding with no carry-over risk and the smallest blast radius. If AskQuestion/AskUserQuestion is available, send exactly ONE question per call, validate fields against runtime schema, and on schema error immediately fall back to plain-text question instead of retrying guessed payloads.",
         "Resolve all critical blockers before ship.",
         "For final verdict: use AskQuestion/AskUserQuestion only if runtime schema is confirmed; otherwise collect verdict with a plain-text single-choice prompt (APPROVED / APPROVED_WITH_CONCERNS / BLOCKED).",
         "**STOP.** Do NOT proceed to ship until the user provides an explicit verdict."
@@ -1336,7 +1362,7 @@ const SHIP = {
     interactionProtocol: [
         "Run preflight checks before any release action.",
         "Document release notes and rollback plan explicitly.",
-        "For finalization mode: use the Decision Protocol — present modes as labeled options (A/B/C/D) with consequences, **score each option `Completeness: X/10`** (10 = fully addresses release blast-radius, rollback readiness, observability, and stakeholder communication), and mark one as (recommended). Prefer the highest-scoring option; if scores tie, prefer the most reversible one. If AskQuestion/AskUserQuestion is available, send exactly ONE question per call, validate fields against runtime schema, and on schema error immediately fall back to plain-text question instead of retrying guessed payloads.",
+        "For finalization mode: use the Decision Protocol — present modes as labeled options (A/B/C/D) with consequences, and mark one as (recommended). Do NOT use a numeric Completeness rubric; recommend the mode that best addresses release blast-radius, rollback readiness, observability, and stakeholder communication — ties go to the most reversible option. If AskQuestion/AskUserQuestion is available, send exactly ONE question per call, validate fields against runtime schema, and on schema error immediately fall back to plain-text question instead of retrying guessed payloads.",
         "Do not proceed if critical blockers remain from review.",
         "**STOP.** Present finalization options and wait for user selection before executing any finalization action."
     ],
@@ -1455,7 +1481,7 @@ const SHIP = {
         { section: "Monitoring", required: false, validationRule: "If applicable: what metrics/logs to watch post-deploy. Risk note if no monitoring." },
         { section: "Finalization", required: true, validationRule: "Exactly one finalization enum token selected. Execution result documented. Worktree cleaned if applicable." },
         { section: "Completion Status", required: false, validationRule: "If present: exactly one of SHIPPED, SHIPPED_WITH_EXCEPTIONS, BLOCKED. Exceptions documented when applicable." },
-        { section: "Compound Step", required: false, validationRule: "Optional retrospective: at least one bullet of the form 'Insight: ... | Action: append [compound] entry to .cclaw/knowledge.md', or an explicit 'No compound insight this run.' line." }
+        { section: "Compound Step", required: false, validationRule: "Optional retrospective: at least one bullet of the form 'Insight: ... | Action: append [compound] entry to .cclaw/knowledge.jsonl', or an explicit 'No compound insight this run.' line." }
     ],
     namedAntiPattern: {
         title: "Green CI Means Safe to Merge",

package/dist/content/start-command.js CHANGED

@@ -25,30 +25,69 @@ This is the **recommended way to start** working with cclaw. Use \`/cc-next\` fo
 ## HARD-GATE
 - **Do not** skip reading \`${flowPath}\` — always check current state before acting.
-- **Do not** start implementation stages directly from \`/cc <prompt>\` — always begin at brainstorm.
+- **Do not** start implementation stages directly from \`/cc <prompt>\` — always begin at the first stage of the resolved track (brainstorm for standard, spec for quick).
+- **Do not** start a stage pipeline for a task that is not a software change (pure question, non-software task, conversation).
 ## Algorithm
 ### With prompt (\`/cc <text>\`)
-1. Read \`${flowPath}\`.
-2. If flow already has completed stages beyond brainstorm, warn the user that starting a new brainstorm will reset progress. Ask for confirmation before proceeding.
-3. **Track heuristic** — classify the idea text and **recommend** a track (the user can override before any state mutation):
+1. **Phase 0 — Task classification.** Before any stage routing, classify the prompt:
+   | Class | Signals | Action |
+   |---|---|---|
+   | **non-software** | legal text / docs / marketing copy / meeting notes / therapy-style conversation | Respond directly, do NOT open a stage, do NOT mutate flow state. |
+   | **pure-question** | "how does X work?", "explain Y", "what are the trade-offs of Z?" | Answer directly, do NOT open a stage. |
+   | **trivial** | typo, one-liner, rename, config tweak, copy change, version bump with zero behavior change | Fast-path: skip \`brainstorm\` and \`scope\`, seed \`00-idea.md\`, move straight to \`design\` or \`spec\` depending on whether an interface change is involved. |
+   | **software — bug fix with repro** | regression / hotfix / named symptom + repro steps | Fast-path: set track to \`quick\`, seed \`04-spec.md\` with the reproduction, enter \`tdd\` with a RED reproduction test first. |
+   | **software — standard** | feature, refactor, migration, integration, architecture change | Full 8-stage flow starting at \`brainstorm\`. |
+   Record the chosen class in \`.cclaw/artifacts/00-idea.md\` on the \`Class:\` line. Do NOT silently treat a non-software task as software.
+2. **Phase 1 — Origin-document discovery.** Before asking the user for context, scan for existing requirements/plan artifacts and merge them into initial context:
+   - \`.cclaw/artifacts/00-idea.md\` if it already exists (resumed flow).
+   - Common origin locations: \`docs/prd/**\`, \`docs/rfcs/**\`, \`docs/adr/**\`, \`docs/design/**\`, \`specs/**\`, \`prd/**\`, \`rfc/**\`, \`design/**\`, root-level \`PRD.md\` / \`SPEC.md\` / \`DESIGN.md\` / \`REQUIREMENTS.md\` / \`ROADMAP.md\`.
+   - Summarize each discovered doc in \`00-idea.md\` under a \`Discovered context\` section with path + 1-line summary.
+   - If an origin doc contradicts the prompt, surface the conflict to the user before routing.
+3. **Phase 2 — Tech-stack + version detection.** Sniff the repo for stack + language versions and record under \`Stack:\`:
+   - Node: \`package.json\` \`engines\` / \`volta\` / \`packageManager\` / \`devDependencies\`.
+   - Python: \`pyproject.toml\` / \`requirements*.txt\` / \`.python-version\`.
+   - Go: \`go.mod\` (module + Go version).
+   - Rust: \`Cargo.toml\` (\`[package]\` + \`rust-version\`).
+   - Java/Kotlin: \`pom.xml\` / \`build.gradle*\` + toolchain version.
+   - Containers: \`Dockerfile\`, \`docker-compose*.yml\`.
+   - CI: \`.github/workflows\`, \`.gitlab-ci.yml\`.
+   Skip detection quietly if no markers are found — do NOT invent a stack.
+4. Read \`${flowPath}\`.
+5. If flow already has completed stages beyond brainstorm, warn the user that starting a new brainstorm will reset progress. Ask for confirmation before proceeding.
+6. **Track heuristic** — classify the idea text and **recommend** a track (the user can override before any state mutation):
    - **quick** (\`spec → tdd → review → ship\`) — single-purpose work where the spec is essentially already known.
      Triggers (case-insensitive substring or close variant): \`bug\`, \`bugfix\`, \`fix\`, \`hotfix\`, \`patch\`, \`typo\`, \`regression\`, \`copy change\`, \`rename\`, \`bump\`, \`upgrade dep\`, \`config tweak\`, \`docs only\`, \`comment\`, \`lint\`, \`format\`, \`small\`, \`tiny\`, \`one-liner\`, \`revert\`.
    - **standard** (full 8 stages — default) — anything that introduces a new capability, touches multiple modules, or has unclear scope.
      Triggers: \`new feature\`, \`add\`, \`build\`, \`design\`, \`refactor\`, \`migration\`, \`platform\`, \`architecture\`, \`endpoint\`, \`schema\`, \`api\`, \`integrate\`, \`workflow\`, \`onboarding\`, or any prompt that does not match quick triggers.
    - When triggers conflict (e.g. "small refactor that touches 5 modules") prefer **standard** — quick is opt-in and only safe when scope is genuinely tiny.
-4. Present the recommendation as a single decision with explicit options:
+7. Present the recommendation as a single decision with explicit options:
    > \`Recommended track: <quick|standard>\` because \`<one-line reason citing matched triggers>\`.
    > Override? (A) keep \`<recommended>\`  (B) switch to \`<other>\`  (C) cancel.
    If \`AskQuestion\`/\`AskUserQuestion\` is available, send exactly ONE question; on schema error, fall back to plain text.
-5. Persist the chosen track to \`${flowPath}\` (\`track\` field). Compute \`skippedStages\` from the track and write that too. Use the **first stage of the chosen track** as \`currentStage\` (quick → \`spec\`, standard → \`brainstorm\`).
-6. Write the prompt to \`.cclaw/artifacts/00-idea.md\` as the raw idea capture, and append a \`Track:\` line referencing the chosen track and the matched heuristic.
-7. Load the **first-stage skill for the chosen track** and its command file:
-   - quick → \`.cclaw/skills/specification-authoring/SKILL.md\` + \`.cclaw/commands/spec.md\`
-   - standard → \`.cclaw/skills/brainstorming/SKILL.md\` + \`.cclaw/commands/brainstorm.md\`
-8. Execute that stage with the prompt as initial context.
+8. Persist the chosen track to \`${flowPath}\` (\`track\` field). Compute \`skippedStages\` from the track and write that too. Use the **first stage of the chosen track** as \`currentStage\` (quick → \`spec\`, standard → \`brainstorm\`, trivial fast-path → \`design\` or \`spec\` per Phase 0).
+9. Write the prompt to \`.cclaw/artifacts/00-idea.md\` with the following header lines: \`Class:\` (from Phase 0), \`Track:\` (chosen track + matched heuristic), \`Stack:\` (from Phase 2 detection, or \`unknown\`), and a \`Discovered context\` section if Phase 1 found origin docs.
+10. Load the **first-stage skill for the chosen track** and its command file:
+    - quick → \`.cclaw/skills/specification-authoring/SKILL.md\` + \`.cclaw/commands/spec.md\`
+    - standard → \`.cclaw/skills/brainstorming/SKILL.md\` + \`.cclaw/commands/brainstorm.md\`
+    - trivial fast-path → design or spec skill per Phase 0 decision.
+11. Execute that stage with the prompt + Phase 1/Phase 2 context as initial input.
+### Reclassification on discovery
+If during any stage the agent discovers evidence that contradicts the initial Phase 0 / track decision (e.g. a supposedly \`trivial\` change turns out to require schema migration, a \`quick\` bug fix turns out to need design discussion, an origin doc reveals scope 3× larger than the prompt), STOP and re-classify:
+1. Surface the new evidence in plain text.
+2. Propose the updated \`Class\` + \`Track\` with a one-line reason.
+3. Use the Decision Protocol to let the user accept, override, or cancel.
+4. On acceptance: update \`00-idea.md\` with a \`Reclassification:\` entry (old → new, reason, ISO timestamp) and update \`flow-state.json\` accordingly — do NOT rewrite prior artifacts, they stay as history.
 ### Without prompt (\`/cc\`)
@@ -88,12 +127,15 @@ Do **not** silently discard an existing flow when the user provides a prompt. If
 ### Path A: \`/cc <prompt>\`
-1. Read \`${flowPath}\`.
-2. If \`completedStages\` is non-empty:
+1. **Task classification (Phase 0).** Decide whether the prompt is \`software-standard\`, \`software-trivial\`, \`software-bugfix\`, \`pure-question\`, or \`non-software\`. Non-software and pure-question exit immediately — answer directly, do not open a stage.
+2. **Origin-document discovery (Phase 1).** Scan for \`docs/prd/**\`, \`docs/rfcs/**\`, \`docs/adr/**\`, \`docs/design/**\`, \`specs/**\`, \`prd/**\`, \`rfc/**\`, \`design/**\`, root-level \`PRD.md\` / \`SPEC.md\` / \`DESIGN.md\` / \`REQUIREMENTS.md\` / \`ROADMAP.md\`. Summarize any hits in \`00-idea.md\` under \`Discovered context\`. Surface conflicts with the prompt before routing.
+3. **Stack detection (Phase 2).** Inspect \`package.json\` engines, \`pyproject.toml\`, \`go.mod\`, \`Cargo.toml\`, \`pom.xml\`, \`build.gradle*\`, \`Dockerfile\`, \`docker-compose*.yml\`, and CI configs. Record stack + versions on the \`Stack:\` line. Do not invent stack details.
+4. Read \`${flowPath}\`.
+5. If \`completedStages\` is non-empty:
    - Inform: "You have an active flow at stage **{currentStage}** with {N} completed stages. Starting a new brainstorm will reset progress."
    - Ask: "Continue with reset? (A) Yes, start fresh (B) No, resume current flow"
    - If (B) → switch to Path B behavior.
-3. **Classify the idea** using the heuristic below and present a single track recommendation. Wait for explicit confirmation or override before mutating any state.
+6. **Classify the idea** using the heuristic below and present a single track recommendation. Wait for explicit confirmation or override before mutating any state.
    **Track heuristic** (lowercase substring match against the user prompt):
@@ -104,9 +146,13 @@ Do **not** silently discard an existing flow when the user provides a prompt. If
    - On conflict, prefer \`standard\` (quick is opt-in for genuinely tiny work).
    - Always state the recommendation as a one-line reason citing the matched trigger.
-4. Persist the chosen track in \`${flowPath}\` (\`track\` + \`skippedStages\`). Set \`currentStage\` to the first stage of the chosen track (\`quick\` → \`spec\`, \`standard\` → \`brainstorm\`). Reset gate catalog.
-5. Write \`${RUNTIME_ROOT}/artifacts/00-idea.md\` with the user's prompt and an explicit \`Track:\` line capturing the heuristic decision.
-6. Load and execute the **first stage skill of the chosen track** (\`brainstorming\` for standard, \`specification-authoring\` for quick) plus its matching command file.
+7. Persist the chosen track in \`${flowPath}\` (\`track\` + \`skippedStages\`). Set \`currentStage\` to the first stage of the chosen track (\`quick\` → \`spec\`, \`standard\` → \`brainstorm\`, trivial fast-path → \`design\` or \`spec\`). Reset gate catalog.
+8. Write \`${RUNTIME_ROOT}/artifacts/00-idea.md\` with the user's prompt plus header lines: \`Class:\`, \`Track:\`, \`Stack:\`, and a \`Discovered context\` section from Phase 1.
+9. Load and execute the **first stage skill of the chosen track** (\`brainstorming\` for standard, \`specification-authoring\` for quick, the \`design\` or \`spec\` skill for the trivial fast-path per Phase 0) plus its matching command file.
+### Reclassification on discovery
+If mid-stage evidence contradicts the initial Class/Track decision (the "trivial" change needs a migration, the "quick" bug fix needs architecture work, an origin doc multiplies scope), STOP and re-classify using the Decision Protocol. Record \`Reclassification:\` in \`00-idea.md\` with old/new class and a one-line reason. Do NOT rewrite prior artifacts — they stay as history.
 ### Path B: \`/cc\` (no arguments)

package/dist/content/status-command.js CHANGED Viewed

@@ -8,7 +8,7 @@ function delegationLogPath() {
     return `${RUNTIME_ROOT}/state/delegation-log.json`;
 }
 function knowledgePath() {
-    return `${RUNTIME_ROOT}/knowledge.md`;
+    return `${RUNTIME_ROOT}/knowledge.jsonl`;
 }
 function contextModePath() {
     return `${RUNTIME_ROOT}/state/context-mode.json`;
@@ -131,7 +131,7 @@ a read-only command.
    - Prefer \`${checkpointPath()}\` when \`stage === currentStage\` and \`timestamp\` parses as ISO 8601.
    - Else scan \`${stageActivityPath()}\` from tail for the most recent entry whose \`stage === currentStage\`; use its \`ts\`.
    - Render \`<X>d<Y>h\`, \`<X>h<Y>m\`, \`<X>m\`, or \`(unknown)\`.
-5. Read \`${RUNTIME_ROOT}/knowledge.md\`. If missing or empty → knowledge highlights are \`(none recorded)\`.
+5. Read \`${RUNTIME_ROOT}/knowledge.jsonl\`. If missing or empty → knowledge highlights are \`(none recorded)\`. Parse each line as JSON and surface its \`trigger\`/\`action\`.
 6. For each gate in \`stageGateCatalog[currentStage].required\`:
    - Satisfied if present in \`passed\` and absent from \`blocked\`.
 7. Build and print the status block (see command contract for layout).

package/dist/content/subagents.js CHANGED Viewed

@@ -10,6 +10,11 @@ const SUBAGENT_AGENT_NAMES = [
     "security-reviewer",
     "test-author",
     "doc-updater",
+    "repo-research-analyst",
+    "learnings-researcher",
+    "framework-docs-researcher",
+    "best-practices-researcher",
+    "git-history-analyzer",
 ];
 export function subagentDrivenDevSkill() {
     return `---
@@ -59,6 +64,20 @@ If delegation tooling is unavailable in the active harness, run the same control
 - **Use a more capable model** for high-ambiguity or high-risk analysis (security review, architecture conflicts, spec contradiction resolution).
 - During review-heavy stages, prefer **mixed routing**: faster first-pass triage + escalate only high-severity/low-confidence findings.
+### Cost-aware routing (tier table)
+| Tier | Use for | Example agents |
+|---|---|---|
+| \`deep\` | one heavy reasoning pass per stage (planner, final reconciliation) | planner |
+| \`balanced\` | spec compliance + code/security review with enough context | spec-reviewer, code-reviewer, security-reviewer, test-author |
+| \`fast\` | read-only research / narrow machine checks / docs updates — safe to fan out | repo-research-analyst, learnings-researcher, framework-docs-researcher, best-practices-researcher, git-history-analyzer, doc-updater |
+**Routing rules:**
+- At most ONE \`deep\` agent per stage (planner OR final reconciliation, not both).
+- \`balanced\` agents are default for review-stage specialists.
+- \`fast\` agents are the only tier you should fan out in parallel (3-5 at a time is fine).
+- Never escalate a \`fast\` agent's output directly to ship decisions — always have a \`balanced\` reviewer consume the evidence first.
 ## HARD-GATE
 **Never dispatch a subagent without a concrete, self-contained task description pasted into the prompt. Do not pass file references the subagent must read to understand its task.**
@@ -556,6 +575,146 @@ Process (mandatory):
    - Report: FILES_EDITED, GREEN_COMMAND_RUN, REFACTOR_NOTES, STATUS: DONE|BLOCKED.
 \`\`\`
+`;
+}
+function repoResearchAnalystEnhancedBody() {
+    return `
+## Task Tool Delegation
+Launch **read-only repo exploration** at the start of brainstorm/scope/design so the primary agent plans on a grounded map, not guesses. Run as a \`fast\` tier agent — cheap to fan out alongside learnings-researcher and best-practices-researcher.
+\`\`\`
+You are a repo research analyst subagent.
+TASK DOMAIN: {1-sentence description of the feature/fix/refactor being planned}
+REPO HINTS: {known directories, module names, patterns the primary agent already knows}
+OUT OF SCOPE: {paths not to read (large vendor dirs, generated code)}
+Deliverables:
+- Relevant modules: list of \`path — purpose\` (cite file:line on ambiguous claims).
+- Reuse candidates: list of \`file:line — why this absorbs the change\`.
+- Ownership hints: CODEOWNERS / README / comment signals.
+- Gaps: capabilities NOT yet present that the task would need.
+Rules:
+- Read-only. Do NOT edit files.
+- Cite file:line for every claim; never invent paths.
+- If the scope is too large to fully explore, say so and bound your search.
+\`\`\`
+`;
+}
+function learningsResearcherEnhancedBody() {
+    return `
+## Task Tool Delegation
+Dispatch before any non-trivial stage to stream \`.cclaw/knowledge.jsonl\` and surface prior learnings. Cheap \`fast\` tier — fan out with other research agents.
+\`\`\`
+You are a learnings researcher subagent.
+TASK DESCRIPTION: {verbatim prompt + current stage}
+DOMAIN HINTS: {keywords from Task Classification / Origin Docs}
+Deliverables:
+- Matched rules: list of \`trigger → action (confidence)\`.
+- Matched patterns: list of \`trigger → action (confidence)\`.
+- Matched lessons: list of \`trigger → action (confidence)\`.
+- Matched compounds: list of \`trigger → action (confidence)\`.
+- No-match note (if nothing relevant exists).
+Rules:
+- Read-only; NEVER rewrite or delete entries.
+- Return at most 10 entries, ranked by confidence then recency.
+- Quote the entries verbatim — do NOT paraphrase.
+\`\`\`
+`;
+}
+function frameworkDocsResearcherEnhancedBody() {
+    return `
+## Task Tool Delegation
+Use for any task that depends on a specific framework/library/SDK/CLI. Prefer context7 MCP when available for version-accurate docs; otherwise use WebSearch/WebFetch against official sources.
+\`\`\`
+You are a framework documentation researcher subagent.
+LIBRARY + VERSION: {name + resolved version from lockfile / pyproject / go.mod / Cargo.toml / pom.xml / build.gradle}
+TASK USAGE: {which APIs the task will actually call}
+CONTEXT7: {"available" | "not available"}
+Deliverables:
+- Key APIs: list of signatures the task will touch.
+- Breaking changes since the last major release relevant to the task.
+- Gotchas: deprecated paths, version-gated flags, platform caveats.
+- Source: URL(s) or MCP reference used.
+Rules:
+- Never invent APIs. Prefer silence + UNKNOWN over speculation.
+- Tie every statement to an authoritative source; avoid blog posts when official docs exist.
+\`\`\`
+`;
+}
+function bestPracticesResearcherEnhancedBody() {
+    return `
+## Task Tool Delegation
+Use when the task touches a well-known domain (auth, caching, rate limiting, observability, accessibility, etc.) and the primary agent needs a short, citable best-practice summary.
+\`\`\`
+You are a best-practices researcher subagent.
+DOMAIN: {one word, e.g. auth, caching, rate-limiting, a11y, observability, retries}
+SUB-PROBLEM: {narrow one-sentence statement of what the task is actually deciding}
+Deliverables:
+- Recommended practices: 5-8 entries of \`practice — rationale — source\`.
+- Common traps / anti-patterns: list of \`trap — why it fails — source\`.
+- Decision hooks: 1-3 explicit questions the primary agent must answer.
+Rules:
+- Cite 3-5 authoritative sources (official docs, IETF/W3C/OWASP, well-known standards).
+- If the domain has no authoritative answer, say so; do NOT substitute opinion.
+\`\`\`
+`;
+}
+function gitHistoryAnalyzerEnhancedBody() {
+    return `
+## Task Tool Delegation
+Use when the task touches existing code, so the primary agent can see prior attempts, reverts, and owners before proposing changes.
+\`\`\`
+You are a git history analyzer subagent.
+IMPACTED PATHS: {list of files/directories the task plans to touch}
+WINDOW: {default 90 days; adjust only if explicitly needed}
+Commands to run (read-only):
+- git log --follow -n 20 -- <path>
+- git blame <path>
+- git log --since="<window>" -i -E --grep="revert|regression" -- <path>
+- git log --since="<window>" --format="%an" -- <path> | sort | uniq -c | sort -nr
+Deliverables:
+- Recent themes: 3-5 bullets on what changed lately per path.
+- Revert/regression signals: list with commit SHAs.
+- Owners: best-guess from blame + committer frequency.
+- Collision risks: in-flight refactors/migrations visible in log.
+Rules:
+- Read-only. Never amend history, never git push.
+- If a path is new (no history), say so explicitly rather than fabricating context.
+\`\`\`
 `;
 }
 function docUpdaterEnhancedBody() {
@@ -597,6 +756,16 @@ export function enhancedAgentBody(agentName) {
             return testAuthorEnhancedBody();
         case "doc-updater":
             return docUpdaterEnhancedBody();
+        case "repo-research-analyst":
+            return repoResearchAnalystEnhancedBody();
+        case "learnings-researcher":
+            return learningsResearcherEnhancedBody();
+        case "framework-docs-researcher":
+            return frameworkDocsResearcherEnhancedBody();
+        case "best-practices-researcher":
+            return bestPracticesResearcherEnhancedBody();
+        case "git-history-analyzer":
+            return gitHistoryAnalyzerEnhancedBody();
         default:
             return `