npm - cclaw-cli - Versions diffs - 0.3.0 → 0.5.0 - Mend

cclaw-cli 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

package/README.md +7 -7
package/dist/constants.d.ts +2 -2
package/dist/constants.js +6 -3
package/dist/content/agents.d.ts +1 -1
package/dist/content/agents.js +12 -16
package/dist/content/contracts.js +2 -3
package/dist/content/examples.js +4 -3
package/dist/content/hooks.js +4 -6
package/dist/content/learnings.js +1 -1
package/dist/content/meta-skill.js +21 -32
package/dist/content/next-command.d.ts +3 -3
package/dist/content/next-command.js +78 -63
package/dist/content/observe.js +22 -8
package/dist/content/session-hooks.js +1 -1
package/dist/content/skills.js +42 -52
package/dist/content/stage-schema.d.ts +1 -1
package/dist/content/stage-schema.js +75 -160
package/dist/content/start-command.d.ts +10 -0
package/dist/content/start-command.js +109 -0
package/dist/content/subagents.js +3 -3
package/dist/content/templates.d.ts +2 -2
package/dist/content/templates.js +5 -5
package/dist/content/utility-skills.js +2 -2
package/dist/doctor.js +10 -41
package/dist/harness-adapters.js +13 -42
package/dist/install.js +10 -6
package/dist/policy.js +1 -10
package/dist/types.d.ts +1 -1
package/dist/types.js +1 -2
package/package.json +1 -1
package/dist/content/autoplan.d.ts +0 -7
package/dist/content/autoplan.js +0 -344

package/dist/content/observe.js CHANGED Viewed

@@ -226,10 +226,9 @@ stage_index() {
     design) echo 3 ;;
     spec) echo 4 ;;
     plan) echo 5 ;;
-    test) echo 6 ;;
-    build) echo 7 ;;
-    review) echo 8 ;;
-    ship) echo 9 ;;
+    tdd) echo 6 ;;
+    review) echo 7 ;;
+    ship) echo 8 ;;
     *) echo 0 ;;
   esac
 }
@@ -262,8 +261,8 @@ is_preimplementation_stage() {
 detect_target_stage() {
   local text="$1"
-  for stage in brainstorm scope design spec plan test build review ship; do
-    if printf '%s' "$text" | grep -Eq "(/cc-$stage|cc-$stage)\\b"; then
+  for stage in brainstorm scope design spec plan tdd review ship; do
+    if printf '%s' "$text" | grep -Eq "(/cc-$stage|cc-$stage)([^[:alnum:]_-]|$)"; then
       printf '%s' "$stage"
       return 0
     fi
@@ -272,7 +271,22 @@ detect_target_stage() {
   return 0
 }
+is_flow_progression_command() {
+  local text="$1"
+  if printf '%s' "$text" | grep -Eq '(/cc-next|cc-next)([^[:alnum:]_-]|$)'; then
+    return 0
+  fi
+  if printf '%s' "$text" | grep -Eq '/cc([^[:alnum:]_-]|$)'; then
+    return 0
+  fi
+  return 1
+}
 TARGET_STAGE=$(detect_target_stage "$PAYLOAD_LOWER")
+FLOW_COMMAND_INVOKED=0
+if is_flow_progression_command "$PAYLOAD_LOWER"; then
+  FLOW_COMMAND_INVOKED=1
+fi
 if [ -n "$TARGET_STAGE" ] && [ "$CURRENT_STAGE" != "none" ]; then
   CURRENT_IDX=$(stage_index "$CURRENT_STAGE")
   TARGET_IDX=$(stage_index "$TARGET_STAGE")
@@ -305,7 +319,7 @@ if is_preimplementation_stage "$CURRENT_STAGE" && ! is_plan_mode_safe_tool "$TOO
   fi
 fi
-if [ -n "$TARGET_STAGE" ]; then
+if [ -n "$TARGET_STAGE" ] || [ "$FLOW_COMMAND_INVOKED" -eq 1 ]; then
   if [ "$LAST_FLOW_READ_AT" -le 0 ] || [ "$NOW_EPOCH" -le 0 ] || [ $((NOW_EPOCH - LAST_FLOW_READ_AT)) -gt "$MAX_FLOW_READ_AGE_SEC" ]; then
     if [ -n "$REASONS" ]; then
       REASONS="$REASONS,stage_invocation_without_recent_flow_read"
@@ -357,7 +371,7 @@ PY
 fi
 if [ -n "$REASONS" ]; then
-  NOTE="Cclaw workflow guard: detected potential flow violation (\${REASONS}). Re-read ${RUNTIME_ROOT}/state/flow-state.json, avoid source edits before build/test stages, and continue from current stage ordering."
+  NOTE="Cclaw workflow guard: detected potential flow violation (\${REASONS}). Re-read ${RUNTIME_ROOT}/state/flow-state.json, avoid source edits before tdd stage, and continue from current stage ordering."
   if command -v jq >/dev/null 2>&1; then
     ENTRY=$(jq -n -c \
       --arg ts "$TS" \

package/dist/content/session-hooks.js CHANGED Viewed

@@ -90,7 +90,7 @@ When creating a checkpoint at session boundaries:
 \`\`\`json
 {
-  "stage": "build",
+  "stage": "tdd",
   "status": "in_progress",
   "lastCompletedStep": "GREEN for task T2",
   "remainingSteps": ["REFACTOR T2", "RED T3", "GREEN T3", "REFACTOR T3"],

package/dist/content/skills.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import { RUNTIME_ROOT } from "../constants.js";
 import { stageExamples } from "./examples.js";
 import { selfImprovementBlock } from "./learnings.js";
-import { nextCclawCommand, QUESTION_FORMAT_SPEC, ERROR_BUDGET_SPEC, stageAutoSubagentDispatch, stageSchema } from "./stage-schema.js";
+import { QUESTION_FORMAT_SPEC, ERROR_BUDGET_SPEC, stageAutoSubagentDispatch, stageSchema } from "./stage-schema.js";
 function artifactFileName(artifactPath) {
     const parts = artifactPath.split("/");
     return parts[parts.length - 1] ?? artifactPath;
@@ -133,7 +133,7 @@ Mandatory agents for this stage: ${mandatoryList}. Stage transition is BLOCKED u
 On session stop or stage completion, the agent should write delegation entries to \`${delegationLogRel}\` for audit.
 `;
 }
-const VERIFICATION_STAGES = ["build", "review", "ship"];
+const VERIFICATION_STAGES = ["tdd", "review", "ship"];
 function waveExecutionModeBlock(stage) {
     const schema = stageSchema(stage);
     if (!schema.waveExecutionAllowed) {
@@ -145,55 +145,50 @@ After plan approval (**WAIT_FOR_CONFIRM** / \`plan_wait_for_confirm\` satisfied)
 `;
 }
-function stageRequiresExplicitPause(schema) {
-    const pauseRules = [
-        /\bWAIT_FOR_CONFIRM\b/,
-        /\*\*STOP\.\*\*/,
-        /Do NOT auto-advance/i,
-        /Do NOT proceed until user/i,
-        /wait for explicit user approval/i,
-        /wait for explicit approval/i,
-        /explicitly pause/i
-    ];
-    const stageText = [
-        schema.hardGate,
-        ...schema.checklist,
-        ...schema.interactionProtocol,
-        ...schema.process,
-        ...schema.exitCriteria
-    ];
-    return stageText.some((line) => pauseRules.some((rule) => rule.test(line)));
-}
-function stageTransitionAutoAdvanceBlock(schema, nextCommand) {
-    if (schema.next === "done") {
-        return "";
+function stageCompletionProtocol(schema) {
+    const stage = schema.stage;
+    const gateIds = schema.requiredGates.map((g) => g.id);
+    const gateList = gateIds.map((id) => `\`${id}\``).join(", ");
+    const nextStage = schema.next === "done" ? null : schema.next;
+    const stateUpdate = nextStage
+        ? `   - Set \`currentStage\` to \`"${nextStage}"\`
+   - Add \`"${stage}"\` to \`completedStages\` array
+   - Move all gate IDs for this stage (${gateList}) into \`stageGateCatalog.${stage}.passed\`
+   - Clear \`stageGateCatalog.${stage}.blocked\``
+        : `   - Add \`"${stage}"\` to \`completedStages\` array
+   - Move all gate IDs for this stage (${gateList}) into \`stageGateCatalog.${stage}.passed\`
+   - Clear \`stageGateCatalog.${stage}.blocked\``;
+    let nextAction;
+    if (nextStage) {
+        const nextSchema = stageSchema(nextStage);
+        const nextDescription = nextSchema.skillDescription.charAt(0).toLowerCase() + nextSchema.skillDescription.slice(1);
+        nextAction = `3. Tell the user:\n\n   > **Stage \`${stage}\` complete.** Next: **${nextStage}** — ${nextDescription}\n   >\n   > Run \`/cc-next\` to continue.`;
     }
-    if (stageRequiresExplicitPause(schema)) {
-        return `## Stage transition (gate chain)
-**STOP.** This stage requires explicit user confirmation before advancing.
-Even if project config has \`autoAdvance: true\`, this stage's pause rule takes precedence.
-Do NOT auto-advance after gates pass. Present a summary of completed gates and suggest \`${nextCommand}\`, then wait for explicit user approval.
-`;
+    else {
+        nextAction = `3. Tell the user:\n\n   > **Flow complete.** All stages finished. The project is ready for release.`;
     }
-    return `## Stage transition (gate chain)
+    return `## Stage Completion Protocol
+When all required gates are satisfied and the artifact is written:
-After all gates pass, suggest the next command (\`${nextCommand}\`).
-If project config at \`${RUNTIME_ROOT}/config.yaml\` has \`autoAdvance: true\`, proceed automatically.
-Otherwise, **STOP** and wait for user confirmation before advancing.
+1. **Update \`${RUNTIME_ROOT}/state/flow-state.json\`:**
+${stateUpdate}
+2. **Sync artifact** to \`${RUNTIME_ROOT}/runs/<activeRunId>/artifacts/${schema.artifactFile}\`
+${nextAction}
+**STOP.** Do not load the next stage skill yourself. The user will run \`/cc-next\` when ready (same session or new session).
 `;
 }
-function progressiveDisclosureBlock(stage, nextCommand) {
+function stageTransitionAutoAdvanceBlock(schema) {
+    return stageCompletionProtocol(schema);
+}
+function progressiveDisclosureBlock(stage) {
     const schema = stageSchema(stage);
     const stageSpecificRefs = {
         brainstorm: [
-            "- `.cclaw/skills/autoplan/SKILL.md` — when the user wants brainstorm→plan orchestration in one flow",
             "- `.cclaw/skills/learnings/SKILL.md` — to capture durable framing insights early"
         ],
         scope: [
-            "- `.cclaw/skills/autoplan/SKILL.md` — for coordinated premise challenge across early stages",
             "- `.cclaw/skills/learnings/SKILL.md` — to persist rejected assumptions and constraints"
         ],
         design: [
@@ -208,12 +203,9 @@ function progressiveDisclosureBlock(stage, nextCommand) {
             "- `.cclaw/skills/subagent-dev/SKILL.md` — for specialist delegation prompts by task slice",
             "- `.cclaw/skills/parallel-dispatch/SKILL.md` — for multi-agent review planning and reconciliation setup"
         ],
-        test: [
-            "- `.cclaw/skills/debugging/SKILL.md` — when RED behavior is unclear or flakes appear",
-            "- `.cclaw/skills/subagent-dev/SKILL.md` — for machine-only test-slice delegation"
-        ],
-        build: [
-            "- `.cclaw/skills/debugging/SKILL.md` — for root-cause workflow when implementation fails tests",
+        tdd: [
+            "- `.cclaw/skills/debugging/SKILL.md` — when RED behavior is unclear, flakes appear, or implementation fails tests",
+            "- `.cclaw/skills/subagent-dev/SKILL.md` — for machine-only test-slice delegation",
             "- `.cclaw/skills/performance/SKILL.md` — when implementation choices impact latency/throughput"
         ],
         review: [
@@ -237,7 +229,7 @@ function progressiveDisclosureBlock(stage, nextCommand) {
 - Meta routing and activation rules: \`.cclaw/skills/using-cclaw/SKILL.md\`
 - Session continuity and checkpoint behavior: \`.cclaw/skills/session/SKILL.md\`
 ${stageSpecificRefs[stage].join("\n")}
-- Next-stage handoff command: \`${nextCommand}\`
+- Progression command: \`/cc-next\` (reads flow-state, loads the next stage)
 `;
 }
 function verificationBlock(stage) {
@@ -282,7 +274,6 @@ export function stageSkillFolder(stage) {
 }
 function quickStartBlock(stage) {
     const schema = stageSchema(stage);
-    const nextCommand = nextCclawCommand(stage);
     const topGates = schema.requiredGates.slice(0, 3).map((g) => `\`${g.id}\``).join(", ");
     return `## Quick Start (minimum compliance)
@@ -291,12 +282,11 @@ function quickStartBlock(stage) {
 > 2. Complete every checklist step in order and write the artifact to \`.cclaw/artifacts/${schema.artifactFile}\` (canonical run copy: \`.cclaw/runs/<activeRunId>/artifacts/${schema.artifactFile}\`).
 > 3. Do not claim completion without satisfying gates: ${topGates}${schema.requiredGates.length > 3 ? ` (+${schema.requiredGates.length - 3} more)` : ""}.
 >
-> **Next command after this stage:** ${nextCommand}
+> **After this stage:** update \`flow-state.json\` and tell the user to run \`/cc-next\`.
 `;
 }
 export function stageSkillMarkdown(stage) {
     const schema = stageSchema(stage);
-    const nextCommand = nextCclawCommand(stage);
     const gateList = schema.requiredGates
         .map((g) => `- \`${g.id}\` — ${g.description}`)
         .join("\n");
@@ -382,11 +372,11 @@ ${completionStatusBlock(stage)}
 ## Verification
 ${schema.exitCriteria.map((item) => `- [ ] ${item}`).join("\n")}
-${stageTransitionAutoAdvanceBlock(schema, nextCommand)}
-${progressiveDisclosureBlock(stage, nextCommand)}
+${stageTransitionAutoAdvanceBlock(schema)}
+${progressiveDisclosureBlock(stage)}
 ${selfImprovementBlock(stage)}
 ## Handoff
-- Next command: ${nextCommand}
+- Next command: \`/cc-next\` (loads whatever stage is current in flow-state)
 - Required artifact: \`.cclaw/artifacts/${schema.artifactFile}\` (canonical: \`.cclaw/runs/<activeRunId>/artifacts/${schema.artifactFile}\`)
 - Stage stays blocked if any required gate is unsatisfied
 `;

package/dist/content/stage-schema.d.ts CHANGED Viewed

@@ -69,7 +69,7 @@ export interface StageSchema {
     artifactValidation: ArtifactValidation[];
     namedAntiPattern?: NamedAntiPattern;
     decisionRecordFormat?: string;
-    /** When true, stage skill includes wave auto-execute guidance (test/build). */
+    /** When true, stage skill includes wave auto-execute guidance (tdd). */
     waveExecutionAllowed?: boolean;
     /** Agent names that MUST be dispatched (or waived) before stage transition — derived from mandatory auto-subagent rows. */
     mandatoryDelegations: string[];

package/dist/content/stage-schema.js CHANGED Viewed

@@ -44,7 +44,7 @@ const BRAINSTORM = {
         "Write design doc — save to `.cclaw/artifacts/01-brainstorm.md`.",
         "Self-review — scan for placeholders, TBDs, contradictions, ambiguity, scope creep. Fix inline.",
         "User reviews written artifact — ask user to review before proceeding. **STOP.** Do NOT proceed until user responds.",
-        "Transition — invoke /cc-scope only after explicit user approval. **STOP.** Do NOT auto-advance to scope."
+        "Stage complete — update `flow-state.json` per the Stage Completion Protocol. Tell user to run `/cc-next` to continue to scope."
     ],
     interactionProtocol: [
         "Explore context first (files, docs, existing behavior).",
@@ -679,7 +679,7 @@ const PLAN = {
     ],
     whenNotToUse: [
         "Specification is unapproved or lacks measurable acceptance criteria",
-        "Execution is already in test/build stages with active slice evidence",
+        "Execution is already in TDD stage with active slice evidence",
         "The request is only release packaging with no task decomposition needed"
     ],
     checklist: [
@@ -689,15 +689,15 @@ const PLAN = {
         "Slice into vertical tasks — each task targets 2-5 minutes, produces one testable outcome, and touches one coherent area.",
         "Attach verification — every task has an acceptance criterion mapping and a concrete verification command.",
         "Define checkpoints — mark points where progress should be validated before continuing.",
-        "WAIT_FOR_CONFIRM — write plan artifact and explicitly pause. **STOP.** Do NOT proceed to /cc-test until user confirms."
+        "WAIT_FOR_CONFIRM — write plan artifact and explicitly pause. **STOP.** Do NOT proceed until user confirms. Then update `flow-state.json` and tell user to run `/cc-next`."
     ],
     interactionProtocol: [
         "Plan in read-only mode relative to implementation.",
         "Split work into small vertical slices (target 2-5 minute tasks).",
         "Publish explicit dependency waves with entry and exit checks for each wave.",
         "Attach verification step to every task.",
-        "Enforce WAIT_FOR_CONFIRM before moving to /cc-test. Use AskQuestion/AskUserQuestion tool: present the plan summary with options (A) Approve / (B) Revise / (C) Reject.",
-        "**STOP.** Do NOT proceed to /cc-test until user explicitly approves. Do not auto-advance."
+        "Enforce WAIT_FOR_CONFIRM: present the plan summary with options (A) Approve / (B) Revise / (C) Reject.",
+        "**STOP.** Do NOT proceed until user explicitly approves. Then update `flow-state.json` and tell user to run `/cc-next`."
     ],
     process: [
         "Build dependency graph and ordered slices.",
@@ -760,7 +760,7 @@ const PLAN = {
     ],
     policyNeedles: ["WAIT_FOR_CONFIRM", "Task Graph", "Dependency Waves", "Acceptance Mapping", "verification steps"],
     artifactFile: "05-plan.md",
-    next: "test",
+    next: "tdd",
     cognitivePatterns: [
         { name: "Vertical Slice Thinking", description: "Each task delivers one thin end-to-end slice of value. Horizontal layers (all models, then all controllers) create integration risk. Vertical slices (one feature through all layers) reduce it." },
         { name: "Two-Minute Smell Test", description: "If a competent engineer cannot understand and start a task in two minutes, the task is too large or too vague. Break it down further." },
@@ -782,202 +782,124 @@ const PLAN = {
     ]
 };
 // ---------------------------------------------------------------------------
-// TEST — TDD RED stage
+// TDD — RED → GREEN → REFACTOR cycle (merged test + build)
 // ---------------------------------------------------------------------------
-const TEST = {
-    stage: "test",
-    skillFolder: "red-first-testing",
-    skillName: "red-first-testing",
-    skillDescription: "TDD RED stage. Establish failing tests as proof before implementation changes.",
-    hardGate: "Do NOT change implementation code. This stage writes failing tests ONLY. If you find yourself editing non-test files, STOP — you have left the RED stage.",
-    purpose: "Create RED evidence tied to acceptance criteria before any implementation.",
+const TDD = {
+    stage: "tdd",
+    skillFolder: "test-driven-development",
+    skillName: "test-driven-development",
+    skillDescription: "Full TDD cycle: RED (failing tests), GREEN (minimal implementation), REFACTOR (cleanup). One plan slice at a time with strict traceability.",
+    hardGate: "Do NOT merge, ship, or skip review. Follow RED → GREEN → REFACTOR strictly for each plan slice. Do NOT write implementation code before RED tests exist. Do NOT skip the REFACTOR step.",
+    purpose: "Implement features through the TDD cycle: write failing tests, make them pass with minimal code, then refactor.",
     whenToUse: [
         "After plan confirmation",
-        "Before /cc-build",
-        "For every behavior change in scope"
+        "For every behavior change in scope",
+        "Before review stage"
     ],
     whenNotToUse: [
         "Plan approval is still pending WAIT_FOR_CONFIRM",
         "The change is docs-only and does not alter behavior",
-        "GREEN implementation has started before RED evidence"
+        "The stage intent is review/ship sign-off rather than implementation"
     ],
     checklist: [
         "Select plan slice — pick one task from the plan. Do not batch multiple tasks.",
         "Map to acceptance criterion — identify the specific spec criterion this test proves.",
-        "Write behavior-focused test — test the expected behavior, not implementation details. Name tests descriptively.",
-        "Run tests and observe failure — tests MUST fail. If they pass, either the behavior already exists or the test is wrong.",
-        "Capture failure output — copy the exact failure output as RED evidence. Record in artifact.",
+        "RED: Write behavior-focused test — test the expected behavior, not implementation details. Tests MUST fail.",
+        "RED: Capture failure output — copy the exact failure output as RED evidence. Record in artifact.",
+        "GREEN: Minimal implementation — write the smallest code change that makes the RED tests pass. No extra features.",
+        "GREEN: Run full suite — execute ALL tests, not just the ones you wrote. The full suite must be GREEN.",
+        "GREEN: Verify no regressions — if any existing test breaks, fix the regression before proceeding.",
+        "REFACTOR: Improve code quality — without changing behavior. Document what you changed and why.",
+        "Record evidence — capture RED failure, GREEN output, and REFACTOR notes in the TDD artifact.",
+        "Annotate traceability — link to plan task ID and spec criterion.",
         "Repeat for each slice — return to step 1 for the next plan slice."
     ],
     interactionProtocol: [
         "Pick one planned slice at a time.",
-        "Write behavior-focused tests before changing implementation.",
+        "Write behavior-focused tests before changing implementation (RED).",
         "Capture and store failing output as RED evidence.",
-        "Do not proceed to build without RED evidence.",
+        "Apply minimal change to satisfy RED tests (GREEN).",
+        "Run full suite, not partial checks, for GREEN validation.",
+        "Refactor without changing behavior and document rationale (REFACTOR).",
+        "Stop if regressions appear and fix before proceeding.",
         "If a test passes unexpectedly, investigate: does the behavior already exist, or is the test wrong?"
     ],
     process: [
         "Select slice and map to acceptance criterion.",
-        "Write test(s) that fail for expected reason.",
+        "Write test(s) that fail for expected reason (RED).",
         "Run tests and capture failure output.",
-        "Record RED evidence in TDD artifact.",
-        "Verify failure reason matches expected missing behavior."
-    ],
-    requiredGates: [
-        { id: "tdd_red_test_written", description: "Failing tests exist before implementation changes." },
-        { id: "tdd_red_failure_captured", description: "Failure output is captured as evidence." },
-        { id: "tdd_trace_to_acceptance", description: "RED tests trace to explicit acceptance criteria." },
-        { id: "tdd_red_failure_reason_verified", description: "Failure is for the expected reason, not an unrelated error." }
-    ],
-    requiredEvidence: [
-        "Artifact updated at `.cclaw/artifacts/06-tdd.md` RED section.",
-        "Failing command output captured.",
-        "Acceptance mapping documented.",
-        "Failure reason analysis recorded."
-    ],
-    inputs: ["approved plan slice", "spec acceptance criterion", "test harness configuration"],
-    requiredContext: ["plan artifact", "spec artifact", "existing test patterns"],
-    outputs: ["failing test set", "captured RED evidence", "ready signal for GREEN stage"],
-    blockers: [
-        "tests pass before behavior change",
-        "failure reason does not match expected behavior",
-        "no evidence recorded"
-    ],
-    exitCriteria: [
-        "RED evidence exists and is traceable",
-        "required gates marked satisfied",
-        "no implementation changes made in this stage",
-        "failure reason verified for each test"
-    ],
-    antiPatterns: [
-        "Writing code before failing test",
-        "Asserting implementation details instead of behavior",
-        "Skipping evidence capture",
-        "Testing multiple slices without recording evidence for each"
-    ],
-    rationalizations: [
-        { claim: "This change is obvious, tests can be added later.", reality: "Without RED proof, regressions hide behind optimistic assumptions." },
-        { claim: "A passing baseline is enough to continue.", reality: "Baseline pass does not prove new behavior requirements." },
-        { claim: "One broad integration test is enough.", reality: "Slice-level RED tests are required for precise failure signal." }
-    ],
-    redFlags: [
-        "No failing test output",
-        "No acceptance linkage",
-        "Implementation edits appear before RED evidence",
-        "Test passes without behavior change"
-    ],
-    policyNeedles: ["RED", "failing test", "acceptance criteria", "no implementation changes"],
-    artifactFile: "06-tdd.md",
-    next: "build",
-    cognitivePatterns: [
-        { name: "Behavior Over Implementation", description: "Tests describe WHAT the system does, not HOW. Test the observable behavior from outside the unit. If you need to test internals, the design needs work." },
-        { name: "Failure-First Thinking", description: "The failing test IS the specification. Until you see the right failure, you do not understand what you are building. Wrong failures are information." },
-        { name: "Proof Before Claim", description: "Do not claim a feature works without evidence. RED output is proof of what is missing. GREEN output is proof it was added. Both are required." }
-    ],
-    reviewSections: [],
-    completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
-    crossStageTrace: {
-        readsFrom: [".cclaw/artifacts/05-plan.md", ".cclaw/artifacts/04-spec.md"],
-        writesTo: [".cclaw/artifacts/06-tdd.md"],
-        traceabilityRule: "Every RED test traces to a plan task. Every plan task traces to a spec criterion. Evidence chain: spec -> plan -> RED test -> failure output."
-    },
-    artifactValidation: [
-        { section: "RED Evidence", required: true, validationRule: "Failing test output captured per slice." },
-        { section: "Acceptance Mapping", required: true, validationRule: "Each RED test links to a plan task and spec criterion." },
-        { section: "Failure Analysis", required: true, validationRule: "Failure reason matches expected missing behavior." }
-    ],
-    waveExecutionAllowed: true
-};
-// ---------------------------------------------------------------------------
-// BUILD — TDD GREEN + REFACTOR stage
-// ---------------------------------------------------------------------------
-const BUILD = {
-    stage: "build",
-    skillFolder: "incremental-implementation",
-    skillName: "incremental-implementation",
-    skillDescription: "TDD GREEN and REFACTOR stage with strict traceability to plan slices.",
-    hardGate: "Do NOT merge, ship, or skip review. This stage produces GREEN and REFACTOR evidence for one plan slice at a time. If you are touching files unrelated to the current slice, STOP.",
-    purpose: "Implement minimal passing change, run full suite GREEN, then refactor safely.",
-    whenToUse: [
-        "After RED evidence is complete",
-        "For one accepted plan slice at a time",
-        "Before review stage"
-    ],
-    whenNotToUse: [
-        "RED evidence is missing or failure reason is unverified",
-        "Multiple unrelated slices are being merged into one build pass",
-        "The stage intent is review/ship sign-off rather than implementation"
-    ],
-    checklist: [
-        "Minimal GREEN change — implement the smallest code change that makes the RED tests pass. No extra features.",
-        "Run full suite — execute ALL tests, not just the ones you wrote. The full suite must be GREEN.",
-        "Verify no regressions — if any existing test breaks, fix the regression before proceeding.",
-        "Refactor pass — improve code quality without changing behavior. Document what you changed and why.",
-        "Record evidence — capture GREEN output and REFACTOR notes in the TDD artifact.",
-        "Annotate traceability — link the implementation to the plan task ID and spec criterion."
-    ],
-    interactionProtocol: [
-        "Apply minimal change to satisfy RED tests.",
-        "Run full suite, not partial checks, for GREEN validation.",
-        "Refactor without changing behavior and document rationale.",
-        "Stop if regressions appear and return to prior step.",
-        "Record traceability to plan slice explicitly."
-    ],
-    process: [
         "Implement smallest change needed for GREEN.",
         "Run full tests and build checks.",
         "Perform refactor pass preserving behavior.",
-        "Record GREEN and REFACTOR evidence in artifact.",
+        "Record RED, GREEN, and REFACTOR evidence in artifact.",
         "Annotate traceability to plan task and spec criterion."
     ],
     requiredGates: [
-        { id: "build_minimal_change_applied", description: "Implementation matches a single plan slice." },
+        { id: "tdd_red_test_written", description: "Failing tests exist before implementation changes." },
+        { id: "tdd_red_failure_captured", description: "Failure output is captured as evidence." },
+        { id: "tdd_trace_to_acceptance", description: "RED tests trace to explicit acceptance criteria." },
+        { id: "tdd_red_failure_reason_verified", description: "Failure is for the expected reason, not an unrelated error." },
         { id: "tdd_green_full_suite", description: "Full relevant suite passes in GREEN state." },
         { id: "tdd_refactor_completed", description: "Refactor pass completed with behavior preservation verified." },
         { id: "tdd_refactor_notes_written", description: "Refactor decisions and outcomes are documented." },
-        { id: "build_traceable_to_plan", description: "Change traceability to plan slice is explicit." }
+        { id: "tdd_traceable_to_plan", description: "Change traceability to plan slice is explicit." }
     ],
     requiredEvidence: [
-        "Artifact `.cclaw/artifacts/06-tdd.md` includes GREEN and REFACTOR sections.",
-        "Full test/build output recorded.",
-        "Traceability to task identifier is documented.",
-        "Refactor rationale captured."
-    ],
-    inputs: ["RED evidence", "approved plan slice", "coding standards and constraints"],
-    requiredContext: ["tdd artifact", "plan artifact", "spec acceptance criteria"],
-    outputs: ["passing implementation", "refactor evidence", "review-ready change set"],
+        "Artifact updated at `.cclaw/artifacts/06-tdd.md` with RED, GREEN, and REFACTOR sections.",
+        "Failing command output captured (RED).",
+        "Full test/build output recorded (GREEN).",
+        "Acceptance mapping documented.",
+        "Failure reason analysis recorded.",
+        "Refactor rationale captured.",
+        "Traceability to task identifier is documented."
+    ],
+    inputs: ["approved plan slice", "spec acceptance criterion", "test harness configuration", "coding standards and constraints"],
+    requiredContext: ["plan artifact", "spec artifact", "existing test patterns"],
+    outputs: ["failing test set", "passing implementation", "refactor evidence", "review-ready change set"],
     blockers: [
-        "no RED evidence",
+        "tests pass before behavior change (RED failure missing)",
         "full suite not green",
-        "behavior changed during refactor"
+        "behavior changed during refactor",
+        "no evidence recorded"
     ],
     exitCriteria: [
-        "GREEN evidence captured",
+        "RED evidence exists and is traceable",
+        "GREEN evidence captured with full suite pass",
         "REFACTOR evidence captured",
         "required gates marked satisfied",
         "traceability annotated"
     ],
     antiPatterns: [
+        "Writing code before failing test",
+        "Asserting implementation details instead of behavior",
         "Big-bang implementation across multiple slices",
         "Partial test runs presented as GREEN",
+        "Skipping evidence capture",
         "Undocumented refactor changes",
         "Adding features beyond what RED tests require"
     ],
     rationalizations: [
+        { claim: "This change is obvious, tests can be added later.", reality: "Without RED proof, regressions hide behind optimistic assumptions." },
+        { claim: "A passing baseline is enough to continue.", reality: "Baseline pass does not prove new behavior requirements." },
+        { claim: "One broad integration test is enough.", reality: "Slice-level RED tests are required for precise failure signal." },
         { claim: "Refactor can be skipped for speed.", reality: "Skipping refactor accumulates debt and weakens maintainability." },
         { claim: "Only changed tests need to pass.", reality: "Full-suite checks are needed to detect regressions." },
         { claim: "Traceability is implied by commit diff.", reality: "Explicit mapping avoids ambiguity in review and rollback." }
     ],
     redFlags: [
+        "No failing test output (RED missing)",
+        "Implementation edits appear before RED evidence",
         "No full-suite GREEN evidence",
         "No refactor notes",
         "Multiple tasks implemented in one pass without justification",
         "Files changed outside current slice scope"
     ],
-    policyNeedles: ["GREEN", "full test suite", "REFACTOR", "traceable to plan slice"],
+    policyNeedles: ["RED", "GREEN", "REFACTOR", "failing test", "full test suite", "acceptance criteria", "traceable to plan slice"],
     artifactFile: "06-tdd.md",
     next: "review",
     cognitivePatterns: [
+        { name: "Behavior Over Implementation", description: "Tests describe WHAT the system does, not HOW. Test the observable behavior from outside the unit. If you need to test internals, the design needs work." },
+        { name: "Failure-First Thinking", description: "The failing test IS the specification. Until you see the right failure, you do not understand what you are building. Wrong failures are information." },
         { name: "Minimal Viable Change", description: "The best implementation is the smallest one that passes all RED tests. Every extra line is risk. Resist the urge to 'improve while you are here.'" },
         { name: "Regression Paranoia", description: "Assume every change breaks something until the full suite proves otherwise. Partial test runs are lies of omission." },
         { name: "Refactor-as-Hygiene", description: "Refactoring is not optional cleanup — it is the third leg of TDD. GREEN without REFACTOR accumulates mess. REFACTOR without GREEN breaks things." }
@@ -985,11 +907,14 @@ const BUILD = {
     reviewSections: [],
     completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
     crossStageTrace: {
-        readsFrom: [".cclaw/artifacts/06-tdd.md", ".cclaw/artifacts/05-plan.md"],
+        readsFrom: [".cclaw/artifacts/05-plan.md", ".cclaw/artifacts/04-spec.md"],
         writesTo: [".cclaw/artifacts/06-tdd.md"],
-        traceabilityRule: "Every GREEN change traces to a RED test. Every RED test traces to a plan task. Evidence chain must be unbroken."
+        traceabilityRule: "Every RED test traces to a plan task. Every GREEN change traces to a RED test. Every plan task traces to a spec criterion. Evidence chain must be unbroken."
     },
     artifactValidation: [
+        { section: "RED Evidence", required: true, validationRule: "Failing test output captured per slice." },
+        { section: "Acceptance Mapping", required: true, validationRule: "Each RED test links to a plan task and spec criterion." },
+        { section: "Failure Analysis", required: true, validationRule: "Failure reason matches expected missing behavior." },
         { section: "GREEN Evidence", required: true, validationRule: "Full suite pass output captured." },
         { section: "REFACTOR Notes", required: true, validationRule: "What changed, why, behavior preservation confirmed." },
         { section: "Traceability", required: true, validationRule: "Plan task ID and spec criterion linked." }
@@ -1007,13 +932,13 @@ const REVIEW = {
     hardGate: "Do NOT ship, merge, or release until both review layers complete with an explicit verdict. No exceptions for urgency. Critical blockers MUST be resolved before handoff.",
     purpose: "Validate that implementation matches spec and meets quality/security/performance bar through structured two-layer review.",
     whenToUse: [
-        "After build stage completes",
+        "After TDD stage completes",
         "Before any ship action",
         "When release risk must be assessed explicitly"
     ],
     whenNotToUse: [
         "There is no implementation diff to review",
-        "Build stage evidence is missing or stale",
+        "TDD stage evidence is missing or stale",
         "The goal is direct release execution without layered quality checks"
     ],
     checklist: [
@@ -1364,8 +1289,7 @@ const STAGE_SCHEMA_MAP = {
     design: DESIGN,
     spec: SPEC,
     plan: PLAN,
-    test: TEST,
-    build: BUILD,
+    tdd: TDD,
     review: REVIEW,
     ship: SHIP
 };
@@ -1422,21 +1346,12 @@ const STAGE_AUTO_SUBAGENT_DISPATCH = {
             requiresUserGate: false
         }
     ],
-    test: [
-        {
-            agent: "test-author",
-            mode: "mandatory",
-            when: "Always during RED stage.",
-            purpose: "Guarantee failing tests are created before implementation.",
-            requiresUserGate: false
-        }
-    ],
-    build: [
+    tdd: [
         {
             agent: "test-author",
             mode: "mandatory",
-            when: "Always during GREEN + REFACTOR.",
-            purpose: "Keep implementation traceable to RED evidence and full-suite verification.",
+            when: "Always during TDD cycle (RED → GREEN → REFACTOR).",
+            purpose: "Guarantee failing tests, traceable implementation, and full-suite verification.",
             requiresUserGate: false
         },
         {