cclaw-cli 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,240 @@
1
+ /**
2
+ * Per-harness tool-mapping reference files.
3
+ *
4
+ * Addresses A.1#4: the four supported harnesses (claude, cursor, opencode, codex)
5
+ * expose different primitive names for the same capabilities (ask-user,
6
+ * delegate/Task, web fetch, file edit, code execution, ...). cclaw's stage skills
7
+ * need to pick the right name at runtime without bloating every stage with per-harness
8
+ * if/else ladders.
9
+ *
10
+ * Each file below is short (one table per capability), authoritative, and materialised
11
+ * at `.cclaw/references/harness-tools/<harness>.md`. Stage skills and the meta-skill
12
+ * cite the folder instead of duplicating the mappings inline.
13
+ *
14
+ * When a new harness is added (or an existing one renames a tool), update the
15
+ * corresponding entry here — do NOT scatter tool names across skill text.
16
+ */
17
+ export const HARNESS_TOOL_REFS_DIR = "references/harness-tools";
18
+ const CLAUDE_TOOLS_MD = `---
19
+ harness: claude
20
+ name: Claude Code tool map
21
+ description: "Canonical mapping of cclaw capability names → Claude Code tool names. Cited by stage skills; do not duplicate in per-stage text."
22
+ ---
23
+
24
+ # Claude Code — Tool Map
25
+
26
+ Use this file as the single source of truth for which Claude Code tool to call when a cclaw skill references a generic capability.
27
+
28
+ ## Core capabilities
29
+
30
+ | cclaw capability | Claude Code tool | Notes |
31
+ |---|---|---|
32
+ | Ask user a structured question | \`AskUserQuestion\` | Max 4 options; lettered labels ≤12 chars. Fall back to plain-text lettered list on schema error. |
33
+ | Dispatch a subagent (read-only or write) | \`Task\` with \`subagent_type\` | \`explore\` = read-only; \`generalPurpose\` = read-write. Background via \`run_in_background: true\`. |
34
+ | Read file | \`Read\` | Prefer this over \`cat\` / \`head\` / \`tail\`. |
35
+ | Edit file | \`Edit\` (exact string match) or \`Write\` (overwrite) | Always \`Read\` before editing; avoid \`sed\`/\`awk\` unless asked. |
36
+ | Create file | \`Write\` | Reject if the task can be solved by editing an existing file. |
37
+ | Search file contents | \`Grep\` (ripgrep-backed) | Use \`output_mode: files_with_matches\` for file lists. |
38
+ | Find files by name / glob | \`Glob\` | Pattern matches mtime-sorted. |
39
+ | Shell command | \`Bash\` | Background long-running jobs with \`run_in_background: true\` and read their output later instead of blocking. |
40
+ | Fetch URL | \`WebFetch\` | Returns markdown. No auth, no binaries. |
41
+ | Web search | \`WebSearch\` | Use for docs, real-time info, version lookups. |
42
+ | Semantic code search | \`SemanticSearch\` | One directory per call; whole-repo via \`[]\`. |
43
+ | Todo tracking | \`TodoWrite\` | Use \`merge: true\` to update; keep one task \`in_progress\`. |
44
+ | Ask tool (multi-question) | \`AskQuestion\` (Cursor-only, unavailable in Claude) | NOT available in Claude — use \`AskUserQuestion\` instead. |
45
+ | MCP tool call | \`CallMcpTool\` | Always read the tool's schema descriptor first. |
46
+
47
+ ## Decision-protocol mapping
48
+
49
+ When a stage skill says "ask the user a structured question", in Claude Code that means:
50
+
51
+ \`\`\`
52
+ AskUserQuestion({
53
+ questions: [{
54
+ id: "...",
55
+ prompt: "One-sentence decision, plain English",
56
+ options: [
57
+ { id: "a", label: "Short label" }, // ≤12 chars
58
+ { id: "b", label: "Alt label" },
59
+ { id: "c", label: "Recommended" }
60
+ ]
61
+ }]
62
+ })
63
+ \`\`\`
64
+
65
+ One question per call. Never batch.
66
+
67
+ ## Escalation / fall-back
68
+
69
+ If a tool returns a schema error twice in a row (see the meta-skill's Error / Retry Budget), switch to plain-text equivalents:
70
+
71
+ - \`AskUserQuestion\` → write a lettered list in the response, wait for reply.
72
+ - \`Task\` (dispatch) → inline the work in the current turn.
73
+ - \`WebFetch\` → ask the user for the URL's content.
74
+ `;
75
+ const CURSOR_TOOLS_MD = `---
76
+ harness: cursor
77
+ name: Cursor tool map
78
+ description: "Canonical mapping of cclaw capability names → Cursor agent tool names. Cited by stage skills; do not duplicate in per-stage text."
79
+ ---
80
+
81
+ # Cursor — Tool Map
82
+
83
+ Use this file as the single source of truth for which Cursor agent tool to call when a cclaw skill references a generic capability.
84
+
85
+ ## Core capabilities
86
+
87
+ | cclaw capability | Cursor tool | Notes |
88
+ |---|---|---|
89
+ | Ask user a structured question | \`AskQuestion\` | \`questions\` is an array; each question has \`id\`, \`prompt\`, \`options\`, optional \`allow_multiple\`. |
90
+ | Dispatch a subagent | \`Task\` with \`subagent_type\` | Available types: \`generalPurpose\`, \`explore\` (readonly), \`shell\`, \`browser-use\`, \`best-of-n-runner\`. |
91
+ | Read file | \`Read\` | Line-numbered output; avoid \`cat\` / \`head\` / \`tail\`. |
92
+ | Edit file | \`StrReplace\` | Unique \`old_string\` required; use \`replace_all: true\` for bulk renames. |
93
+ | Create file | \`Write\` | Prefer editing existing files. |
94
+ | Search file contents | \`Grep\` (ripgrep-backed) | Output modes: \`content\`, \`files_with_matches\`, \`count\`. |
95
+ | Find files by name / glob | \`Glob\` | Auto-prepends \`**/\` when pattern does not start with it. |
96
+ | Shell command | \`Shell\` | Long-running jobs go to background via \`block_until_ms: 0\`; poll with \`Await\`. |
97
+ | Fetch URL | \`WebFetch\` | Markdown output. |
98
+ | Web search | \`WebSearch\` | Use for real-time info, framework docs, news. |
99
+ | Semantic code search | \`SemanticSearch\` | Prefer for exploratory "how does X work?" queries. |
100
+ | Todo tracking | \`TodoWrite\` | Supports \`merge: true\` for partial updates. |
101
+ | Generate image | \`GenerateImage\` | Only on explicit user request. |
102
+ | Ask structured questions (Claude-style) | \`AskUserQuestion\` | NOT available in Cursor — use \`AskQuestion\`. |
103
+ | MCP tool call | \`CallMcpTool\` | Cursor exposes MCP tools via this wrapper; read the descriptor first. |
104
+ | Jupyter notebook edit | \`EditNotebook\` | Use for \`.ipynb\` only; cell-granular edits. |
105
+ | Mode switching | \`SwitchMode\` | Propose plan/agent mode changes when task character shifts. |
106
+
107
+ ## Decision-protocol mapping
108
+
109
+ In Cursor, structured asks look like:
110
+
111
+ \`\`\`
112
+ AskQuestion({
113
+ questions: [{
114
+ id: "...",
115
+ prompt: "One-sentence decision",
116
+ options: [
117
+ { id: "a", label: "Option A" },
118
+ { id: "b", label: "Option B" }
119
+ ]
120
+ }]
121
+ })
122
+ \`\`\`
123
+
124
+ ## Escalation / fall-back
125
+
126
+ On repeated tool errors, fall back to plain-text equivalents just like Claude — see the meta-skill's Error / Retry Budget.
127
+ `;
128
+ const OPENCODE_TOOLS_MD = `---
129
+ harness: opencode
130
+ name: OpenCode tool map
131
+ description: "Canonical mapping of cclaw capability names → OpenCode primitives. Cited by stage skills; do not duplicate in per-stage text."
132
+ ---
133
+
134
+ # OpenCode — Tool Map
135
+
136
+ OpenCode exposes a leaner tool surface than Claude Code / Cursor. When a cclaw skill describes a capability that OpenCode lacks, fall back to the plain-text equivalent listed below.
137
+
138
+ ## Core capabilities
139
+
140
+ | cclaw capability | OpenCode primitive | Notes |
141
+ |---|---|---|
142
+ | Ask user a structured question | **Not available as a tool.** | Emit a plain-text lettered list: \`A) ... B) ... C) (recommended) ...\`. Wait for the user's letter. |
143
+ | Dispatch a subagent | **Not available as a tool.** | Inline the work in the current turn, or split across multiple turns with the user driving. |
144
+ | Read file | file-read primitive | Same role as \`Read\`. |
145
+ | Edit file | file-edit primitive | Same role as \`StrReplace\`; confirm diff before writing. |
146
+ | Create file | file-write primitive | Prefer editing existing files. |
147
+ | Search file contents | \`rg\` via shell | Cite \`rg\` output verbatim as evidence when a skill requires a grep result. |
148
+ | Find files by name / glob | \`fd\` or \`find\` via shell | Capture the command + output. |
149
+ | Shell command | shell primitive | Long-running jobs require explicit background + polling — check the OpenCode docs for \`&\` semantics. |
150
+ | Fetch URL | \`curl\` via shell | No markdown conversion; extract manually. |
151
+ | Web search | **Not available.** | Ask the user to paste docs or provide a URL, then fetch via shell. |
152
+ | Todo tracking | **Not available as a tool.** | Maintain a \`### TODO\` block inline in your response; keep one item in progress. |
153
+ | MCP tool call | Depends on runtime config. | If MCP is enabled, use the documented invocation; otherwise treat as unavailable. |
154
+
155
+ ## Decision-protocol mapping
156
+
157
+ \`\`\`
158
+ Decision: <one sentence>.
159
+
160
+ A) <label> — <trade-off>
161
+ B) <label> — <trade-off>
162
+ C) <label> — <trade-off> (recommended, because <one-line reason>)
163
+
164
+ Please reply with the letter.
165
+ \`\`\`
166
+
167
+ ## Escalation / fall-back
168
+
169
+ Because OpenCode lacks native ask-user and dispatch tools, more of cclaw's protocols degrade to plain text. This is expected — the flow gates and artifacts are identical; only the delivery channel changes.
170
+ `;
171
+ const CODEX_TOOLS_MD = `---
172
+ harness: codex
173
+ name: Codex tool map
174
+ description: "Canonical mapping of cclaw capability names → Codex CLI primitives. Cited by stage skills; do not duplicate in per-stage text."
175
+ ---
176
+
177
+ # Codex — Tool Map
178
+
179
+ Codex (OpenAI Codex CLI) exposes roughly the same core surface as OpenCode: file I/O, shell, no native ask-user, no dispatch. Fall back to plain text for anything else.
180
+
181
+ ## Core capabilities
182
+
183
+ | cclaw capability | Codex primitive | Notes |
184
+ |---|---|---|
185
+ | Ask user a structured question | **Not available as a tool.** | Emit a plain-text lettered list; wait for the user's reply. |
186
+ | Dispatch a subagent | **Not available as a tool.** | Inline the work; split turns if needed. |
187
+ | Read file | \`read\` / \`open\` primitive | Same role as \`Read\`. |
188
+ | Edit file | \`edit\` / \`patch\` primitive | Same role as \`StrReplace\`. |
189
+ | Create file | \`write\` primitive | Prefer editing existing files. |
190
+ | Search file contents | \`rg\` via shell | Capture command + output verbatim. |
191
+ | Find files by name / glob | \`fd\` / \`find\` / \`ls\` via shell | Capture command + output. |
192
+ | Shell command | shell primitive | Codex CLI may restrict some binaries by default — check the effective permissions. |
193
+ | Fetch URL | \`curl\` via shell | Extract markdown manually. |
194
+ | Web search | **Not available.** | Ask user for docs / URL. |
195
+ | Todo tracking | **Not available as a tool.** | Keep an inline \`### TODO\` section; update it as you progress. |
196
+ | MCP tool call | Depends on runtime config. | If MCP is wired, cite the descriptor; otherwise treat as unavailable. |
197
+
198
+ ## Decision-protocol mapping
199
+
200
+ \`\`\`
201
+ Decision: <one sentence>.
202
+
203
+ A) <label> — <trade-off>
204
+ B) <label> — <trade-off> (recommended, because <reason>)
205
+ C) <label> — <trade-off>
206
+
207
+ Please reply with the letter.
208
+ \`\`\`
209
+
210
+ ## Escalation / fall-back
211
+
212
+ Treat missing tools as "plain-text required", not "skip the step". The gate still has to pass; only the channel changes.
213
+ `;
214
+ const HARNESS_TOOL_REFS = {
215
+ claude: CLAUDE_TOOLS_MD,
216
+ cursor: CURSOR_TOOLS_MD,
217
+ opencode: OPENCODE_TOOLS_MD,
218
+ codex: CODEX_TOOLS_MD
219
+ };
220
+ export function harnessToolRefMarkdown(harness) {
221
+ return HARNESS_TOOL_REFS[harness];
222
+ }
223
+ export const HARNESS_TOOL_REFS_INDEX_MD = `---
224
+ name: Harness tool maps
225
+ description: "Index file. One reference per supported harness — cite the per-harness file instead of hardcoding tool names in stage skills."
226
+ ---
227
+
228
+ # Harness Tool Maps
229
+
230
+ cclaw supports four harnesses; each exposes different primitive names for the same capabilities. Stage skills and utility skills cite the file matching the currently active harness and fall back to plain-text equivalents for capabilities that the harness lacks.
231
+
232
+ | Harness | File | Notes |
233
+ |---|---|---|
234
+ | Claude Code | \`.cclaw/${HARNESS_TOOL_REFS_DIR}/claude.md\` | Richest tool surface (AskUserQuestion, Task, WebFetch, WebSearch, MCP, …). |
235
+ | Cursor | \`.cclaw/${HARNESS_TOOL_REFS_DIR}/cursor.md\` | Near-parity with Claude; uses \`AskQuestion\` instead of \`AskUserQuestion\`. |
236
+ | OpenCode | \`.cclaw/${HARNESS_TOOL_REFS_DIR}/opencode.md\` | No native ask-user / dispatch; more plain-text fallbacks. |
237
+ | Codex | \`.cclaw/${HARNESS_TOOL_REFS_DIR}/codex.md\` | No native ask-user / dispatch; shell + file I/O only by default. |
238
+
239
+ When a new harness is added or an existing one renames a tool, update the corresponding file (and this index) — do NOT scatter tool names across skill text.
240
+ `;
@@ -309,14 +309,60 @@ if [ -f "$META_SKILL" ]; then
309
309
  META_CONTENT=$(cat "$META_SKILL" 2>/dev/null || echo "")
310
310
  fi
311
311
 
312
- # --- Load knowledge snapshot (canonical JSONL tail) ---
312
+ # --- Load knowledge snapshot (canonical JSONL tail + total count) ---
313
313
  KNOWLEDGE_SUMMARY=""
314
+ LEARNINGS_COUNT=0
314
315
  if [ -f "$KNOWLEDGE_FILE" ] && [ -s "$KNOWLEDGE_FILE" ]; then
315
316
  KNOWLEDGE_SUMMARY=$(tail -n 30 "$KNOWLEDGE_FILE" 2>/dev/null || echo "")
317
+ # grep -c prints "0" and exits 1 when nothing matches, so an "|| echo" fallback inside the substitution would append a second "0".
+ LEARNINGS_COUNT=$(grep -c '^{' "$KNOWLEDGE_FILE" 2>/dev/null) || LEARNINGS_COUNT=0
318
+ fi
319
+
320
+ # --- Installed cclaw-cli version vs. project's recorded version (one-block
321
+ # upgrade-check, gstack-style). Purely informational — we never block. ---
322
+ VERSION_NOTE=""
323
+ INSTALLED_VERSION=""
324
+ PROJECT_VERSION=""
325
+ # Version lookup is skipped by default — spawning the cli on every session
326
+ # start adds ~10s on Node-based installs. Opt-in via CCLAW_HOOK_VERSION_CHECK=1.
327
+ if [ "\${CCLAW_HOOK_VERSION_CHECK:-0}" = "1" ] && command -v cclaw >/dev/null 2>&1; then
328
+ INSTALLED_VERSION=$(cclaw --version 2>/dev/null | head -1 | awk '{print $NF}' || echo "")
329
+ fi
330
+ CONFIG_FILE="$ROOT/${RUNTIME_ROOT}/config.json"
331
+ if [ -f "$CONFIG_FILE" ]; then
332
+ if command -v jq >/dev/null 2>&1; then
333
+ PROJECT_VERSION=$(jq -r '.version // ""' "$CONFIG_FILE" 2>/dev/null || echo "")
334
+ else
335
+ PROJECT_VERSION=$(grep -o '"version"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" 2>/dev/null | head -1 | sed 's/.*"\\([^"]*\\)"$/\\1/' || echo "")
336
+ fi
337
+ fi
338
+ if [ -n "$INSTALLED_VERSION" ] && [ -n "$PROJECT_VERSION" ] && [ "$INSTALLED_VERSION" != "$PROJECT_VERSION" ]; then
339
+ VERSION_NOTE="cclaw-cli $INSTALLED_VERSION installed; project recorded $PROJECT_VERSION — run 'cclaw sync' to realign."
340
+ fi
341
+
342
+ # --- Routing-check: AGENTS.md / CLAUDE.md must contain the cclaw block. ---
343
+ ROUTING_NOTE=""
344
+ ROUTING_MISSING=""
345
+ for routing_file in "$ROOT/AGENTS.md" "$ROOT/CLAUDE.md"; do
346
+ if [ -f "$routing_file" ]; then
347
+ if ! grep -q "cclaw-start" "$routing_file" 2>/dev/null; then
348
+ ROUTING_MISSING="$ROUTING_MISSING $(basename "$routing_file")"
349
+ fi
350
+ fi
351
+ done
352
+ if [ -n "$ROUTING_MISSING" ]; then
353
+ ROUTING_NOTE="Routing block missing from:\${ROUTING_MISSING}. Run 'cclaw sync' to re-inject."
316
354
  fi
317
355
 
318
356
  # --- Build context message ---
319
- CTX="cclaw loaded. Flow: stage=$STAGE ($COMPLETED/8 completed, run=$ACTIVE_RUN). Active artifacts: ${RUNTIME_ROOT}/artifacts/"
357
+ CTX="cclaw loaded. Flow: stage=$STAGE ($COMPLETED/8 completed, run=$ACTIVE_RUN). Active artifacts: ${RUNTIME_ROOT}/artifacts/. Learnings: $LEARNINGS_COUNT entries."
358
+ if [ -n "$VERSION_NOTE" ]; then
359
+ CTX="$CTX
360
+ $VERSION_NOTE"
361
+ fi
362
+ if [ -n "$ROUTING_NOTE" ]; then
363
+ CTX="$CTX
364
+ $ROUTING_NOTE"
365
+ fi
320
366
  if [ -n "$CONTEXT_MODE_NOTE" ]; then
321
367
  CTX="$CTX
322
368
  $CONTEXT_MODE_NOTE"
@@ -209,10 +209,7 @@ When a stage requires user input (approval, choice, direction):
209
209
  1. **State the decision** in one sentence.
210
210
  2. **Present options** as labeled choices (A, B, C...), one-line each, with trade-off / consequence.
211
211
  3. **Mark one option \`(recommended)\`** with a one-line reason. Do NOT use numeric "Completeness" rubrics — pick the option that best closes the decision with the smallest blast radius, lowest irreversible risk, and clearest evidence.
212
- 4. **Use the harness ask-user tool when available:**
213
- - Claude Code: \`AskUserQuestion\`
214
- - Cursor: \`AskQuestion\` (options array)
215
- - Codex/OpenCode: numbered list in plain text (no native ask tool).
212
+ 4. **Use the harness ask-user tool when available.** For the exact tool name and fallback, consult \`.cclaw/references/harness-tools/<harness>.md\` (one file per supported harness — claude, cursor, opencode, codex). Summary: Claude Code → \`AskUserQuestion\`; Cursor → \`AskQuestion\`; OpenCode / Codex → plain-text lettered list.
216
213
  5. **Wait for response.** Do not proceed until the user picks.
217
214
  6. **Commit to the choice.** Once decided, do not re-argue.
218
215
 
@@ -236,6 +233,43 @@ When a stage requires user input (approval, choice, direction):
236
233
 
237
234
  If the same approach fails three times in a row (same verification command, same review finding, same tool invocation), STOP and escalate: summarize what you tried, what evidence you have, what hypothesis you are now testing, and ask the user how to proceed. Do not invent a new angle silently on the fourth attempt.
238
235
 
236
+ ### Shared Stage Completion Protocol
237
+
238
+ Every stage skill ends with a completion block parameterized by four values: \`next\` (next stage or \`done\`), \`gates\` (gate IDs to mark passed), \`artifact\` (file under \`.cclaw/artifacts/\`), and \`mandatory\` (agents required by delegation enforcement). Stage skills print their **Completion Parameters** and then defer to this procedure — do NOT re-print the full procedure per stage.
239
+
240
+ When all required gates are satisfied and the artifact is written, execute **in this exact order**:
241
+
242
+ 0. **Delegation pre-flight** (BLOCKING, only when \`mandatory\` is non-empty).
243
+ - For each agent in \`mandatory\`: confirm it was dispatched (via Task/delegate) and completed, OR record an explicit waiver with reason in \`.cclaw/state/delegation-log.json\`.
244
+ - Write a JSON entry per agent: \`{ "stage": "<stage>", "agent": "<name>", "mode": "mandatory", "status": "completed"|"waived", "waiverReason": "<if waived>", "ts": "<ISO timestamp>" }\`.
245
+ - If the harness does not support delegation, record status \`"waived"\` with reason \`"harness_limitation"\`.
246
+ - **Do NOT proceed to step 1 until every mandatory agent has an entry in the delegation log.**
247
+ 1. **Update \`.cclaw/state/flow-state.json\`:**
248
+ - Set \`currentStage\` to \`next\` (or leave unchanged when \`next === "done"\`).
249
+ - Add the current stage to \`completedStages\`.
250
+ - Move every gate ID in \`gates\` into \`stageGateCatalog.<stage>.passed\`.
251
+ - Clear \`stageGateCatalog.<stage>.blocked\`.
252
+ - For each passed gate, add an entry to \`guardEvidence\`: \`"<gate_id>": "<artifact path or excerpt proving the gate>"\`. Do NOT leave \`guardEvidence\` empty.
253
+ 2. **Persist artifact** at \`.cclaw/artifacts/<artifact>\`. Do NOT manually copy into \`.cclaw/runs/\`; archival is handled by \`cclaw archive\`.
254
+ 3. **Doctor pre-flight** — run \`npx cclaw doctor\` (or the installed cclaw binary). If any check fails, resolve the issue (missing delegation entry, artifact section, gate evidence) and re-run until all checks pass. Do NOT proceed while doctor reports failures.
255
+ 4. **Tell the user** (verbatim when \`next\` is a stage; use the flow-complete variant when \`next === "done"\`):
256
+ > **Stage \`<stage>\` complete.** Next: **<next>** — <one-line next-stage description>.
257
+ >
258
+ > Run \`/cc-next\` to continue.
259
+
260
+ Flow-complete variant:
261
+ > **Flow complete.** All stages finished. The project is ready for release.
262
+
263
+ 5. **STOP.** Do not load the next stage skill yourself. The user will run \`/cc-next\` when ready (same session or new session).
264
+
265
+ ### Shared Resume Protocol
266
+
267
+ When resuming a stage in a NEW session (artifact exists but gates are not all passed in \`flow-state.json\`):
268
+
269
+ 1. Read the existing artifact and mark every gate whose evidence is already present in the artifact.
270
+ 2. For each unverified gate, ask the user to confirm ONE gate at a time. Do NOT batch multiple gate confirmations in a single message.
271
+ 3. Update \`guardEvidence\` for each confirmed gate before proceeding to the next unverified gate.
272
+
239
273
  ## </EXTREMELY-IMPORTANT>
240
274
 
241
275
  ## Invocation Preamble (per turn, non-trivial tasks)
@@ -255,6 +289,40 @@ The preamble exists to prevent silent drift from the user's ask. If the preamble
255
289
 
256
290
  Do not re-emit the preamble on every subsequent tool call — once per user turn is sufficient. If the user message changes the goal mid-execution, emit a fresh preamble before acting on the new direction.
257
291
 
292
+ ## Engineering Ethos
293
+
294
+ Three guardrails apply to every stage, every turn. Internalise them — they trump speed, cleverness, and novelty:
295
+
296
+ ### Search Before Building
297
+
298
+ Before writing new code, a new skill, a new abstraction, or a new artifact section, spend 60–120 seconds checking whether the thing already exists. Order of search:
299
+
300
+ 1. **Project artifacts** — \`.cclaw/artifacts/**\`, \`docs/**\`, root-level \`README.md\` / \`SPEC.md\` / \`DESIGN.md\`.
301
+ 2. **Project knowledge** — \`.cclaw/knowledge.jsonl\` (lessons with matching \`domain\` / \`trigger\`).
302
+ 3. **Codebase** — \`rg\` / \`Grep\` for the symbol, function, test, or comment that describes what you're about to add.
303
+ 4. **Framework/library primitives** — prefer a stdlib or framework-native affordance over a handwritten helper.
304
+ 5. **Existing skill or stage rule** — \`.cclaw/skills/**/SKILL.md\` and \`.cclaw/commands/**/*.md\`.
305
+
306
+ Only after all five searches turn up nothing do you build. Every duplicate helper, redefined type, parallel-but-incompatible artifact section, or re-discovered lesson is a tax on the next five sessions. Record the negative search result (what you looked for, where, and why nothing fit) in the turn's preamble or the stage artifact so future agents don't repeat the hunt.
307
+
308
+ ### Boil the Lake (scoped minimum-sweep rule)
309
+
310
+ "Boil the lake" normally means wasteful, exhaustive work. **cclaw inverts the phrase**: within the current stage, you are expected to sweep *the defined surface exhaustively* — not to stop at the first plausible answer.
311
+
312
+ - In \`brainstorm\` / \`scope\` — enumerate every viable approach in the defined option space; name the ones you rejected and why.
313
+ - In \`design\` — trace every data-flow and failure edge across the chosen component boundary, not just the happy path.
314
+ - In \`spec\` — list every acceptance criterion for the in-scope surface; "and similar" / "etc." is banned.
315
+ - In \`tdd\` — exercise every branch / error path / boundary of the slice under test, not only the canonical case.
316
+ - In \`review\` — audit every file touched in the diff, not just the files named in the spec.
317
+
318
+ The sweep is bounded by the stage's declared surface. Expanding the surface is a Decision Protocol question, not a silent enlargement.
319
+
320
+ ### Do Less, Prove More
321
+
322
+ When in doubt between adding code / scope / artifact sections and cutting them, cut. The flow already forces you to justify each stage's output — volume is never a proxy for quality. One acceptance criterion with captured evidence beats five without; one labeled architecture diagram beats three generic boxes-and-arrows; one REFACTOR note explaining a concrete trade-off beats a paragraph of filler.
323
+
324
+ If a rule, template section, or agent feels ornamental, flag it in \`Operational Self-Improvement\` and propose removal — cclaw's invariant is that every section must pay its tokens back by preventing a specific failure mode.
325
+
258
326
  ## Operational Self-Improvement (auto-learn)
259
327
 
260
328
  cclaw treats **lived friction** as first-class knowledge. When you observe one of the triggers below during a session, append a single JSONL line to \`.cclaw/knowledge.jsonl\` via \`/cc-learn add\` (or queue it for the next \`/cc-learn\` call) — do NOT let the signal evaporate when the session ends.
@@ -1,3 +1,8 @@
1
1
  import type { FlowStage } from "../types.js";
2
+ /**
3
+ * Long-form Wave Execution walkthrough. Rendered once into
4
+ * \`.cclaw/references/stages/tdd-wave-walkthrough.md\` by the installer.
5
+ */
6
+ export declare const TDD_WAVE_WALKTHROUGH_MARKDOWN = "# TDD \u2014 Wave Execution Walkthrough\n\nDetailed RED / GREEN / REFACTOR transcript for a 3-task wave. Illustrative\nonly \u2014 do not copy the command names blindly, match them to your stack.\n\n## Wave 1 example tasks\n\n| Task ID | Description | AC | Verification |\n|---|---|---|---|\n| T-1 `[~3m]` | Add `User.emailNormalized` column | AC-1 | `npm test -- users/schema` |\n| T-2 `[~4m]` | Normalize on write in `UserRepo.save` | AC-1 | `npm test -- users/repo` |\n| T-3 `[~3m]` | Reject duplicates in `UserService.signup` | AC-2 | `npm test -- users/service` |\n\n## Execution transcript\n\n### T-1 \u2014 RED\n\n> Run: `npm test -- users/schema` \u2192 **FAIL** (missing column: `emailNormalized`). Captured the failure stack as RED evidence. No production code touched yet.\n\n### T-1 \u2014 GREEN\n\n> Added the column in the schema module. Re-ran `npm test -- users/schema` \u2192 **PASS**. Ran the full suite `npm test` \u2192 **PASS**. Captured both outputs as GREEN evidence.\n\n### T-1 \u2014 REFACTOR\n\n> Extracted the column definition into a shared `NormalizedEmail` type used by T-2/T-3. Re-ran `npm test` \u2192 **PASS**. Captured REFACTOR note: \"Extracted NormalizedEmail type to keep T-2/T-3 DRY; zero behavior change, all tests still green.\"\n\n### T-2 \u2014 RED / GREEN / REFACTOR\n\nWrite the repo test that expects normalised writes, watch it fail (RED), implement normalisation inside `UserRepo.save` only (GREEN), then refactor the normaliser out of the repo into a helper shared with T-3 (REFACTOR).\n\n### T-3 \u2014 RED / GREEN / REFACTOR\n\nWrite the service-level duplicate test that expects a rejection, watch it fail (RED), add the duplicate check in `UserService.signup` (GREEN), refactor the error message into a named constant (REFACTOR).\n\n## Wave gate check\n\nAfter T-3 REFACTOR, before declaring Wave 1 done:\n\n1. 
Run the full suite (`npm test`) one final time \u2192 **PASS** captured as wave-exit evidence.\n2. Verify the TDD artifact contains RED, GREEN, and REFACTOR evidence for T-1, T-2, **and** T-3. No partial waves.\n3. Only now mark Wave 1 complete. Wave 2 cannot start until this step.\n\n## When to stop mid-wave (do NOT push through)\n\n- A RED test fails for a reason you did not predict (e.g. an unrelated flaky test) \u2192 **pause**, diagnose, log an operational-self-improvement entry, and decide with the user before proceeding.\n- A GREEN step would require touching code outside the task's acceptance criterion \u2192 **pause**, the task is scoped wrong; adjust the plan or open a follow-up task.\n- The same RED failure reappears after a GREEN change \u2192 **escalate** per the 3-attempts rule; do not keep patching.\n";
2
7
  export declare function stageSkillFolder(stage: FlowStage): string;
3
8
  export declare function stageSkillMarkdown(stage: FlowStage): string;
@@ -1,5 +1,5 @@
1
1
  import { RUNTIME_ROOT } from "../constants.js";
2
- import { stageExamples, stageGoodBadExamples } from "./examples.js";
2
+ import { STAGE_EXAMPLES_REFERENCE_DIR, stageDomainExamples, stageExamples, stageGoodBadExamples } from "./examples.js";
3
3
  import { selfImprovementBlock } from "./learnings.js";
4
4
  import { stageAutoSubagentDispatch, stageSchema } from "./stage-schema.js";
5
5
  function rationalizationTable(stage) {
@@ -146,6 +146,12 @@ On session stop or stage completion, the agent should write delegation entries t
146
146
  `;
147
147
  }
148
148
  const VERIFICATION_STAGES = ["tdd", "review", "ship"];
149
+ /**
150
+ * Short inline summary of Wave Execution Mode. The detailed 3-task
151
+ * walkthrough (RED/GREEN/REFACTOR transcript per slice) lives in the
152
+ * companion reference file so the always-rendered skill body stays under
153
+ * the 400-line soft budget.
154
+ */
149
155
  function waveExecutionModeBlock(stage) {
150
156
  const schema = stageSchema(stage);
151
157
  if (!schema.waveExecutionAllowed) {
@@ -155,60 +161,103 @@ function waveExecutionModeBlock(stage) {
155
161
 
156
162
  After plan approval (**WAIT_FOR_CONFIRM** / \`plan_wait_for_confirm\` satisfied), process **all tasks in the current dependency wave** sequentially: **RED → GREEN → REFACTOR** per task, recording evidence per slice. **Stop** only on **BLOCKED**, a test failure that **requires user input**, or **wave completion** (every task in the wave has the required RED / GREEN / REFACTOR evidence per the plan artifact).
157
163
 
164
+ **Wave gate check (before marking a wave complete):**
165
+
166
+ 1. Run the **full suite** one final time → PASS, captured as wave-exit evidence.
167
+ 2. Verify the TDD artifact contains RED, GREEN, and REFACTOR evidence for every task in the wave. No partial waves.
168
+ 3. Only then declare the wave complete. The next wave cannot start until this step.
169
+
170
+ **When to stop mid-wave (do NOT push through):**
171
+
172
+ - A RED test fails for an unpredicted reason (e.g. an unrelated flaky test) → **pause**, diagnose, log an operational-self-improvement entry.
173
+ - A GREEN step would require touching code outside the task's acceptance criterion → **pause**, the task is scoped wrong.
174
+ - The same RED failure reappears after a GREEN change → **escalate** per the 3-attempts rule.
175
+
176
+ > **Full 3-task walkthrough transcript** (RED/GREEN/REFACTOR per slice, with wave gate check): see \`.cclaw/${STAGE_EXAMPLES_REFERENCE_DIR}/tdd-wave-walkthrough.md\`.
158
177
  `;
159
178
  }
179
+ /**
180
+ * Long-form Wave Execution walkthrough. Rendered once into
181
+ * \`.cclaw/references/stages/tdd-wave-walkthrough.md\` by the installer.
182
+ */
183
+ export const TDD_WAVE_WALKTHROUGH_MARKDOWN = `# TDD — Wave Execution Walkthrough
184
+
185
+ Detailed RED / GREEN / REFACTOR transcript for a 3-task wave. Illustrative
186
+ only — do not copy the command names blindly, match them to your stack.
187
+
188
+ ## Wave 1 example tasks
189
+
190
+ | Task ID | Description | AC | Verification |
191
+ |---|---|---|---|
192
+ | T-1 \`[~3m]\` | Add \`User.emailNormalized\` column | AC-1 | \`npm test -- users/schema\` |
193
+ | T-2 \`[~4m]\` | Normalize on write in \`UserRepo.save\` | AC-1 | \`npm test -- users/repo\` |
194
+ | T-3 \`[~3m]\` | Reject duplicates in \`UserService.signup\` | AC-2 | \`npm test -- users/service\` |
195
+
196
+ ## Execution transcript
197
+
198
+ ### T-1 — RED
199
+
200
+ > Run: \`npm test -- users/schema\` → **FAIL** (missing column: \`emailNormalized\`). Captured the failure stack as RED evidence. No production code touched yet.
201
+
202
+ ### T-1 — GREEN
203
+
204
+ > Added the column in the schema module. Re-ran \`npm test -- users/schema\` → **PASS**. Ran the full suite \`npm test\` → **PASS**. Captured both outputs as GREEN evidence.
205
+
206
+ ### T-1 — REFACTOR
207
+
208
+ > Extracted the column definition into a shared \`NormalizedEmail\` type used by T-2/T-3. Re-ran \`npm test\` → **PASS**. Captured REFACTOR note: "Extracted NormalizedEmail type to keep T-2/T-3 DRY; zero behavior change, all tests still green."
209
+
210
+ ### T-2 — RED / GREEN / REFACTOR
211
+
212
+ Write the repo test that expects normalised writes, watch it fail (RED), implement normalisation inside \`UserRepo.save\` only (GREEN), then refactor the normaliser out of the repo into a helper shared with T-3 (REFACTOR).
213
+
214
+ ### T-3 — RED / GREEN / REFACTOR
215
+
216
+ Write the service-level duplicate test that expects a rejection, watch it fail (RED), add the duplicate check in \`UserService.signup\` (GREEN), refactor the error message into a named constant (REFACTOR).
217
+
218
+ ## Wave gate check
219
+
220
+ After T-3 REFACTOR, before declaring Wave 1 done:
221
+
222
+ 1. Run the full suite (\`npm test\`) one final time → **PASS** captured as wave-exit evidence.
223
+ 2. Verify the TDD artifact contains RED, GREEN, and REFACTOR evidence for T-1, T-2, **and** T-3. No partial waves.
224
+ 3. Only now mark Wave 1 complete. Wave 2 cannot start until this step.
225
+
226
+ ## When to stop mid-wave (do NOT push through)
227
+
228
+ - A RED test fails for a reason you did not predict (e.g. an unrelated flaky test) → **pause**, diagnose, log an operational-self-improvement entry, and decide with the user before proceeding.
229
+ - A GREEN step would require touching code outside the task's acceptance criterion → **pause**, the task is scoped wrong; adjust the plan or open a follow-up task.
230
+ - The same RED failure reappears after a GREEN change → **escalate** per the 3-attempts rule; do not keep patching.
231
+ `;
160
232
  function stageCompletionProtocol(schema) {
161
233
  const stage = schema.stage;
162
234
  const gateIds = schema.requiredGates.map((g) => g.id);
163
235
  const gateList = gateIds.map((id) => `\`${id}\``).join(", ");
164
- const nextStage = schema.next === "done" ? null : schema.next;
236
+ const nextStage = schema.next === "done" ? "done" : schema.next;
165
237
  const mandatory = schema.mandatoryDelegations;
166
- const delegationLogRel = `${RUNTIME_ROOT}/state/delegation-log.json`;
167
- const stateUpdate = nextStage
168
- ? ` - Set \`currentStage\` to \`"${nextStage}"\`
169
- - Add \`"${stage}"\` to \`completedStages\` array
170
- - Move all gate IDs for this stage (${gateList}) into \`stageGateCatalog.${stage}.passed\`
171
- - Clear \`stageGateCatalog.${stage}.blocked\``
172
- : ` - Add \`"${stage}"\` to \`completedStages\` array
173
- - Move all gate IDs for this stage (${gateList}) into \`stageGateCatalog.${stage}.passed\`
174
- - Clear \`stageGateCatalog.${stage}.blocked\``;
175
- const delegationBlock = mandatory.length > 0
176
- ? `0. **Delegation pre-flight** (BLOCKING):
177
- - Mandatory agents for this stage: ${mandatory.map((a) => `\`${a}\``).join(", ")}.
178
- - For each mandatory agent: confirm it was dispatched (via Task/delegate) and completed, OR record an explicit waiver with reason in \`${delegationLogRel}\`.
179
- - Write a JSON entry per agent: \`{ "stage": "${stage}", "agent": "<name>", "mode": "mandatory", "status": "completed"|"waived", "waiverReason": "<if waived>", "ts": "<ISO timestamp>" }\`.
180
- - If the harness does not support delegation, record status \`"waived"\` with reason \`"harness_limitation"\`.
181
- - **Do NOT proceed to step 1 until every mandatory agent has an entry in the delegation log.**
182
- `
183
- : "";
184
- let nextAction;
185
- if (nextStage) {
186
- const nextSchema = stageSchema(nextStage);
187
- const nextDescription = nextSchema.skillDescription.charAt(0).toLowerCase() + nextSchema.skillDescription.slice(1);
188
- nextAction = `4. Tell the user:\n\n > **Stage \`${stage}\` complete.** Next: **${nextStage}** — ${nextDescription}\n >\n > Run \`/cc-next\` to continue.`;
189
- }
190
- else {
191
- nextAction = `4. Tell the user:\n\n > **Flow complete.** All stages finished. The project is ready for release.`;
192
- }
238
+ const mandatoryList = mandatory.length > 0 ? mandatory.map((a) => `\`${a}\``).join(", ") : "none";
239
+ const nextDescription = schema.next === "done"
240
+ ? "flow complete — release cut and handoff signed off"
241
+ : (() => {
242
+ const nextSchema = stageSchema(schema.next);
243
+ return nextSchema.skillDescription.charAt(0).toLowerCase() + nextSchema.skillDescription.slice(1);
244
+ })();
193
245
  return `## Stage Completion Protocol
194
246
 
195
- When all required gates are satisfied and the artifact is written:
247
+ Apply the **Shared Stage Completion Protocol** from \`.cclaw/skills/using-cclaw/SKILL.md\` with these parameters — do NOT re-derive the generic steps here.
196
248
 
197
- ${delegationBlock}1. **Update \`${RUNTIME_ROOT}/state/flow-state.json\`:**
198
- ${stateUpdate}
199
- - For each passed gate, add an entry to \`guardEvidence\`: \`"<gate_id>": "<artifact path or excerpt proving the gate>"\`. Do NOT leave \`guardEvidence\` empty.
200
- 2. **Persist artifact** at \`${RUNTIME_ROOT}/artifacts/${schema.artifactFile}\`. Do NOT manually copy into \`${RUNTIME_ROOT}/runs/\`; archival is handled by \`cclaw archive\`.
201
- 3. **Doctor pre-flight** — Run \`npx cclaw doctor\` (or the installed cclaw binary). If any check fails, resolve the issue (missing delegation entry, artifact section, gate evidence) and re-run until all checks pass. Do NOT proceed to the next step while doctor reports failures.
202
- ${nextAction}
249
+ **Completion Parameters**
250
+ - \`stage\` — \`${stage}\`
251
+ - \`next\` — \`${nextStage}\` (${nextDescription})
252
+ - \`gates\` — ${gateList}
253
+ - \`artifact\` — \`${RUNTIME_ROOT}/artifacts/${schema.artifactFile}\`
254
+ - \`mandatory\` — ${mandatoryList}
203
255
 
204
- **STOP.** Do not load the next stage skill yourself. The user will run \`/cc-next\` when ready (same session or new session).
256
+ When all required gates are satisfied and the artifact is written, execute the shared procedure (delegation pre-flight → flow-state update → artifact persistence → \`npx cclaw doctor\` → user handoff → STOP) using the parameters above. If any check fails, resolve the issue and re-run before proceeding.
205
257
 
206
258
  ## Resume Protocol
207
259
 
208
- When resuming a stage in a NEW session (artifact exists but gates are not all passed in flow-state):
209
- 1. Read the existing artifact and check which gates can be verified from artifact evidence.
210
- 2. For each unverified gate, ask the user to confirm ONE gate at a time. Do NOT batch multiple gate confirmations in a single message.
211
- 3. Update \`guardEvidence\` for each confirmed gate before proceeding.
260
+ When resuming this stage in a NEW session (artifact exists but not all of ${gateList} are passed), follow the **Shared Resume Protocol** in \`.cclaw/skills/using-cclaw/SKILL.md\` — confirm one gate at a time, update \`guardEvidence\` for each, never batch confirmations.
212
261
  `;
213
262
  }
214
263
  function stageTransitionAutoAdvanceBlock(schema) {
@@ -335,6 +384,14 @@ description: "${schema.skillDescription}"
335
384
 
336
385
  # ${schema.skillName}
337
386
 
387
+ <EXTREMELY-IMPORTANT>
388
+
389
+ **IRON LAW — ${stage.toUpperCase()}:** ${schema.ironLaw}
390
+
391
+ If you are about to violate the Iron Law, STOP. No amount of urgency, partial progress, or clever reinterpretation overrides it. Escalate via the Decision Protocol or abandon the stage.
392
+
393
+ </EXTREMELY-IMPORTANT>
394
+
338
395
  ${quickStartBlock(stage)}
339
396
  ## Overview
340
397
  ${schema.purpose}
@@ -364,6 +421,7 @@ You MUST complete these steps in order:
364
421
  ${checklistItems}
365
422
 
366
423
  ${stageGoodBadExamples(stage)}
424
+ ${stageDomainExamples(stage)}
367
425
  ${stageExamples(stage)}
368
426
  ${namedAntiPatternBlock(stage)}
369
427
  ${cognitivePatternsList(stage)}
@@ -391,11 +449,25 @@ ${decisionRecordBlock(stage)}
391
449
  ## Common Rationalizations
392
450
  ${rationalizationTable(stage)}
393
451
 
394
- ## Anti-Patterns
395
- ${[...schema.antiPatterns, ...schema.blockers].map((item) => `- ${item}`).join("\n")}
396
-
397
- ## Red Flags
398
- ${schema.redFlags.map((item) => `- ${item}`).join("\n")}
452
+ ## Anti-Patterns & Red Flags
453
+
454
+ > One consolidated list of observable failure modes for this stage. Mix of
455
+ > behavioural anti-patterns (things you might do wrong) and red-flag
456
+ > signals (things you might notice going wrong). Dedup-merged so no item
457
+ > appears twice.
458
+
459
+ ${(() => {
460
+ const merged = [];
461
+ const seen = new Set();
462
+ for (const item of [...schema.antiPatterns, ...schema.blockers, ...schema.redFlags]) {
463
+ const key = item.trim().toLowerCase();
464
+ if (seen.has(key))
465
+ continue;
466
+ seen.add(key);
467
+ merged.push(item);
468
+ }
469
+ return merged.map((item) => `- ${item}`).join("\n");
470
+ })()}
399
471
 
400
472
  ${completionStatusBlock(stage)}
401
473
  ## Verification