cclaw-cli 4.0.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import { SHIP_FINALIZATION_MODES } from "../constants.js";
2
2
  import { questionBudgetHint } from "../track-heuristics.js";
3
3
  import { FLOW_STAGES } from "../types.js";
4
+ import { stageSchema } from "../content/stage-schema.js";
4
5
  /**
5
6
  * Recognized stop-signal phrases that satisfy the Q&A floor escape hatch
6
7
  * when recorded as a Q&A Log row. Mirrors `Stop Signals (Natural Language)`
@@ -29,9 +30,9 @@ const QA_LOG_STOP_SIGNAL_PATTERNS = [
29
30
  /(?<![\p{L}\p{N}_])рухаємось\s+далі(?![\p{L}\p{N}_])/iu
30
31
  ];
31
32
  /**
32
- * Stages that run adaptive elicitation. The `qa_log_below_min` rule only
33
- * fires for these. Other stages may still record a Q&A Log but no floor is
34
- * enforced.
33
+ * Stages that run adaptive elicitation. The `qa_log_unconverged` rule
34
+ * only fires for these. Other stages may still record a Q&A Log but no
35
+ * convergence floor is enforced.
35
36
  */
36
37
  export const ELICITATION_STAGES = new Set([
37
38
  "brainstorm",
@@ -39,9 +40,27 @@ export const ELICITATION_STAGES = new Set([
39
40
  "design"
40
41
  ]);
41
42
  /**
42
- * Decide whether a Q&A Log row counts as a "substantive" entry for the floor.
43
- * Rows whose disposition column reads `skipped` / `waived` only do not
44
- * count toward the minimum.
43
+ * Phrases that mark a Q&A Log row as "no new decision", used by the
44
+ * Ralph-Loop convergence detector. When the last 2 substantive rows have
45
+ * a Decision impact tagged with one of these phrases, convergence has
46
+ * been reached even if not every forcing question was explicitly
47
+ * addressed.
48
+ */
49
+ const QA_LOG_NO_DECISION_TOKENS = [
50
+ /\bskip(?:ped)?\b/iu,
51
+ /\bcontinue\b/iu,
52
+ /\bno[-\s]?change\b/iu,
53
+ /\bno[-\s]?decision\b/iu,
54
+ /\bno[-\s]?op\b/iu,
55
+ /\bnoop\b/iu,
56
+ /\bdone\b/iu,
57
+ /\bsame\b/iu,
58
+ /\bok\b/iu
59
+ ];
60
+ /**
61
+ * Decide whether a Q&A Log row counts as a "substantive" entry. Rows
62
+ * whose decision_impact column reads `skipped` / `waived` only do not
63
+ * count.
45
64
  */
46
65
  function isSubstantiveQaRow(cells) {
47
66
  if (cells.length === 0)
@@ -53,8 +72,8 @@ function isSubstantiveQaRow(cells) {
53
72
  return true;
54
73
  }
55
74
  /**
56
- * Detect a stop-signal row in the Q&A Log. Pattern is matched across all
57
- * cells of any row so the user's quote can live in any column.
75
+ * Detect a stop-signal row in the Q&A Log. Pattern is matched across
76
+ * all cells of any row so the user's quote can live in any column.
58
77
  */
59
78
  function detectStopSignal(rows) {
60
79
  for (const row of rows) {
@@ -67,60 +86,185 @@ function detectStopSignal(rows) {
67
86
  return false;
68
87
  }
69
88
  /**
70
- * Evaluate the Q&A Log floor for a brainstorm / scope / design artifact.
71
- * Returns ok=true when the floor is satisfied or any escape hatch fires.
89
+ * Extract forcing-question topics from a stage's checklist. Looks for
90
+ * the canonical `**<Stage> forcing questions (must be covered or
91
+ * explicitly waived)** — <topic1>, <topic2>, ...` row and tokenizes the
92
+ * comma-separated topic list. Returns trimmed topic strings stripped of
93
+ * one leading question/auxiliary word (`what`/`who`/`where`/`which`/`how`/`is`/`are`/`do`/`does`/`did`/`can`/`will`/`would`/`could`/`should`/`may`/`might`).
72
94
  *
73
- * Escape hatches (any one is sufficient):
74
- * - Q&A Log contains a stop-signal row.
95
+ * Returns empty array when no forcing-questions row is present (caller
96
+ * should treat absence as "no forcing requirement" — convergence falls
97
+ * back to the at-least-one-substantive-row check and the no-new-decisions / stop-signal detectors).
98
+ */
99
+ export function extractForcingQuestions(stage) {
100
+ let checklist;
101
+ try {
102
+ checklist = stageSchema(stage).executionModel.checklist;
103
+ }
104
+ catch {
105
+ return [];
106
+ }
107
+ for (const row of checklist) {
108
+ const headerMatch = /\*\*\s*[A-Za-z]+\s+forcing\s+questions\s*\([^)]*\)\s*\*\*\s*(?:[—\-–:]+)?\s*(.+)/iu.exec(row);
109
+ if (!headerMatch)
110
+ continue;
111
+ const body = (headerMatch[1] ?? "")
112
+ .replace(/\.$/u, "")
113
+ .trim();
114
+ if (body.length === 0)
115
+ return [];
116
+ return body
117
+ .split(/,\s*(?:and\s+)?|\s+and\s+/iu)
118
+ .map((topic) => topic.trim())
119
+ .filter((topic) => topic.length > 0)
120
+ .map((topic) => topic
121
+ .replace(/^[*_`]+|[*_`]+$/gu, "")
122
+ .replace(/^(?:what|who|where|which|how|is|are|do|does|did|can|will|would|could|should|may|might)\s+/iu, "")
123
+ .replace(/\?+$/u, "")
124
+ .trim())
125
+ .filter((topic) => topic.length > 0);
126
+ }
127
+ return [];
128
+ }
129
+ /**
130
+ * Build a salient-keyword set for a forcing-question topic. Splits on
131
+ * whitespace and punctuation, drops tokens under 3 characters and stop words, lowercases. Used for fuzzy
132
+ * substring match against Q&A Log row content.
133
+ */
134
+ function topicKeywords(topic) {
135
+ const STOP_WORDS = new Set([
136
+ "the", "a", "an", "is", "are", "was", "were", "be", "to", "of", "in", "on", "at",
137
+ "for", "and", "or", "but", "if", "then", "else", "with", "without", "by", "as",
138
+ "we", "us", "our", "they", "them", "their", "you", "your", "i", "me", "my",
139
+ "this", "that", "these", "those", "it", "its", "do", "does", "did", "can",
140
+ "will", "would", "should", "could", "may", "might", "any", "some", "no", "not",
141
+ "from", "into", "onto", "upon", "than", "very", "much", "many", "more", "most",
142
+ "must", "have", "has", "had", "been", "being", "where", "when", "while",
143
+ "what", "which", "who", "whose", "whom", "why", "how", "non"
144
+ ]);
145
+ return topic
146
+ .toLowerCase()
147
+ .split(/[\s\-/.,;:()\[\]{}'"`*_]+/u)
148
+ .map((token) => token.replace(/[^\p{L}\p{N}-]/gu, ""))
149
+ .filter((token) => token.length >= 3 && !STOP_WORDS.has(token));
150
+ }
151
+ function isTopicAddressed(topic, rows) {
152
+ const keywords = topicKeywords(topic);
153
+ if (keywords.length === 0)
154
+ return true;
155
+ const minHits = keywords.length === 1 ? 1 : Math.min(2, keywords.length);
156
+ for (const row of rows) {
157
+ const haystack = row.join(" | ").toLowerCase();
158
+ let hits = 0;
159
+ for (const keyword of keywords) {
160
+ if (haystack.includes(keyword))
161
+ hits += 1;
162
+ if (hits >= minHits)
163
+ return true;
164
+ }
165
+ }
166
+ return false;
167
+ }
168
+ function lastTwoRowsAllNoDecision(substantiveRows) {
169
+ if (substantiveRows.length < 2)
170
+ return false;
171
+ const tail = substantiveRows.slice(-2);
172
+ for (const row of tail) {
173
+ const decisionImpact = (row[row.length - 1] ?? "").trim();
174
+ if (decisionImpact.length === 0)
175
+ return false;
176
+ const matched = QA_LOG_NO_DECISION_TOKENS.some((pattern) => pattern.test(decisionImpact));
177
+ if (!matched)
178
+ return false;
179
+ }
180
+ return true;
181
+ }
182
+ /**
183
+ * Evaluate the Q&A Log convergence floor for a brainstorm / scope /
184
+ * design artifact. Returns ok=true when convergence is reached or any
185
+ * escape hatch fires.
186
+ *
187
+ * Convergence sources (any one is sufficient):
188
+ * - All forcing-question topics from the stage checklist appear addressed
189
+ * in `## Q&A Log` (substring keyword match across all cells of a single row).
190
+ * - The Ralph-Loop convergence detector reports the last 2 substantive
191
+ * rows have decision_impact marking `skip`/`continue`/`no-change`/`done`
192
+ * (i.e. the dialogue is no longer producing decision-changing rows).
193
+ * - Q&A Log contains a stop-signal row (existing
194
+ * `QA_LOG_STOP_SIGNAL_PATTERNS` keep working).
75
195
  * - `--skip-questions` flag was persisted to the active stage flags
76
- * (passed via `options.skipQuestions=true`); finding downgrades to advisory.
77
- * - Track is `quick` (lite tier ~ lightweight complexity) AND substantive
78
- * count >= 1.
196
+ * (`options.skipQuestions=true`); finding downgrades to advisory.
197
+ * - The stage checklist exposes no forcing-questions row (e.g. simple
198
+ * refactor) AND the artifact has at least one substantive row — treat
199
+ * as converged because there is nothing left to force.
200
+ *
201
+ * Wave 23 (v5.0.0) replaces the count-based `qa_log_below_min` rule with
202
+ * `qa_log_unconverged`. The fixed count constant (10 for standard) and
203
+ * the `CCLAW_ELICITATION_FLOOR=advisory` env override were removed. The
204
+ * `min` and `liteShortCircuit` fields on the result are retained for
205
+ * harness UI compatibility but are always 0/false.
79
206
  */
80
207
  export function evaluateQaLogFloor(qaLogBody, track, stage, options = {}) {
81
- const hint = questionBudgetHint(track, stage);
82
- const min = hint.min;
83
208
  const rows = qaLogBody !== null ? getMarkdownTableRows(qaLogBody) : [];
84
209
  const substantiveRows = rows.filter(isSubstantiveQaRow);
85
210
  const count = substantiveRows.length;
86
211
  const hasStopSignal = detectStopSignal(rows);
87
- const liteShortCircuit = track === "quick" && count >= 1;
88
- // Emergency override (undocumented for users): set
89
- // `CCLAW_ELICITATION_FLOOR=advisory` to downgrade qa_log_below_min from
90
- // blocking to advisory globally. This is a safety net for incidents where
91
- // the floor mis-fires across an org; treat as `--skip-questions` semantics.
92
- const envOverride = (typeof process !== "undefined" ? process.env?.CCLAW_ELICITATION_FLOOR : undefined) === "advisory";
93
- const skipQuestionsAdvisory = options.skipQuestions === true || envOverride;
94
- const ok = count >= min || hasStopSignal || liteShortCircuit;
212
+ const skipQuestionsAdvisory = options.skipQuestions === true;
213
+ const forcingTopics = options.forcingQuestions ?? extractForcingQuestions(stage);
214
+ const forcingCovered = [];
215
+ const forcingPending = [];
216
+ for (const topic of forcingTopics) {
217
+ if (isTopicAddressed(topic, rows))
218
+ forcingCovered.push(topic);
219
+ else
220
+ forcingPending.push(topic);
221
+ }
222
+ const noNewDecisions = lastTwoRowsAllNoDecision(substantiveRows);
223
+ const allForcingCovered = forcingTopics.length > 0 ? forcingPending.length === 0 : count >= 1;
224
+ const ok = allForcingCovered || noNewDecisions || hasStopSignal;
95
225
  let details;
96
226
  if (ok) {
97
- if (count >= min) {
98
- details = `Q&A Log has ${count} substantive entries (floor for ${track}/${stage}: ${min}).`;
227
+ if (allForcingCovered && forcingTopics.length > 0) {
228
+ details = `Q&A Log converged: all ${forcingTopics.length} forcing-question topic(s) addressed across ${count} substantive row(s).`;
99
229
  }
100
- else if (hasStopSignal) {
101
- details = `Q&A Log has ${count} substantive entries with an explicit user stop-signal row recorded (floor: ${min}).`;
230
+ else if (allForcingCovered) {
231
+ details = `Q&A Log converged: stage exposes no forcing-questions row and ${count} substantive entry recorded.`;
232
+ }
233
+ else if (noNewDecisions) {
234
+ const remaining = forcingPending.length > 0
235
+ ? ` ${forcingPending.length} forcing topic(s) still pending but last 2 rows produced no decision changes (Ralph-Loop convergence).`
236
+ : " Ralph-Loop convergence detector says no new decision-changing rows in the last 2 turns.";
237
+ details = `Q&A Log converged via no-new-decisions detector at ${count} row(s).${remaining}`;
102
238
  }
103
239
  else {
104
- details = `Q&A Log has ${count} substantive entry under lightweight track short-circuit (default floor: ${min}).`;
240
+ details = `Q&A Log converged: explicit user stop-signal row recorded at ${count} row(s).`;
105
241
  }
106
242
  }
107
243
  else if (skipQuestionsAdvisory) {
108
- const reason = options.skipQuestions === true
109
- ? "--skip-questions flag was set"
110
- : "CCLAW_ELICITATION_FLOOR=advisory env override is active";
111
- details = `Q&A Log has ${count} substantive entries, minimum for ${track}/${stage} is ${min}; ${reason}, finding downgraded to advisory.`;
244
+ details = `Q&A Log unconverged at ${count} row(s); --skip-questions flag downgraded the finding to advisory. Pending forcing topic(s): ${forcingPending.length > 0 ? forcingPending.join("; ") : "(none extracted)"}.`;
112
245
  }
113
246
  else {
114
- details = `Q&A Log has ${count} substantive entries, minimum for ${track}/${stage} is ${min}. Continue the elicitation loop or record an explicit user stop-signal row in Q&A Log.`;
247
+ details = `Q&A Log unconverged at ${count} row(s). Continue the elicitation loop until forcing-question topics are addressed (${forcingPending.length > 0 ? forcingPending.join("; ") : "no forcing topics extracted"}), the last 2 rows record no-decision impact, or an explicit user stop-signal row is appended.`;
115
248
  }
249
+ // Surface advisory budget hint for harness UI without re-introducing a
250
+ // blocking count. `recommended` is the soft budget per track/stage.
251
+ const advisoryBudget = questionBudgetHint(track, stage).recommended;
116
252
  return {
117
253
  ok,
118
254
  count,
119
- min,
255
+ // Wave 23: floor no longer enforces a count. Surfacing 0 keeps the
256
+ // QaLogFloorSignal shape stable for harness consumers; harness UIs
257
+ // may show `recommended` from `questionBudgetHint` separately.
258
+ min: 0,
120
259
  hasStopSignal,
121
- liteShortCircuit,
260
+ liteShortCircuit: false,
122
261
  skipQuestionsAdvisory,
123
- details
262
+ forcingCovered,
263
+ forcingPending,
264
+ noNewDecisions,
265
+ details: advisoryBudget > 0
266
+ ? `${details} (advisory budget for ${track}/${stage}: ~${advisoryBudget} Q&A turns)`
267
+ : details
124
268
  };
125
269
  }
126
270
  export function normalizeHeadingTitle(title) {
@@ -678,61 +822,12 @@ export function extractCanonicalScopeMode(body) {
678
822
  }
679
823
  return null;
680
824
  }
681
- export function validatePremiseChallenge(sectionBody) {
682
- // gstack-style premise challenge requires a real Q/A structure (table or
683
- // list), not free-form prose. The validation is *structural* only — we do
684
- // NOT keyword-grep for English phrases like "right problem"; authors may
685
- // write the questions in any language, and the answers carry the meaning.
686
- // The template ships with canonical question labels as scaffolding, but
687
- // the linter only enforces that the section actually compares premise
688
- // questions to answers.
689
- const tableRows = getMarkdownTableRows(sectionBody);
690
- const bulletRows = sectionBody
691
- .split(/\r?\n/u)
692
- .map((line) => line.trim())
693
- .filter((line) => /^(?:[-*]|\d+\.)\s+\S/u.test(line));
694
- const rowCount = Math.max(tableRows.length, bulletRows.length);
695
- if (rowCount < 3) {
696
- return {
697
- ok: false,
698
- details: `Premise Challenge needs at least 3 substantive rows in a table or bullet list. Found ${rowCount}.`
699
- };
700
- }
701
- // For tables, each data row must have at least 2 non-empty cells so the
702
- // section is genuinely a premise/answer comparison, not a list of headlines.
703
- // For bullet lists, each line must be substantive so we don't accept
704
- // placeholders like `- a`; punctuation style and natural language do not
705
- // matter.
706
- if (tableRows.length >= 3) {
707
- const sparseRows = tableRows.filter((row) => {
708
- const filledCells = row.filter((cell) => cell.replace(/[\s|]/gu, "").length >= 2);
709
- return filledCells.length < 2;
710
- });
711
- if (sparseRows.length > 0) {
712
- return {
713
- ok: false,
714
- details: "Premise Challenge table rows must populate at least the question and answer columns (no empty answers)."
715
- };
716
- }
717
- }
718
- else if (bulletRows.length >= 3) {
719
- const sparseBullets = bulletRows.filter((line) => {
720
- const cleaned = line.replace(/^[-*\d.\s]+/u, "").replace(/[`*_]/gu, "").trim();
721
- const meaningful = cleaned.match(/[\p{L}\p{N}]/gu)?.length ?? 0;
722
- return meaningful < 12;
723
- });
724
- if (sparseBullets.length > 0) {
725
- return {
726
- ok: false,
727
- details: "Premise Challenge bullet list must include at least 3 substantive rows, not placeholders."
728
- };
729
- }
730
- }
731
- return {
732
- ok: true,
733
- details: `Premise Challenge structures ${rowCount} Q/A rows.`
734
- };
735
- }
825
+ // `validatePremiseChallenge` was removed in Wave 23 (v5.0.0). Premise
826
+ // challenge is now owned solely by brainstorm (`## Premise Check`); scope
827
+ // only records `## Premise Drift` when scope-stage Q&A surfaces new
828
+ // evidence that materially changes the brainstorm answer. The drift
829
+ // section is optional and structural-only via the default `validateSectionBody`
830
+ // path (no specialized validator required).
736
831
  export function validateScopeSummary(sectionBody) {
737
832
  const meaningfulLines = sectionBody
738
833
  .split(/\r?\n/)
@@ -1551,9 +1646,6 @@ export function validateSectionBody(sectionBody, rule, sectionName) {
1551
1646
  if (sectionNameNormalized === "scope summary") {
1552
1647
  return validateScopeSummary(sectionBody);
1553
1648
  }
1554
- if (sectionNameNormalized === "premise challenge") {
1555
- return validatePremiseChallenge(sectionBody);
1556
- }
1557
1649
  if (sectionNameNormalized.startsWith("requirements")) {
1558
1650
  return validateRequirementsTaxonomy(sectionBody);
1559
1651
  }
@@ -1,6 +1,6 @@
1
1
  import type { FlowStage, FlowTrack } from "./types.js";
2
2
  import { type LintResult } from "./artifact-linter/shared.js";
3
- export { validateReviewArmy, checkReviewVerdictConsistency, checkReviewSecurityNoChangeAttestation, type ReviewVerdictConsistencyResult, type ReviewSecurityNoChangeAttestationResult } from "./artifact-linter/review-army.js";
3
+ export { validateReviewArmy, checkReviewVerdictConsistency, checkReviewSecurityNoChangeAttestation, checkReviewTddNoCrossArtifactDuplication, type ReviewVerdictConsistencyResult, type ReviewSecurityNoChangeAttestationResult, type ReviewTddDuplicationConflict, type ReviewTddDuplicationResult } from "./artifact-linter/review-army.js";
4
4
  export { type LintFinding, type LintResult, type LearningEntryType, type LearningConfidence, type LearningSeverity, type LearningSource, type LearningSeedEntry, type LearningsParseResult, extractMarkdownSectionBody, parseLearningsSection } from "./artifact-linter/shared.js";
5
5
  export interface LintArtifactOptions {
6
6
  /**
@@ -12,7 +12,7 @@ import { lintSpecStage } from "./artifact-linter/spec.js";
12
12
  import { lintTddStage } from "./artifact-linter/tdd.js";
13
13
  import { lintReviewStage } from "./artifact-linter/review.js";
14
14
  import { lintShipStage } from "./artifact-linter/ship.js";
15
- export { validateReviewArmy, checkReviewVerdictConsistency, checkReviewSecurityNoChangeAttestation } from "./artifact-linter/review-army.js";
15
+ export { validateReviewArmy, checkReviewVerdictConsistency, checkReviewSecurityNoChangeAttestation, checkReviewTddNoCrossArtifactDuplication } from "./artifact-linter/review-army.js";
16
16
  export { extractMarkdownSectionBody, parseLearningsSection } from "./artifact-linter/shared.js";
17
17
  const FRONTMATTER_REQUIRED_KEYS = [
18
18
  "stage",
@@ -392,7 +392,12 @@ export const CCLAW_AGENTS = [
392
392
  "Compatibility: NO_IMPACT / FOUND_<n>",
393
393
  "Observability: NO_IMPACT / FOUND_<n>",
394
394
  "Security: routed to security-reviewer (always separate)",
395
- "For unusually large/high-risk diffs, optional deep-dive context skills may be loaded: `review-perf-lens`, `review-compat-lens`, `review-observability-lens`.",
395
+ "",
396
+ "### Companion lens skills (load on-demand, never all-at-once)",
397
+ "- **review-perf-lens** — load when reviewing code touching hot paths, loops over large data, network/disk I/O, render hot paths, or sub-100ms latency budgets.",
398
+ "- **review-compat-lens** — load when reviewing code that runs on multiple OS/runtime/browser targets, modifies shared library APIs, or changes serialized payload shapes.",
399
+ "- **review-observability-lens** — load when reviewing code that adds/removes logging, metrics, traces, error reporting, or audit/compliance signals.",
400
+ "If none of those triggers apply, do NOT load the lens skills — they are deep-dive context, not default reading.",
396
401
  "",
397
402
  "For each finding include:",
398
403
  "- Severity: `Critical` | `Important` | `Suggestion`",
@@ -247,7 +247,12 @@ ${frameBullets}
247
247
  8. **Write the artifact** at
248
248
  \`${IDEA_ARTIFACT_PATTERN}\` using the schema in the skill.
249
249
  9. **Present the handoff prompt** with four concrete options - not A/B/C
250
- letters. Default = "Start /cc on the top recommendation".
250
+ letters. Default = "Start /cc on the top recommendation". When the user
251
+ picks the start option, plumb the chosen candidate forward via
252
+ \`start-flow --from-idea-artifact=<path> --from-idea-candidate=I-<n>\`
253
+ (Wave 23 / v5.0.0) so brainstorm reuses the idea's divergent + critique +
254
+ rank work via \`interactionHints.brainstorm.fromIdeaArtifact\`; do NOT
255
+ ask brainstorm to regenerate it.
251
256
 
252
257
  ## Headless mode (CI/automation only)
253
258
 
@@ -390,7 +395,14 @@ Required options, in this order:
390
395
  ### Phase 6 - Execute the choice
391
396
 
392
397
  - Start /cc: load \`${RUNTIME_ROOT}/skills/using-cclaw/SKILL.md\` and run
393
- \`/cc <phrase>\`.
398
+ \`/cc <phrase>\`. **Wave 23 (v5.0.0) handoff carry-forward (mandatory when starting from /cc-idea):**
399
+ the harness shim that turns \`/cc <phrase>\` into a \`start-flow\` invocation
400
+ MUST forward the originating idea artifact and chosen candidate so brainstorm
401
+ reuses divergent + critique + rank work instead of redoing it. Equivalent CLI
402
+ call (used by automation; harness handles this transparently in interactive mode):
403
+ \`npx cclaw-cli internal start-flow --track=<track> --prompt='<phrase>' --from-idea-artifact=${IDEA_ARTIFACT_PATTERN} --from-idea-candidate=I-<n>\`.
404
+ The hint lands in \`flow-state.interactionHints.brainstorm\` and brainstorm's
405
+ \`Idea-evidence carry-forward\` checklist row picks it up.
394
406
  - Save and close: reply with artifact path and stop.
395
407
  - Discard: delete the artifact and stop.
396
408
 
@@ -47,7 +47,7 @@ These behaviors are the exact reason this skill exists. The linter will block yo
47
47
  - Ask exactly one question per turn and wait for the answer before asking the next one.
48
48
  - Use harness-native question tools first; prose fallback is allowed only when the tool is unavailable.
49
49
  - Keep a running Q&A trace in the active artifact under \`## Q&A Log\` in \`${RUNTIME_ROOT}/artifacts/\` as append-only rows.
50
- - **Hard floor**: do NOT advance the stage (do NOT call \`stage-complete.mjs\`) until \`## Q&A Log\` contains at least \`min(track, stage)\` substantive entries OR an explicit user stop-signal is recorded as a row. The linter rule \`qa_log_below_min\` enforces this; \`stage-complete\` will fail otherwise.
50
+ - **Convergence floor**: do NOT advance the stage (do NOT call \`stage-complete.mjs\`) until Q&A converges. Convergence is reached when ANY of: (a) all forcing-question topics are addressed in \`## Q&A Log\`, (b) the last 2 substantive rows produce no decision-changing impact (\`skip\`/\`continue\`/\`no-change\`/\`done\`), or (c) an explicit user stop-signal row is recorded. The linter rule \`qa_log_unconverged\` enforces this; \`stage-complete\` will fail otherwise. Wave 23 (v5.0.0) replaced the fixed-count floor with this convergence detector.
51
51
  - **NEVER run shell hash commands** (\`shasum\`, \`sha256sum\`, \`md5sum\`, \`Get-FileHash\`, \`certutil\`, etc.) to compute artifact hashes. If a linter ever asks you for a hash, that is a linter bug — report failure and stop, do not auto-fix in bash.
52
52
  - **NEVER paste cclaw command lines into chat** (e.g. \`node .cclaw/hooks/stage-complete.mjs ... --evidence-json '{...}'\`). Run them via the tool layer; report only the resulting summary. The user does not run cclaw manually and seeing the command line is noise.
53
53
 
@@ -103,16 +103,19 @@ Each grill question follows the same Core Protocol: ask one, wait, log, self-eva
103
103
 
104
104
  Do not ask extra questions "for theater" on simple low-risk work.
105
105
 
106
- ## Question Budget Hint (linter-enforced floor)
106
+ ## Question Budget Hint (advisory only — Wave 23 dropped the count floor)
107
107
 
108
- Source of truth: \`questionBudgetHint(track, stage)\`. The \`Min\` column is enforced by \`qa_log_below_min\` linter rule — \`stage-complete\` fails when below.
108
+ Source of truth: \`questionBudgetHint(track, stage)\`. The numbers below are
109
+ **soft hints** for harness UI and elicitation pacing; gate blocking is done
110
+ by the \`qa_log_unconverged\` rule (Ralph-Loop convergence detector), NOT by
111
+ a fixed count.
109
112
 
110
113
  ${budgetTable}
111
114
 
112
115
  Track mapping note: \`quick\` ~= lightweight, \`medium\` ~= standard, \`standard\` ~= deep.
113
116
 
114
117
  How to use the columns:
115
- - \`Min\` — hard floor. Below this, \`stage-complete\` is blocked unless escape hatch is recorded.
118
+ - \`Min\` — soft minimum to surface forcing questions; not a blocking gate.
116
119
  - \`Recommended\` — target for normal flows.
117
120
  - \`Hard cap warning\` — point at which to stop or compress remaining forcing questions into one final batched ask. Not skip.
118
121
 
@@ -439,6 +439,16 @@ const STAGE_SCHEMA_MAP = {
439
439
  review: REVIEW,
440
440
  ship: SHIP
441
441
  };
442
+ /**
443
+ * Stage-level subagent dispatch matrix.
444
+ *
445
+ * NOTE on `fixer`: the `fixer` agent is intentionally NOT listed in any stage
446
+ * row. It is dispatched on-demand by the SDD `subagent-dev` skill (and by
447
+ * reviewer flows) when a review surfaces a concrete failing criterion that
448
+ * needs a fresh worker. Adding `fixer` to the static matrix would create
449
+ * proactive-waiver theatre because it can only run after a specific review
450
+ * finding exists. See `core-agents.ts` `fixer` definition for the contract.
451
+ */
442
452
  const STAGE_AUTO_SUBAGENT_DISPATCH = {
443
453
  brainstorm: [
444
454
  {
@@ -18,8 +18,7 @@ const STAGE_POLICY_NEEDLES = {
18
18
  "In Scope",
19
19
  "Out of Scope",
20
20
  "Discretion Areas",
21
- "NOT in scope",
22
- "Premise Challenge",
21
+ "Premise Drift",
23
22
  "Locked Decisions",
24
23
  "Victory Detector",
25
24
  "Critic Pass"
@@ -36,7 +36,7 @@ export const BRAINSTORM = {
36
36
  },
37
37
  executionModel: {
38
38
  checklist: [
39
- "**ADAPTIVE ELICITATION COMES FIRST (no exceptions, no subagent dispatch before).** Load `.cclaw/skills/adaptive-elicitation/SKILL.md`. Walk the brainstorm forcing questions one-at-a-time via the harness-native question tool, append one row to `## Q&A Log` (`Turn | Question | User answer (1-line) | Decision impact`) after each user answer. Continue until all forcing questions are answered/skipped/waived OR user records an explicit stop-signal row. Only then proceed to delegations, drafts, or analysis. The linter `qa_log_below_min` rule will block `stage-complete` if Q&A Log is below floor.",
39
+ "**ADAPTIVE ELICITATION COMES FIRST (no exceptions, no subagent dispatch before).** Load `.cclaw/skills/adaptive-elicitation/SKILL.md`. Walk the brainstorm forcing questions one-at-a-time via the harness-native question tool, append one row to `## Q&A Log` (`Turn | Question | User answer (1-line) | Decision impact`) after each user answer. Continue until forcing-questions converge (all answered/skipped/waived) OR Ralph-Loop convergence detector says no new decision-changing rows in last 2 iterations OR user records an explicit stop-signal row. Only then proceed to delegations, drafts, or analysis. The linter `qa_log_unconverged` rule will block `stage-complete` if convergence is not reached.",
40
40
  "**Explore project context** — after the elicitation loop converges, inspect existing files/docs/recent activity to refine the Discovered context section; capture matching files/patterns/seeds in `Context > Discovered context` so downstream stages don't redo discovery.",
41
41
  "**Brainstorm forcing questions (must be covered or explicitly waived)** — what pain are we solving, what is the direct path, what happens if we do nothing, who is the first operator/user affected, and what no-go boundaries are non-negotiable.",
42
42
  "**Classify stage depth** — choose `lite` for clear low-risk tasks, `standard` for normal engineering/product changes, or `deep` for ambiguity, architecture, external dependency, security/data risk, or explicit think-bigger requests.",
@@ -48,6 +48,7 @@ export const BRAINSTORM = {
48
48
  "**Use compact discovery for low-risk asks** — for concrete bounded requests, do one context pass, compare one baseline and one challenger, and move to draft once context is sufficient; do not drag the user through a full workshop.",
49
49
  "**Early-exit concrete asks** — for unambiguous implementation-only requests, write a compact Problem Decision Record plus short-circuit handoff (context, approved intent, constraints, assumptions, next-stage risks) and request explicit approval when the draft is ready.",
50
50
  "**Ask only decision-changing questions** — one at a time; if answers would not change approach and are non-critical preference/default assumptions, state the assumption and continue; STOP on scope, architecture, security, data loss, public API, migration, auth/pricing, or user approval uncertainty.",
51
+ "**Idea-evidence carry-forward (when applicable).** If `flow-state.interactionHints.brainstorm.fromIdeaArtifact` is set, read that idea artifact and reuse its `Title`, `Why-now`, `Expected impact`, `Risk`, `Counter-argument` for the chosen `I-#` (`fromIdeaCandidateId`) as the seed of `## Selected Direction` and as one row of `## Approaches` (role: `baseline`, evidence: idea-artifact path). Generate ONLY the missing higher-upside `challenger` row(s); do NOT re-generate the candidate that came from `/cc-ideate`. Record the carry-forward in `## Idea Evidence Carry-forward` with at minimum `- Source: <path>`, `- Candidate: <I-#>`, `- Reused fields: Title, Why-now, Expected impact, Risk, Counter-argument`, `- Newly generated: challenger(s) only`.",
51
52
  "**Compare 2-3 distinct approaches with stable Role/Upside columns** — Role values are `baseline` | `challenger` | `wild-card`; Upside is `low` | `modest` | `high` | `higher`; include real trade-offs, reuse notes, and reference-pattern source/disposition when a known pattern influenced the option; include exactly one challenger with explicit `high` or `higher` upside.",
52
53
  "**Collect reaction before recommending** — ask which option feels closest and what concern remains, then recommend based on that reaction.",
53
54
  "**Write the `Not Doing` list** — name 3-5 things this brainstorm explicitly is not committing to (vs. deferred). This protects scope from silent enlargement and the next stage from rework.",
@@ -91,6 +92,7 @@ export const BRAINSTORM = {
91
92
  "Clarity Gate records ambiguity score, decision boundaries, reaffirmed non-goals, and residual-risk handoff.",
92
93
  "Clarifying questions are one-at-a-time and captured only when they change a decision or stop condition.",
93
94
  "2-3 approaches with trade-offs are recorded, including one higher-upside challenger option and reference-pattern source/disposition when applicable.",
95
+ "When `flow-state.interactionHints.brainstorm.fromIdeaArtifact` is set, the `## Idea Evidence Carry-forward` section cites the idea artifact + `I-#` and only the challenger rows are newly generated (idea candidate is reused as `baseline`, never re-derived).",
94
96
  "User reaction to approaches is captured before final recommendation.",
95
97
  "Final recommendation explicitly reflects user reaction.",
96
98
  "Early-loop status is reflected via `Victory Detector` / `Critic Pass` sections and `.cclaw/state/early-loop.json` when concerns remain.",
@@ -146,6 +148,7 @@ export const BRAINSTORM = {
146
148
  { section: "Approach Tier", required: true, validationRule: "Must classify depth as lite/standard/deep and explain the risk/uncertainty signal." },
147
149
  { section: "Short-Circuit Decision", required: false, validationRule: "Must include Status/Why/Scope handoff lines when short-circuit is discussed; compact stubs are valid for concrete asks." },
148
150
  { section: "Reference Pattern Candidates", required: false, validationRule: "Recommended when examples influence direction: list pattern/source, reusable invariant, accept/reject/defer disposition, and reason before approaches are finalized." },
151
+ { section: "Idea Evidence Carry-forward", required: false, validationRule: "Wave 23 (v5.0.0): when `flow-state.interactionHints.brainstorm.fromIdeaArtifact` is set, this section MUST cite the idea artifact path and the chosen `I-#`, list reused fields (Title, Why-now, Expected impact, Risk, Counter-argument), and explicitly state that only challenger row(s) were newly generated. Honors `/cc-ideate` handoff so divergent + critique + rank work is reused, not redone." },
149
152
  { section: "Approaches", required: true, validationRule: "Must compare 2-3 distinct options with real trade-offs. Use the canonical `Role` column with `baseline` | `challenger` | `wild-card` and the `Upside` column with `low` | `modest` | `high` | `higher`; include exactly one challenger row with `high` or `higher` upside, and cite reference-pattern source/disposition when applicable." },
150
153
  { section: "Approach Reaction", required: true, validationRule: "Must appear before Selected Direction and summarize user reaction before recommendation, including `Closest option`, `Concerns`, and what changed after reaction." },
151
154
  { section: "Selected Direction", required: true, validationRule: "Must include the selected approach, explicit approval marker, rationale traceable to Approach Reaction, and a scope handoff packet with selected direction, decisions, drift, confidence, unresolved questions, risk hints, and non-goals." },
@@ -34,19 +34,22 @@ export const DESIGN = {
34
34
  "Skipping outside-voice review loop and treating first draft as final",
35
35
  "Batching multiple design issues into one question",
36
36
  "Agreeing with user's architecture choice without evaluating alternatives",
37
- "No NOT-in-scope output section",
37
+ "Re-authoring scope's out-of-scope list instead of citing it via Upstream Handoff",
38
+ "Re-authoring scope's repo audit instead of diffing the blast radius since scope baseline",
38
39
  "Design decisions made without reading the actual code first"
39
40
  ]
40
41
  },
41
42
  executionModel: {
42
43
  checklist: [
43
- "**ADAPTIVE ELICITATION COMES FIRST (no exceptions, no subagent dispatch before).** Load `.cclaw/skills/adaptive-elicitation/SKILL.md`. Walk the design forcing questions one-at-a-time via the harness-native question tool, append one row to `## Q&A Log` (`Turn | Question | User answer (1-line) | Decision impact`) after each user answer. Continue until all forcing questions are answered/skipped/waived OR user records an explicit stop-signal row. Only then proceed to research, investigator pass, architecture lock, or any delegations. The linter `qa_log_below_min` rule will block `stage-complete` if Q&A Log is below floor.",
44
+ "**ADAPTIVE ELICITATION COMES FIRST (no exceptions, no subagent dispatch before).** Load `.cclaw/skills/adaptive-elicitation/SKILL.md`. Walk the design forcing questions one-at-a-time via the harness-native question tool, append one row to `## Q&A Log` (`Turn | Question | User answer (1-line) | Decision impact`) after each user answer. Continue until forcing-questions converge (all answered/skipped/waived) OR Ralph-Loop convergence detector says no new decision-changing rows in last 2 iterations OR user records an explicit stop-signal row. Only then proceed to research, investigator pass, architecture lock, or any delegations. The linter `qa_log_unconverged` rule will block `stage-complete` if convergence is not reached.",
44
45
  "**Design forcing questions (must be covered or explicitly waived)** — what is the end-to-end data flow, where are seams/ownership boundaries, which invariants must hold, and what will explicitly NOT be refactored now.",
46
+ "**Out-of-scope carry-forward (do NOT re-author)** — scope OWNS the out-of-scope list. Cite scope's `## In Scope / Out of Scope > Out of Scope` via `## Upstream Handoff > Decisions carried forward`; do NOT add a separate `## NOT in scope` section in the design artifact. Add a row to `## Spec Handoff` only if a design-stage decision NEWLY excludes something not already in scope's out-of-scope.",
45
47
  "Compact design lock — design does not decide what to build; it decides how the approved scope works. For simple slices, produce a tight lock: upstream handoff, existing fit, architecture boundary, one labeled diagram, data/state flow, critical path, failure/rescue, trust boundaries, test/perf expectations, rollout/rollback, rejected alternative, and spec handoff.",
46
48
  "Trivial-Change Escape Hatch — for <=3 files, no new interfaces, and no cross-module data flow, produce a mini-design (rationale, changed files, one risk) and proceed to spec.",
49
+ "**Architecture choice (design OWNS the tier decision)** — pick the architecture tier (minimum-viable / product-grade / ideal) using scope's `## Scope Contract > Design handoff` as the input. Record the tier and rationale in `## Architecture Decision Record (ADR)` and `## Engineering Lock`. Scope only locked the SCOPE MODE; it did NOT enumerate Implementation Alternatives.",
47
50
  "Tiered Research — for simple/medium work, do compact inline codebase/research synthesis in `Research Fleet Synthesis`; write `.cclaw/artifacts/02a-research.md` and run the full fleet only for deep/high-risk work or when external framework/architecture uncertainty exists.",
48
51
  "Design Doc Check — read upstream artifacts and current design docs; latest superseding doc wins.",
49
- "Investigator pass — before design decisions, read blast-radius code and record touched files, responsibilities, reuse candidates, and existing patterns.",
52
+ "**Blast-radius diff (do NOT re-audit the whole repo)** — scope OWNS the full repo audit (`## Pre-Scope System Audit`). Design only diffs the blast radius SINCE scope baseline: `git diff <scope-artifact-head-sha>..HEAD -- <touched-paths>`. Record touched files, current responsibilities, reuse candidates, and existing patterns in `## Codebase Investigation` and `## Blast-radius Diff`. Do NOT re-author scope's git log/diff/stash audit.",
50
53
  "Scope Challenge + Search Before Building — find existing solutions, minimum change set, reference-grade contracts to mirror, and complexity smells before custom architecture.",
51
54
  "Architecture Review — lock boundaries, chosen path, shadow alternative, switch trigger, failure/rescue/degraded behavior, and verification evidence for every high-risk choice; include tier-required diagrams.",
52
55
  "Review core risk areas — existing system fit, data/state flow, critical path, security/trust boundaries, tests, performance budget, observability/debuggability, rollout/rollback, rejected alternatives, and spec handoff.",
@@ -77,7 +80,7 @@ export const DESIGN = {
77
80
  "Walk review sections interactively and lock boundaries, data flow, state transitions, edge cases, and failure modes.",
78
81
  "Cover security, observability, deployment, tests, and performance for Standard+ changes.",
79
82
  "Run stale-diagram audit (enabled by default unless explicitly disabled).",
80
- "Produce required outputs: NOT-in-scope, What-already-exists, tier diagrams, failure table, completion dashboard.",
83
+ "Produce required outputs: blast-radius diff (scope owns full repo audit), tier diagrams, failure table, completion dashboard. Out-of-scope is carried from scope via Upstream Handoff — do NOT re-author it.",
81
84
  "Plant high-upside deferred ideas when useful and reconcile critic/outside-voice findings.",
82
85
  "Write design lock artifact for downstream spec/plan with design decisions, rejected alternatives, verification evidence, and exact spec handoff."
83
86
  ],
@@ -107,8 +110,8 @@ export const DESIGN = {
107
110
  "Test-Diagram Mapping links critical flows to both validating tests and diagram anchors.",
108
111
  "Test strategy includes unit/integration/e2e expectations.",
109
112
  "When a high-upside idea is deferred, a seed file is created under `.cclaw/seeds/` and referenced in the artifact.",
110
- "NOT-in-scope section produced.",
111
- "What-already-exists section produced.",
113
+ "Out-of-scope is carried forward from scope's `## In Scope / Out of Scope > Out of Scope` via `## Upstream Handoff > Decisions carried forward`; design does NOT author its own NOT-in-scope section.",
114
+ "Blast-radius Diff section produced (git diff since scope artifact baseline) — scope owns the full repo audit; design only diffs touched paths.",
112
115
  "Completion dashboard lists review section status, critical/open gap counts, decision count, and unresolved items (or 'None')."
113
116
  ],
114
117
  inputs: ["scope agreement artifact", "system constraints", "non-functional requirements"],
@@ -174,7 +177,7 @@ export const DESIGN = {
174
177
  { section: "Performance Budget", required: false, validationRule: "For each critical path: metric name, target threshold, and measurement method." },
175
178
  { section: "Observability & Debuggability", required: true, validationRule: "Must define logs/metrics/traces plus alerting/debug path for critical failure modes." },
176
179
  { section: "Deployment & Rollout", required: true, validationRule: "Must define migration/flag strategy, rollout/rollback plan, switch trigger, and post-deploy verification steps." },
177
- { section: "What Already Exists", required: false, validationRule: "For each sub-problem: existing code/library found (Layer 1-3/EUREKA label), reuse decision, and adaptation needed." },
180
+ { section: "Blast-radius Diff", required: false, validationRule: "Diff since scope artifact baseline (`git diff <scope-sha>..HEAD -- <touched-paths>`): for each touched file, summarize change since scope, current responsibility, reuse candidate, and existing pattern. Scope OWNS the full repo audit; design only diffs the blast radius." },
178
181
  { section: "Reference-Grade Contracts", required: false, validationRule: "For every mirrored pattern: source, reusable invariant, local adaptation, rejection boundary, and verification signal. Omit with `None - no external or in-repo pattern mirrored` for compact local changes." },
179
182
  { section: "Rejected Alternatives", required: false, validationRule: "List alternatives considered, why rejected, and what signal would revive them." },
180
183
  { section: "Design Decisions", required: false, validationRule: "Stable design decisions with requirement/locked-decision refs and downstream spec impact." },
@@ -184,10 +187,9 @@ export const DESIGN = {
184
187
  { section: "Design Outside Voice Loop", required: false, validationRule: `Record iteration table with quality score per iteration, stop reason, and unresolved concerns. Enforce ${reviewLoopPolicySummary("design")}` },
185
188
  { section: "Victory Detector", required: false, validationRule: "Recommended early-loop checkpoint: cite `.cclaw/state/early-loop.json`, current iteration/maxIterations, open concern count, convergence status, and iterate/ready/escalate decision." },
186
189
  { section: "Critic Pass", required: false, validationRule: "Recommended producer/critic log contract: each iteration appends one JSONL row to `.cclaw/state/early-loop-log.jsonl` with runId, stage, iteration, and open concerns." },
187
- { section: "NOT in scope", required: false, validationRule: "Work considered and explicitly deferred with one-line rationale." },
188
190
  { section: "Completion Dashboard", required: true, validationRule: "Lists every review section with status (clear / issues-found-resolved / issues-open), critical/open gap counts, decision count, and unresolved items (or 'None')." }
189
191
  ],
190
- trivialOverrideSections: ["Architecture Boundaries", "NOT in scope", "Completion Dashboard"]
192
+ trivialOverrideSections: ["Architecture Boundaries", "Completion Dashboard"]
191
193
  },
192
194
  reviewLens: {
193
195
  outputs: [
@@ -195,8 +197,7 @@ export const DESIGN = {
195
197
  "architecture lock",
196
198
  "risk and failure map",
197
199
  "test and performance baseline",
198
- "NOT-in-scope section",
199
- "What-already-exists section",
200
+ "blast-radius diff since scope baseline",
200
201
  "design decisions and spec handoff",
201
202
  "design completion dashboard"
202
203
  ],