cclaw-cli 0.51.21 → 0.51.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/dist/config.d.ts +8 -1
  2. package/dist/config.js +9 -6
  3. package/dist/content/hook-manifest.d.ts +2 -4
  4. package/dist/content/hook-manifest.js +4 -3
  5. package/dist/content/meta-skill.js +7 -9
  6. package/dist/content/next-command.js +2 -2
  7. package/dist/content/node-hooks.js +13 -3
  8. package/dist/content/review-loop.js +15 -5
  9. package/dist/content/review-prompts.js +1 -1
  10. package/dist/content/skills.js +3 -2
  11. package/dist/content/stage-schema.js +1 -0
  12. package/dist/content/stages/brainstorm.js +3 -3
  13. package/dist/content/stages/design.js +18 -17
  14. package/dist/content/stages/plan.js +2 -1
  15. package/dist/content/stages/review.js +10 -10
  16. package/dist/content/stages/scope.js +13 -13
  17. package/dist/content/stages/spec.js +7 -5
  18. package/dist/content/stages/tdd.js +2 -2
  19. package/dist/content/start-command.d.ts +4 -3
  20. package/dist/content/start-command.js +21 -17
  21. package/dist/content/templates.d.ts +1 -1
  22. package/dist/content/templates.js +48 -28
  23. package/dist/content/view-command.js +3 -1
  24. package/dist/delegation.js +28 -8
  25. package/dist/doctor.js +147 -21
  26. package/dist/gate-evidence.js +19 -7
  27. package/dist/harness-adapters.js +1 -5
  28. package/dist/install.js +87 -24
  29. package/dist/internal/advance-stage.js +90 -11
  30. package/dist/knowledge-store.d.ts +4 -1
  31. package/dist/knowledge-store.js +24 -14
  32. package/dist/retro-gate.d.ts +1 -0
  33. package/dist/retro-gate.js +9 -9
  34. package/dist/run-archive.js +19 -1
  35. package/dist/run-persistence.js +6 -2
  36. package/dist/tdd-cycle.js +6 -3
  37. package/package.json +1 -1
package/dist/config.d.ts CHANGED
@@ -1,4 +1,11 @@
1
1
  import type { CclawConfig, FlowTrack, HarnessId, LanguageRulePack } from "./types.js";
2
+ export interface ConfigWarningState {
3
+ emitted: Set<string>;
4
+ }
5
+ export interface ReadConfigOptions {
6
+ warningState?: ConfigWarningState;
7
+ }
8
+ export declare function createConfigWarningState(): ConfigWarningState;
2
9
  export declare class InvalidConfigError extends Error {
3
10
  constructor(message: string);
4
11
  }
@@ -34,7 +41,7 @@ export declare function createDefaultConfig(harnesses?: HarnessId[], defaultTrac
34
41
  * never surprise a user who intentionally cleared the list.
35
42
  */
36
43
  export declare function detectLanguageRulePacks(projectRoot: string): Promise<LanguageRulePack[]>;
37
- export declare function readConfig(projectRoot: string): Promise<CclawConfig>;
44
+ export declare function readConfig(projectRoot: string, options?: ReadConfigOptions): Promise<CclawConfig>;
38
45
  /**
39
46
  * Fields that live on the populated runtime `CclawConfig` but are considered
40
47
  * "advanced" — we keep them in the in-memory object so downstream callers
package/dist/config.js CHANGED
@@ -56,13 +56,15 @@ const MINIMAL_CONFIG_KEYS = [
56
56
  ];
57
57
  const DEFAULT_SLICE_REVIEW_THRESHOLD = 5;
58
58
  const DEFAULT_SLICE_REVIEW_TRACKS = ["standard"];
59
- const emittedConfigWarnings = new Set();
60
- function emitConfigWarningOnce(code, message) {
59
+ export function createConfigWarningState() {
60
+ return { emitted: new Set() };
61
+ }
62
+ function emitConfigWarningOnce(warningState, code, message) {
61
63
  const key = `${code}:${message}`;
62
- if (emittedConfigWarnings.has(key)) {
64
+ if (warningState.emitted.has(key)) {
63
65
  return;
64
66
  }
65
- emittedConfigWarnings.add(key);
67
+ warningState.emitted.add(key);
66
68
  process.emitWarning(message, { code });
67
69
  }
68
70
  function sameStringArray(a, b) {
@@ -196,7 +198,8 @@ export async function detectLanguageRulePacks(projectRoot) {
196
198
  }
197
199
  return [...new Set(detected)];
198
200
  }
199
- export async function readConfig(projectRoot) {
201
+ export async function readConfig(projectRoot, options = {}) {
202
+ const warningState = options.warningState ?? createConfigWarningState();
200
203
  const fullPath = configPath(projectRoot);
201
204
  if (!(await exists(fullPath))) {
202
205
  return createDefaultConfig();
@@ -269,7 +272,7 @@ export async function readConfig(projectRoot) {
269
272
  if (tddTestGlobsRaw !== undefined &&
270
273
  explicitTddTestPathPatterns !== undefined &&
271
274
  !sameStringArray(tddTestGlobs, explicitTddTestPathPatterns)) {
272
- emitConfigWarningOnce("CCLAW_CONFIG_DEPRECATED_TDD_TEST_GLOBS", `[cclaw] Both "tddTestGlobs" (deprecated) and "tdd.testPathPatterns" are set in ${fullPath}. ` +
275
+ emitConfigWarningOnce(warningState, "CCLAW_CONFIG_DEPRECATED_TDD_TEST_GLOBS", `[cclaw] Both "tddTestGlobs" (deprecated) and "tdd.testPathPatterns" are set in ${fullPath}. ` +
273
276
  `Using "tdd.testPathPatterns".`);
274
277
  }
275
278
  const resolvedTddTestPathPatterns = [
package/dist/content/hook-manifest.d.ts CHANGED
@@ -45,14 +45,12 @@ export interface HookHandlerSpec {
45
45
  description: string;
46
46
  /**
47
47
  * Semantic event id used by `HOOK_EVENTS_BY_HARNESS` / docs.
48
- * `null` means this handler contributes no semantic coverage row
49
- * (e.g. `verify-current-state` on codex is a supplementary guard,
50
- * not a top-level semantic event).
48
+ * `null` means this handler contributes no semantic coverage row.
51
49
  */
52
50
  semantic: HookSemanticEvent | null;
53
51
  bindings: Partial<Record<HookManifestHarness, HookBinding[]>>;
54
52
  }
55
- export declare const HOOK_SEMANTIC_EVENTS: readonly ["session_rehydrate", "pre_tool_prompt_guard", "pre_tool_workflow_guard", "post_tool_context_monitor", "stop_handoff", "precompact_compat"];
53
+ export declare const HOOK_SEMANTIC_EVENTS: readonly ["session_rehydrate", "pre_tool_prompt_guard", "pre_tool_workflow_guard", "post_tool_context_monitor", "stop_handoff", "precompact_compat", "strict_state_verify"];
56
54
  export type HookSemanticEvent = (typeof HOOK_SEMANTIC_EVENTS)[number];
57
55
  export declare const HOOK_MANIFEST: readonly HookHandlerSpec[];
58
56
  export interface EventGroup {
package/dist/content/hook-manifest.js CHANGED
@@ -35,7 +35,8 @@ export const HOOK_SEMANTIC_EVENTS = [
35
35
  "pre_tool_workflow_guard",
36
36
  "post_tool_context_monitor",
37
37
  "stop_handoff",
38
- "precompact_compat"
38
+ "precompact_compat",
39
+ "strict_state_verify"
39
40
  ];
40
41
  export const HOOK_MANIFEST = [
41
42
  {
@@ -112,8 +113,8 @@ export const HOOK_MANIFEST = [
112
113
  },
113
114
  {
114
115
  handler: "verify-current-state",
115
- description: "Supplementary codex guard that runs on UserPromptSubmit to assert the live state matches the flow.",
116
- semantic: null,
116
+ description: "Supplementary Codex strict-mode guard that runs on UserPromptSubmit to assert the live state matches the flow.",
117
+ semantic: "strict_state_verify",
117
118
  bindings: {
118
119
  codex: [{ event: "UserPromptSubmit" }]
119
120
  }
package/dist/content/meta-skill.js CHANGED
@@ -14,7 +14,7 @@ description: "Routing brain for cclaw. Decide whether to start/resume a stage, a
14
14
  1. User message in current turn.
15
15
  2. Active stage skill and command contract.
16
16
  3. This routing file.
17
- 4. Contextual utility skills.
17
+ 4. Generated cclaw helper skills, research playbooks, and enabled rule packs.
18
18
  5. Default model behavior.
19
19
 
20
20
  If the user explicitly overrides a stage rule, record it in the artifact.
@@ -25,9 +25,7 @@ ${conversationLanguagePolicyMarkdown()}
25
25
  If \`.cclaw/state/flow-state.json\` exists and \`currentStage\` is set,
26
26
  load the matching stage SKILL before producing **substantive** work
27
27
  (artifact edits, code, structured clarifying questions). Do not improvise
28
- from memory. Also load a contextual utility skill when the task clearly
29
- triggers it (security, performance, debugging, docs, finishing-a-branch,
30
- verification-before-completion).
28
+ from memory. Load only generated helper surfaces that actually exist in this install: \`subagent-dev\`, \`parallel-dispatch\`, \`session\`, \`iron-laws\`, research playbooks, review prompts, or enabled language rule packs under \`.cclaw/rules/lang/\`. Do not invent helper-skill names beyond those generated surfaces.
31
29
 
32
30
  Substantive vs. non-substantive:
33
31
 
@@ -70,7 +68,7 @@ Task arrives
70
68
  | Class | Route |
71
69
  |---|---|
72
70
  | non-trivial software work | \`/cc <idea>\` |
73
- | trivial software fix | \`/cc <idea>\` (quick/medium track as recommended) |
71
+ | trivial software fix | \`/cc <idea>\` (quick track) |
74
72
  | bugfix with clear repro | \`/cc <idea>\` and enforce RED-first in tdd |
75
73
  | pure question / conversation | answer directly |
76
74
  | non-software work | answer directly |
@@ -115,10 +113,10 @@ Use the current stage skill plus \`.cclaw/state/flow-state.json\` for orientatio
115
113
 
116
114
  Use built-in judgment only when triggered by the current task:
117
115
 
118
- - security, performance, debugging, docs, and CI/CD review lenses
119
- - verification discipline before completion claims
120
- - branch-finishing discipline during ship/finalization
121
- - iron-laws as policy arbitration when instructions conflict
116
+ - generated subagent context skills for mandatory review/delegation contracts
117
+ - research playbooks and review prompts when a stage explicitly calls for them
118
+ - inline verification and ship/finalization sections in the active stage skill
119
+ - \`iron-laws\` as policy arbitration when instructions conflict
122
120
  - language rule packs from \`.cclaw/config.yaml\` when enabled
123
121
 
124
122
  ## Protocol Behavior
package/dist/content/next-command.js CHANGED
@@ -112,7 +112,7 @@ ${ralphLoopContractSnippet()}
112
112
  - If \`track === "quick"\`, the critical path is **spec → tdd → review → ship**. When advancing, skip any stage listed in \`skippedStages\` — i.e. after the current stage completes, pick the next stage that is NOT in \`skippedStages\`.
113
113
  - If \`track === "medium"\`, the critical path is **brainstorm → spec → plan → tdd → review → ship**. Scope and design are intentionally skipped unless the run is reclassified to standard.
114
114
  - If \`track === "standard"\`, advance through all 8 stages in their natural order.
115
- - Never reintroduce a skipped stage mid-run. If the user wants upstream scoping work, they must archive the run and start a new one with \`track: "standard"\`.
115
+ - Never manually reintroduce a skipped stage mid-run. If evidence shows the track was wrong, stop and use the managed start-flow helper with \`--reclassify\`; only that managed reclassification may add upstream stages back into the active track.
116
116
 
117
117
  ## Resume Semantics
118
118
 
@@ -130,7 +130,7 @@ When orchestrated by another skill/subagent, emit exactly one JSON envelope and
130
130
  no narrative text:
131
131
 
132
132
  \`\`\`json
133
- {"version":"1","kind":"gate-result","stage":"review","payload":{"command":"/cc-next","decision":"resume_or_advance","nextStage":"ship"},"emittedAt":"<ISO-8601>"}
133
+ {"version":"1","kind":"gate-result","stage":"<currentStage>","payload":{"command":"/cc-next","decision":"resume_or_advance","nextStage":"<nextStage>"},"emittedAt":"<ISO-8601>"}
134
134
  \`\`\`
135
135
 
136
136
  Validate envelopes with:
package/dist/content/node-hooks.js CHANGED
@@ -1117,12 +1117,21 @@ async function handleStopHandoff(runtime) {
1117
1117
  return 1;
1118
1118
  }
1119
1119
 
1120
+ const closeoutObj = toObject(state.raw.closeout) || {};
1121
+ const shipSubstate = typeof closeoutObj.shipSubstate === "string" ? closeoutObj.shipSubstate : "idle";
1122
+ const closeoutContext =
1123
+ state.currentStage === "ship" || shipSubstate !== "idle"
1124
+ ? " closeout.shipSubstate=" + shipSubstate + "; closeout chain=retro -> compound -> archive; continue closeout with /cc-next."
1125
+ : "";
1126
+
1120
1127
  const message =
1121
1128
  "Cclaw: session ending (stage=" +
1122
1129
  state.currentStage +
1123
1130
  ", run=" +
1124
1131
  state.activeRunId +
1125
- "). Active artifacts stay in " +
1132
+ ")." +
1133
+ closeoutContext +
1134
+ " Active artifacts stay in " +
1126
1135
  RUNTIME_ROOT +
1127
1136
  "/artifacts until archive. Before stopping: (1) confirm flow-state reflects reality, (2) ensure artifact changes match current intent, (3) if you discovered a non-obvious rule/pattern during stage work, add it to the current artifact ## Learnings section so stage-complete can harvest it, (4) commit or revert pending changes.";
1128
1137
 
@@ -1650,9 +1659,10 @@ async function handleVerifyCurrentState(runtime) {
1650
1659
  function normalizeHookName(rawName) {
1651
1660
  const value = normalizeText(rawName).toLowerCase();
1652
1661
  if (value === "session-start") return "session-start";
1653
- if (value === "stop-handoff") return "stop-handoff";
1662
+ if (value === "stop-handoff" || value === "stop") return "stop-handoff";
1654
1663
  if (value === "stop-checkpoint") return "stop-handoff";
1655
- if (value === "pre-compact") return "pre-compact";
1664
+ if (value === "pre-compact" || value === "precompact") return "pre-compact";
1665
+ if (value === "session-rehydrate") return "session-start";
1656
1666
  if (value === "prompt-guard") return "prompt-guard";
1657
1667
  if (value === "workflow-guard") return "workflow-guard";
1658
1668
  if (value === "context-monitor") return "context-monitor";
package/dist/content/review-loop.js CHANGED
@@ -456,6 +456,9 @@ export function buildReviewLoopEnvelope(args) {
456
456
  function formatScore(value) {
457
457
  return clampScore(value).toFixed(3);
458
458
  }
459
+ function reviewLoopHeading(stage) {
460
+ return stage === "scope" ? "Scope Outside Voice Loop" : "Design Outside Voice Loop";
461
+ }
459
462
  function finalEnvelopeScore(envelope) {
460
463
  if (envelope.iterations.length === 0)
461
464
  return 0;
@@ -486,7 +489,8 @@ export function renderReviewLoopSummarySection(envelope) {
486
489
  })
487
490
  .join("\n")
488
491
  : "| 0 | 0.000 | 0 |";
489
- return `## Spec Review Loop
492
+ const heading = reviewLoopHeading(envelope.stage);
493
+ return `## ${heading}
490
494
  | Iteration | Quality Score | Findings |
491
495
  |---|---|---|
492
496
  ${rows}
@@ -498,9 +502,14 @@ ${rows}
498
502
  export function upsertReviewLoopSummary(markdown, envelope) {
499
503
  const withHeader = upsertReviewLoopHeader(markdown, envelope);
500
504
  const section = renderReviewLoopSummarySection(envelope);
501
- const headingRe = /^##\s+Spec Review Loop\s*$/m;
502
- const match = headingRe.exec(withHeader);
503
- if (!match || match.index < 0) {
505
+ const headingCandidates = [reviewLoopHeading(envelope.stage), "Spec Review Loop"];
506
+ const match = headingCandidates
507
+ .map((heading) => {
508
+ const escapedHeading = heading.replace(/[.*+?^${}()|[\]\\]/gu, "\\$&");
509
+ return new RegExp(`^##\\s+${escapedHeading}\\s*$`, "m").exec(withHeader);
510
+ })
511
+ .find((candidate) => candidate !== null && candidate.index >= 0);
512
+ if (!match) {
504
513
  const needsBreak = withHeader.endsWith("\n") ? "" : "\n";
505
514
  return `${withHeader}${needsBreak}\n${section}\n`;
506
515
  }
@@ -613,7 +622,8 @@ function parseHeaderMeta(markdown) {
613
622
  };
614
623
  }
615
624
  export function extractReviewLoopEnvelopeFromArtifact(markdown, stage, artifactPath) {
616
- const sectionBody = extractH2Section(markdown, "Spec Review Loop");
625
+ const sectionBody = extractH2Section(markdown, reviewLoopHeading(stage))
626
+ ?? extractH2Section(markdown, "Spec Review Loop");
617
627
  if (!sectionBody)
618
628
  return null;
619
629
  const iterations = parseIterationsTable(sectionBody);
package/dist/content/review-prompts.js CHANGED
@@ -57,7 +57,7 @@ value. Do not nitpick wording.
57
57
 
58
58
  ## Output
59
59
 
60
- Record in \`## Outside Voice Findings\` or \`## Spec Review Loop\`:
60
+ Record in \`## Outside Voice Findings\` or the stage-specific outside voice loop section:
61
61
 
62
62
  \`\`\`markdown
63
63
  | ID | Dimension | Finding | Disposition | Rationale |
package/dist/content/skills.js CHANGED
@@ -168,7 +168,7 @@ Apply concise turn announces: one announce per batch boundary (or when risk/plan
168
168
  changes materially), then execute tasks without repetitive boilerplate.
169
169
 
170
170
  Detailed walkthrough:
171
- Use the current plan artifact for batch order and keep RED -> GREEN -> REFACTOR evidence in the TDD artifact.
171
+ Use the active track's upstream artifact for ordering: plan slices on standard/medium, or spec acceptance items / bug reproduction slices on quick. Keep RED -> GREEN -> REFACTOR evidence in the TDD artifact.
172
172
  `;
173
173
  }
174
174
  function crossStageTraceBlock(trace) {
@@ -386,6 +386,7 @@ ${philosophy.purpose}
386
386
 
387
387
  ## Complexity Tier
388
388
  - Active tier: \`${schema.complexityTier}\`
389
+ - Scale-to-complexity rule: execute required gates and artifact sections, but keep optional/deep sections compact unless risk, novelty, or configuration triggers them. Do not mechanically expand lightweight work into a strategy workshop.
389
390
  - Mandatory delegations at this tier: ${mandatoryDelegationSummary}
390
391
  - Track render context: \`${trackContext.track}\` (${trackContext.usesPlanTerminology ? "plan-first wording" : "acceptance-first wording"})
391
392
 
@@ -452,7 +453,7 @@ ${reviewLens.outputs.map((item) => `- ${item}`).join("\n")}
452
453
  ${reviewSectionsBlock(reviewLens.reviewSections)}
453
454
 
454
455
  ## Shared Stage Guidance
455
- - Follow the handoff menu: advance, revise, pause, rewind, or archive only when the user explicitly chooses it.
456
+ - At STOP/closeout points, offer the shared handoff choices only when a user decision is needed.
456
457
  - Carry upstream decisions forward explicitly; record drift instead of silently changing direction.
457
458
  - Before closeout, fill \`## Learnings\` with \`- None this stage.\` or 1-3 strict JSON bullets.
458
459
  - Keep decisions explicit: context, options, chosen option, rationale, risk, and rollback.
package/dist/content/stage-schema.js CHANGED
@@ -227,6 +227,7 @@ const REQUIRED_GATE_IDS = {
227
227
  review: (track) => [
228
228
  "review_layer1_spec_compliance",
229
229
  "review_layer2_security",
230
+ "review_layer_coverage_complete",
230
231
  "review_criticals_resolved",
231
232
  "review_army_json_valid",
232
233
  ...(track === "quick" ? [] : ["review_trace_matrix_clean"])
package/dist/content/stages/brainstorm.js CHANGED
@@ -40,10 +40,10 @@ export const BRAINSTORM = {
40
40
  "**Classify depth and scope** — pick Lightweight / Standard / Deep; decompose independent subsystems before deeper work.",
41
41
  "**Premise check (one pass)** — answer the three gstack-style questions in the artifact body: *Right problem? Direct path? What if we do nothing?* Take a position; do not hedge.",
42
42
  "**Reframe with How Might We** — write a single `How Might We …?` line that names the user, the desired outcome, and the constraint. This is the altitude check before approaches.",
43
- "**Sharpening questions (3-5)** — capture decision-changing question/answer pairs in the `Sharpening Questions` table with the actual decision impact; if a question would not change architecture/scope/UX, state the assumption and skip it.",
43
+ "**Sharpening questions (3-5)** — capture decision-changing question/answer pairs in the `Sharpening Questions` table with the actual decision impact; only non-critical preference/default assumptions may continue. STOP and ask on scope, architecture, security, data loss, public API, migration, auth/pricing, or user-approval uncertainty.",
44
44
  "**Use compact discovery for simple apps** — for concrete low-risk asks (todo app, landing page, local widget), do one context pass, compare one baseline and one challenger, then ask for one explicit approval; do not drag the user through a full workshop.",
45
45
  "**Short-circuit concrete asks** — for unambiguous implementation-only requests, write a compact brainstorm stub (context, problem, approved intent, constraints, assumptions) and ask for one explicit approval.",
46
- "**Ask only decision-changing questions** — one at a time; if answers would not change approach, state the assumption and continue.",
46
+ "**Ask only decision-changing questions** — one at a time; if answers would not change approach and are non-critical preference/default assumptions, state the assumption and continue; STOP on scope, architecture, security, data loss, public API, migration, auth/pricing, or user approval uncertainty.",
47
47
  "**Compare 2-3 distinct approaches with stable Role/Upside columns** — Role values are `baseline` | `challenger` | `wild-card`; Upside is `low` | `modest` | `high` | `higher`; include real trade-offs and reuse notes; include exactly one challenger with explicit `high` or `higher` upside.",
48
48
  "**Collect reaction before recommending** — ask which option feels closest and what concern remains, then recommend based on that reaction.",
49
49
  "**Write the `Not Doing` list** — name 3-5 things this brainstorm explicitly is not committing to (vs. deferred). This protects scope from silent enlargement and the next stage from rework.",
@@ -55,7 +55,7 @@ export const BRAINSTORM = {
55
55
  "Start from observed project context; if the idea is vague, first narrow the project type with **one** structured question, then keep going.",
56
56
  "Lead with the premise check (right problem / direct path / what if nothing) and the `How Might We` reframing before approaches; both go in the artifact, not just the chat.",
57
57
  "Ask at most one question per turn, only when decision-changing; if using a structured question tool, send exactly one question object, not a multi-question form.",
58
- "If likely answers do not change architecture or scope boundaries, choose the default and state the assumption inline.",
58
+ "Only non-critical preference/default assumptions may continue inline. STOP and ask when uncertainty affects scope, architecture, security, data loss, public API, migration, auth/pricing, or user approval.",
59
59
  "For simple greenfield web apps, present a compact A/B choice with one recommended path and one higher-upside challenger; keep the artifact concise but structurally complete (Context, Premise, How Might We, Sharpening Questions, Approaches, Reaction, Selected Direction, Not Doing).",
60
60
  "Show approaches before the recommendation; include a higher-upside challenger and gather reaction first.",
61
61
  "Self-review before approval: re-read the artifact, fix contradictions/placeholders/weak trade-offs, then ask for approval. Do not ask for approval on a draft you have not re-read.",
package/dist/content/stages/design.js CHANGED
@@ -6,7 +6,7 @@ import { decisionProtocolInstruction } from "../decision-protocol.js";
6
6
  export const DESIGN = {
7
7
  schemaShape: "v2",
8
8
  stage: "design",
9
- complexityTier: "deep",
9
+ complexityTier: "standard",
10
10
  skillFolder: "engineering-design-lock",
11
11
  skillName: "engineering-design-lock",
12
12
  skillDescription: "Engineering lock-in stage. Build a concrete technical spine before spec and planning, with section-by-section interactive review.",
@@ -42,7 +42,7 @@ export const DESIGN = {
42
42
  checklist: [
43
43
  "Compact design lock — for simple greenfield/product slices, produce a tight but complete design spine: codebase investigation, architecture boundary, one labeled diagram, data flow, failure/rescue table, test/perf expectations, and handoff. Do not run a sprawling workshop when a strong engineering lock fits on one page.",
44
44
  "Trivial-Change Escape Hatch — for <=3 files, no new interfaces, and no cross-module data flow, produce a mini-design (rationale, changed files, one risk) and proceed to spec.",
45
- "Tiered Research Fleet run `research/research-fleet.md` before lock; record `.cclaw/artifacts/02a-research.md` and summarize concrete decisions in `## Research Fleet Synthesis`.",
45
+ "Tiered Research — for simple/medium work, do compact inline codebase/research synthesis in `Research Fleet Synthesis`; write `.cclaw/artifacts/02a-research.md` and run the full fleet only for deep/high-risk work or when external framework/architecture uncertainty exists.",
46
46
  "Design Doc Check — read upstream artifacts and current design docs; latest superseding doc wins.",
47
47
  "Investigator pass — before design decisions, read blast-radius code and record touched files, responsibilities, reuse candidates, and existing patterns.",
48
48
  "Scope Challenge + Search Before Building — find existing solutions, minimum change set, and complexity smells before custom architecture.",
@@ -59,14 +59,14 @@ export const DESIGN = {
59
59
  "If a section has no issues, say 'No issues found' and move on.",
60
60
  "Do not skip failure-mode mapping; use Method/Exception/Rescue/UserSees and treat silent user impact without rescue as critical.",
61
61
  "Take a firm position, push back on weak framing, and call out suboptimal architecture with concrete alternatives.",
62
- "Classify ambiguity before acting: ask, enumerate-and-pick, or propose a hypothesis with validation path.",
62
+ "Classify ambiguity before acting. Only non-critical preference/default assumptions may continue; STOP on uncertainty about scope, architecture, security, data loss, public API, migration, auth/pricing, or required user approval. Design hypotheses must name validation path, rollback trigger, and owner before they can be carried forward.",
63
63
  "Before final approval, run the critic pass, reconcile material findings, and bound retries with the review-loop policy.",
64
64
  "For baseline approval, present the full design plus exact spec handoff and **STOP** until explicit approval.",
65
65
  "**STOP BEFORE ADVANCE.** Mandatory delegation `planner` must be completed or explicitly waived, then close via `node .cclaw/hooks/stage-complete.mjs design`."
66
66
  ],
67
67
  process: [
68
68
  "Read upstream artifacts and current design docs.",
69
- "Run tiered research fleet and write `.cclaw/artifacts/02a-research.md` before architecture lock.",
69
+ "Run compact research by default; write `.cclaw/artifacts/02a-research.md` only when deep/high-risk uncertainty requires a separate research artifact.",
70
70
  "Run investigator pass plus scope challenge/search-before-building.",
71
71
  "Walk review sections interactively and lock boundaries, data flow, state transitions, edge cases, and failure modes.",
72
72
  "Cover security, observability, deployment, tests, and performance for Standard+ changes.",
@@ -76,17 +76,17 @@ export const DESIGN = {
76
76
  "Write design lock artifact for downstream spec/plan."
77
77
  ],
78
78
  requiredGates: [
79
- { id: "design_research_complete", description: "Parallel research artifact is complete and synthesized into design decisions." },
79
+ { id: "design_research_complete", description: "Research is complete: compact inline synthesis by default, or a separate research artifact for deep/high-risk work, and findings are mapped to design decisions." },
80
80
  { id: "design_architecture_locked", description: "Architecture boundaries are explicit and approved." },
81
81
  { id: "design_data_flow_mapped", description: "Data/state flow includes edge-case paths." },
82
82
  { id: "design_failure_modes_mapped", description: "Failure modes and mitigations are documented." },
83
83
  { id: "design_test_and_perf_defined", description: "Test strategy and performance budget are defined." }
84
84
  ],
85
85
  requiredEvidence: [
86
- "Research artifact written to `.cclaw/artifacts/02a-research.md` with stack/features/architecture/pitfalls sections plus synthesis.",
86
+ "Research Fleet Synthesis is filled in `03-design.md`; for deep/high-risk work, `.cclaw/artifacts/02a-research.md` is also written with stack/features/architecture/pitfalls sections plus synthesis.",
87
87
  "Artifact written to `.cclaw/artifacts/03-design-<slug>.md`.",
88
88
  "Failure-mode table exists in Method/Exception/Rescue/UserSees format.",
89
- "Tier-required diagram markers are present: architecture (all tiers), +shadow/error (Standard+), +state-machine/rollback/deployment-sequence (Deep).",
89
+ "Tier-required diagram markers are present: architecture (all tiers). Standard/Deep add-ons (shadow/error) and Deep add-ons (state-machine/rollback/deployment-sequence) are included only when risk warrants them.",
90
90
  "When `.cclaw/config.yaml::optInAudits.staleDiagramAudit` is true, stale diagram audit finding is clear (no blast-radius file newer than diagram markers without explicit update).",
91
91
  "Security & threat model findings are documented with mitigations.",
92
92
  "Observability and deployment plans are explicit for critical flows.",
@@ -101,7 +101,7 @@ export const DESIGN = {
101
101
  ],
102
102
  inputs: ["scope agreement artifact", "system constraints", "non-functional requirements"],
103
103
  requiredContext: [
104
- "parallel research synthesis from `.cclaw/artifacts/02a-research.md`",
104
+ "compact inline Research Fleet Synthesis, plus `.cclaw/artifacts/02a-research.md` only when deep/high-risk research was needed",
105
105
  "existing architecture and boundaries",
106
106
  "operational constraints",
107
107
  "security and reliability expectations"
@@ -143,16 +143,16 @@ export const DESIGN = {
143
143
  },
144
144
  artifactValidation: [
145
145
  { section: "Upstream Handoff", required: false, validationRule: "Summarizes scope/research decisions, constraints, open questions, and explicit drift before design choices." },
146
- { section: "Research Fleet Synthesis", required: true, validationRule: "Must summarize the tiered lenses actually run (Lightweight=pitfalls, Standard=architecture+pitfalls, Deep=all four) and map findings to concrete design decisions." },
146
+ { section: "Research Fleet Synthesis", required: true, validationRule: "Must summarize the tiered lenses actually run and map findings to concrete design decisions. Default may be compact inline synthesis; full separate research pack is Deep/high-risk only." },
147
147
  { section: "Codebase Investigation", required: false, validationRule: "Investigator pass: list blast-radius files with current responsibilities, discovered patterns, and reuse candidates." },
148
148
  { section: "Search Before Building", required: false, validationRule: "For each technical choice: Layer 1 (exact match), Layer 2 (partial match), Layer 3 (inspiration), EUREKA labels with reuse-first default." },
149
149
  { section: "Architecture Boundaries", required: true, validationRule: "Must list component boundaries with ownership." },
150
150
  { section: "Architecture Diagram", required: true, validationRule: "Must include `<!-- diagram: architecture -->` marker. Diagram must label concrete nodes, label arrows, mark direction, distinguish sync/async edges, and include at least one failure/degraded edge." },
151
- { section: "Data-Flow Shadow Paths", required: false, validationRule: "Standard/Deep: include `<!-- diagram: data-flow-shadow-paths -->` marker plus a table for high-risk choices: chosen path, shadow alternative, switch trigger, fallback/degrade behavior, and verification evidence." },
152
- { section: "Error Flow Diagram", required: false, validationRule: "Standard/Deep: include `<!-- diagram: error-flow -->` marker and failure-detection -> rescue -> user-visible outcome flow." },
153
- { section: "State Machine Diagram", required: false, validationRule: "Deep: include `<!-- diagram: state-machine -->` marker and state transitions for critical flow lifecycle." },
154
- { section: "Rollback Flowchart", required: false, validationRule: "Deep: include `<!-- diagram: rollback-flowchart -->` marker with trigger -> rollback actions -> verification." },
155
- { section: "Deployment Sequence Diagram", required: false, validationRule: "Deep: include `<!-- diagram: deployment-sequence -->` marker with rollout order and guard checks." },
151
+ { section: "Data-Flow Shadow Paths", required: false, validationRule: "Standard/Deep add-on: include `<!-- diagram: data-flow-shadow-paths -->` marker plus a table for high-risk choices: chosen path, shadow alternative, switch trigger, fallback/degrade behavior, and verification evidence." },
152
+ { section: "Error Flow Diagram", required: false, validationRule: "Standard/Deep add-on: include `<!-- diagram: error-flow -->` marker and failure-detection -> rescue -> user-visible outcome flow." },
153
+ { section: "State Machine Diagram", required: false, validationRule: "Deep add-on: include `<!-- diagram: state-machine -->` marker and state transitions for critical flow lifecycle." },
154
+ { section: "Rollback Flowchart", required: false, validationRule: "Deep add-on: include `<!-- diagram: rollback-flowchart -->` marker with trigger -> rollback actions -> verification." },
155
+ { section: "Deployment Sequence Diagram", required: false, validationRule: "Deep add-on: include `<!-- diagram: deployment-sequence -->` marker with rollout order and guard checks." },
156
156
  { section: "Data Flow", required: false, validationRule: "Must include happy path, nil input, empty input, upstream error paths, plus Interaction Edge Case matrix rows for: double-click, nav-away-mid-request, 10K-result dataset, background-job abandonment, zombie connection. Each row must declare handled yes/no and deferred item when not handled." },
157
157
  { section: "Stale Diagram Audit", required: false, validationRule: "When `.cclaw/config.yaml::optInAudits.staleDiagramAudit` is true: blast-radius files from Codebase Investigation must not be newer than the current design diagram-marker baseline unless explicitly refreshed." },
158
158
  { section: "Failure Mode Table", required: true, validationRule: "Use Method/Exception/Rescue/UserSees columns and treat silent user impact without rescue as critical." },
@@ -163,10 +163,11 @@ export const DESIGN = {
163
163
  { section: "Deployment & Rollout", required: true, validationRule: "Must define migration/flag strategy, rollback plan, and post-deploy verification steps." },
164
164
  { section: "What Already Exists", required: false, validationRule: "For each sub-problem: existing code/library found (Layer 1-3/EUREKA label), reuse decision, and adaptation needed." },
165
165
  { section: "Outside Voice Findings", required: false, validationRule: "Critic pass: list adversarial findings and disposition (accept/reject/defer) with rationale per material finding." },
166
- { section: "Spec Review Loop", required: false, validationRule: `Record iteration table with quality score per iteration, stop reason, and unresolved concerns. Enforce ${reviewLoopPolicySummary("design")}` },
166
+ { section: "Design Outside Voice Loop", required: false, validationRule: `Record iteration table with quality score per iteration, stop reason, and unresolved concerns. Enforce ${reviewLoopPolicySummary("design")}` },
167
167
  { section: "NOT in scope", required: false, validationRule: "Work considered and explicitly deferred with one-line rationale." },
168
- { section: "Parallelization Strategy", required: false, validationRule: "If multi-module: dependency table, parallel lanes, conflict flags." },
169
- { section: "Unresolved Decisions", required: false, validationRule: "If any: what info is missing, who provides it, default if unanswered." },
168
+ { section: "Parallelization Strategy", required: false, validationRule: "Standard/Deep add-on when multi-module: dependency table, parallel lanes, conflict flags." },
169
+ { section: "Interface Contracts", required: false, validationRule: "Standard/Deep add-on when module boundaries or APIs change: producers, consumers, and payload/interface expectations." },
170
+ { section: "Unresolved Decisions", required: false, validationRule: "Standard/Deep add-on if any: what info is missing, who provides it, default if unanswered." },
170
171
  { section: "Completion Dashboard", required: true, validationRule: "Lists every review section with status (clear / issues-found-resolved / issues-open), critical/open gap counts, decision count, and unresolved items (or 'None')." }
171
172
  ],
172
173
  trivialOverrideSections: ["Architecture Boundaries", "NOT in scope", "Completion Dashboard"]
@@ -49,7 +49,7 @@ export const PLAN = {
49
49
  "Map scope Locked Decisions — every LD#hash anchor from scope is referenced by at least one plan task (or explicitly marked deferred with reason).",
50
50
  "Run anti-placeholder + anti-scope-reduction scans — block `TODO/TBD/...` and phrasing like `v1`, `for now`, `later` for locked boundaries.",
51
51
  "Define validation points — mark where progress must be checked before continuing, with concrete command and expected evidence.",
52
- "Define execution posture — record whether execution should be sequential, dependency-batched, parallel-safe, or blocked; include risk triggers and RED/GREEN/REFACTOR checkpoint/commit expectations when the repo workflow supports them.",
52
+ "Define execution posture — record whether execution should be sequential, dependency-batched, parallel-safe, or blocked; include risk triggers and RED/GREEN/REFACTOR checkpoint/commit expectations when the repo workflow supports them. This fulfills the `plan_execution_posture_recorded` gate.",
53
53
  "WAIT_FOR_CONFIRM — write plan artifact and explicitly pause. **STOP.** Do NOT proceed until user confirms. Then close the stage with `node .cclaw/hooks/stage-complete.mjs plan` and tell user to run `/cc-next`."
54
54
  ],
55
55
  interactionProtocol: [
@@ -75,6 +75,7 @@ export const PLAN = {
75
75
  { id: "plan_tasks_sliced_2_5_min", description: "Tasks are small, executable slices." },
76
76
  { id: "plan_dependency_batches_defined", description: "Tasks are grouped into executable batches with gate checks and execution posture." },
77
77
  { id: "plan_acceptance_mapped", description: "Each task maps to a spec acceptance criterion." },
78
+ { id: "plan_execution_posture_recorded", description: "Execution posture is recorded before implementation handoff." },
78
79
  { id: "plan_wait_for_confirm", description: "Execution blocked until explicit user confirmation." }
79
80
  ],
80
81
  requiredEvidence: [
@@ -35,8 +35,8 @@ export const REVIEW = {
35
35
  "Diff Scope — Run `git diff` against base branch. If no diff, exit early with APPROVED (no changes to review). Scope the review to changed files unless blast-radius analysis requires wider inspection.",
36
36
  "Change-Size Check — ~100 lines = normal. ~300 lines = consider splitting. ~1000+ lines = strongly recommend stacked PRs. Flag large diffs to the user.",
37
37
  "Risk-Based Second Opinion — compute changed-line count, files-touched count, and trust-boundary movement. Dispatch an adversarial reviewer only when trust boundaries changed, Critical/Important ambiguity remains, or the diff is both large and high-risk; otherwise record `not triggered`.",
38
- "Load upstream evidence — read TDD artifact (RED + GREEN + REFACTOR), spec, and plan. Verify evidence chain is unbroken.",
39
- "Run traceability matrix execute `cclaw internal trace-matrix` (or equivalent helper) and confirm there are no orphaned criteria/tasks/tests before declaring ship readiness.",
38
+ "Load upstream evidence — read TDD artifact (RED + GREEN + REFACTOR), spec, and plan when present. On quick track, use spec acceptance items / bug reproduction slices instead of nonexistent plan artifacts.",
39
+ "Run traceability matrix when plan artifacts exist or the active track enforces it; on quick, confirm spec acceptance/reproduction slices are covered without requiring plan-task coverage.",
40
40
  "Layer 1: Spec Compliance — check every acceptance criterion against implementation. Verdict: pass/fail per criterion.",
41
41
  "Layer 2: Integrated findings — one structured pass tagged by category: correctness, security, performance, architecture, external-safety.",
42
42
  "Security sweep — mandatory dedicated security-reviewer pass across diff + touched modules. A zero-finding pass must include `NO_CHANGE_ATTESTATION` with rationale.",
@@ -45,14 +45,14 @@ export const REVIEW = {
45
45
  "Meta-Review — Were tests actually run? Do test names match what they test? Are there real assertions?",
46
46
  "Classify findings — Critical (blocks ship), Important (should fix), Suggestion (optional improvement).",
47
47
  "Produce verdict — APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED.",
48
- "If verdict is BLOCKED, emit remediation route token `ROUTE_BACK_TO_TDD` and include `cclaw internal rewind tdd \"review_blocked_by_critical\"` with the blocking finding IDs."
48
+ "If verdict is BLOCKED, emit remediation route token `ROUTE_BACK_TO_TDD`, include `cclaw internal rewind tdd \"review_blocked_by_critical\"` with the blocking finding IDs, and satisfy the special transition guard `review_verdict_blocked` instead of `review_criticals_resolved`."
49
49
  ],
50
50
  interactionProtocol: [
51
51
  "Run Layer 1 (spec compliance) completely before starting Layer 2.",
52
52
  "In each review section, present findings ONE AT A TIME. Do NOT batch.",
53
53
  "Classify every finding as Critical, Important, or Suggestion.",
54
54
  decisionProtocolInstruction("each Critical finding", "present resolution options (A/B/C) with trade-offs, and mark one as (recommended)", "recommend the option that fully closes the finding with no carry-over risk and the smallest blast radius", STRUCTURED_ASK_TOOL_LIST_REVIEW),
55
- "Resolve all critical blockers before ship.",
55
+ "Resolve all critical blockers before ship. If verdict is BLOCKED, do not pass `review_criticals_resolved`; pass only the remediation route gate `review_verdict_blocked` when routing back to TDD.",
56
56
  "When verdict is BLOCKED, do not end with a passive stop: explicitly route remediation to TDD via `ROUTE_BACK_TO_TDD` and point to `cclaw internal rewind tdd` with the blocking IDs.",
57
57
  structuredAskSingleChoiceInstruction("final verdict", "verdict (APPROVED / APPROVED_WITH_CONCERNS / BLOCKED)"),
58
58
  "**STOP.** Do NOT proceed to ship until the user provides an explicit verdict."
@@ -70,7 +70,7 @@ export const REVIEW = {
70
70
  { id: "review_layer1_spec_compliance", description: "Spec compliance check completed with per-criterion verdict." },
71
71
  { id: "review_layer2_security", description: "Security review completed." },
72
72
  { id: "review_layer_coverage_complete", description: "Layer coverage map in 07-review-army.json confirms spec/correctness/security/performance/architecture/external-safety tags were considered." },
73
- { id: "review_criticals_resolved", description: "No unresolved critical blockers remain." },
73
+ { id: "review_criticals_resolved", description: "Normal APPROVED or APPROVED_WITH_CONCERNS path only: no unresolved critical blockers remain. BLOCKED routes use review_verdict_blocked instead." },
74
74
  { id: "review_army_json_valid", description: "07-review-army.json passes schema validation (validateReviewArmy)." },
75
75
  { id: "review_trace_matrix_clean", description: "Trace matrix has no orphaned criteria/tasks/test slices for the active run, and evidence cites a discovered real test command before ship handoff." }
76
76
  ],
@@ -79,7 +79,7 @@ export const REVIEW = {
79
79
  "Artifact written to `.cclaw/artifacts/07-review-army.json`.",
80
80
  "Traceability matrix run recorded (no orphaned criteria/tasks/tests for enforced tracks).",
81
81
  "Layer 1 verdict captured with per-criterion pass/fail.",
82
- "Layer 2 sections completed with findings.",
82
+ "Layer 2 sections completed across correctness, security, performance, architecture, and external-safety findings.",
83
83
  "Severity log includes critical/important/suggestion buckets.",
84
84
  "Explicit final verdict: APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED.",
85
85
  "Fresh verification command discovery recorded, and the command cited in `review_trace_matrix_clean` evidence before ship handoff.",
@@ -95,8 +95,8 @@ export const REVIEW = {
95
95
  exitCriteria: [
96
96
  "both layers completed",
97
97
  "all review sections evaluated",
98
- "critical blockers resolved",
99
- "ship readiness explicitly stated"
98
+ "critical blockers resolved for APPROVED paths, or BLOCKED routes through review_verdict_blocked",
99
+ "ship readiness or remediation route explicitly stated"
100
100
  ],
101
101
  platformNotes: [
102
102
  "When citing file locations in findings, use repo-relative forward-slash paths with a line number (`src/foo/bar.ts:42`). Avoid IDE-generated hyperlinks that embed absolute machine-specific paths.",
@@ -115,7 +115,7 @@ export const REVIEW = {
115
115
  artifactValidation: [
116
116
  { section: "Upstream Handoff", required: false, validationRule: "Summarizes spec/plan/tdd decisions, constraints, open questions, and explicit drift before review verdicts." },
117
117
  { section: "Layer 1 Verdict", required: true, validationRule: "Per-criterion pass/fail with references." },
118
- { section: "Layer 2 Findings", required: false, validationRule: "Each finding has severity, description, and resolution status. Security coverage must include either explicit security findings or `NO_CHANGE_ATTESTATION: <reason>` when no security-relevant changes were found." },
118
+ { section: "Layer 2 Findings", required: false, validationRule: "Each finding has severity, description, and resolution status across correctness, security, performance, architecture, and external-safety. Security coverage must include either explicit security findings or `NO_CHANGE_ATTESTATION: <reason>` when no security-relevant changes were found." },
119
119
  { section: "Review Findings Contract", required: true, validationRule: "Structured findings in 07-review-army.json include id/severity/confidence/fingerprint/reportedBy/status and source tags from {spec, correctness, security, performance, architecture, external-safety} with dedup reconciliation summary." },
120
120
  { section: "Review Readiness Snapshot", required: false, validationRule: "Optional compact summary: completed checks, delegation-log status, staleness signal, open critical blockers, and ship recommendation." },
121
121
  { section: "Completeness Snapshot", required: false, validationRule: "Optional compact coverage summary for AC coverage, task coverage, test-slice coverage, and adversarial-review status when triggered." },
@@ -140,7 +140,7 @@ export const REVIEW = {
140
140
  stopGate: true
141
141
  },
142
142
  {
143
- title: "Layer 2: Integrated Correctness / Performance / Architecture",
143
+ title: "Layer 2: Integrated Correctness / Security / Performance / Architecture / External-Safety",
144
144
  evaluationPoints: [
145
145
  "Logic errors and boundary violations",
146
146
  "Race conditions and concurrency issues",