cclaw-cli 0.51.24 → 0.51.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/README.md +135 -414
  2. package/dist/artifact-linter.js +10 -6
  3. package/dist/config.d.ts +1 -1
  4. package/dist/config.js +28 -3
  5. package/dist/content/core-agents.d.ts +110 -0
  6. package/dist/content/core-agents.js +235 -3
  7. package/dist/content/examples.js +8 -5
  8. package/dist/content/next-command.js +10 -6
  9. package/dist/content/reference-patterns.d.ts +18 -0
  10. package/dist/content/reference-patterns.js +391 -0
  11. package/dist/content/skills.js +39 -34
  12. package/dist/content/stage-common-guidance.js +19 -3
  13. package/dist/content/stage-schema.d.ts +12 -0
  14. package/dist/content/stage-schema.js +184 -28
  15. package/dist/content/stages/_lint-metadata/index.js +3 -2
  16. package/dist/content/stages/brainstorm.js +7 -3
  17. package/dist/content/stages/design.js +12 -3
  18. package/dist/content/stages/review.js +7 -5
  19. package/dist/content/stages/schema-types.d.ts +9 -2
  20. package/dist/content/stages/scope.js +8 -2
  21. package/dist/content/stages/ship.js +3 -2
  22. package/dist/content/stages/tdd.js +18 -13
  23. package/dist/content/start-command.js +3 -2
  24. package/dist/content/status-command.js +9 -4
  25. package/dist/content/subagents.js +281 -39
  26. package/dist/content/templates.js +64 -3
  27. package/dist/delegation.d.ts +2 -0
  28. package/dist/delegation.js +27 -6
  29. package/dist/doctor.js +47 -5
  30. package/dist/gate-evidence.js +25 -2
  31. package/dist/install.js +2 -9
  32. package/dist/internal/advance-stage.js +179 -26
  33. package/dist/run-persistence.js +21 -3
  34. package/dist/tdd-verification-evidence.d.ts +17 -0
  35. package/dist/tdd-verification-evidence.js +43 -0
  36. package/dist/types.d.ts +10 -0
  37. package/package.json +1 -1
@@ -4,6 +4,7 @@ import { FLOW_STAGES } from "../types.js";
4
4
  import { stageExamples } from "./examples.js";
5
5
  import { reviewStackAwareRoutes, reviewStackAwareRoutingSummary, stageAutoSubagentDispatch, stageSchema, stageTrackRenderContext } from "./stage-schema.js";
6
6
  import { conversationLanguagePolicyMarkdown } from "./language-policy.js";
7
+ import { referencePatternsForStage } from "./reference-patterns.js";
7
8
  const VERIFICATION_STAGES = ["tdd", "review", "ship"];
8
9
  function whenNotToUseBlock(items) {
9
10
  if (items.length === 0) {
@@ -37,24 +38,17 @@ Before execution:
37
38
  2. Load active artifacts from \`.cclaw/artifacts/\`.
38
39
  3. Load upstream artifacts required by this stage:
39
40
  ${readLines}
40
- 4. Read the state contract for this stage from \`.cclaw/templates/state-contracts/<stage>.json\`.
41
- Treat it as the machine-readable skeleton: required top-level fields,
42
- closed taxonomies, and the derived markdown path. Do not validate natural-language
43
- prose by regex; put semantic quality checks in the review prompts.
44
- 5. Read the canonical artifact template at \`${artifactTemplatePath}\` and reuse its
45
- exact section layout per-row tables with stable column order, calibrated review block;
46
- do not invent layouts for sections the template already defines.
47
- 6. Extract upstream decisions, constraints, and open questions into the current
48
- artifact's \`Upstream Handoff\` section when that section exists.
49
- 7. Before doing stage work, give a compact user-facing drift preamble: "Carrying forward: <1-3 bullets>. Drift since upstream: None / <specific drift>. Recommendation: continue / re-scope."
50
- 8. If you change an upstream decision, record an explicit drift reason in the
51
- current artifact before continuing.
52
- 9. Confirm stage inputs:
41
+ 4. Read the state contract from \`.cclaw/templates/state-contracts/<stage>.json\` for required fields, taxonomies, and derived markdown path.
42
+ 5. Read the canonical artifact template at \`${artifactTemplatePath}\` and reuse its exact section layout — per-row tables with stable column order, calibrated review block; do not invent layouts.
43
+ 6. Extract upstream decisions, constraints, and open questions into the current artifact's \`Upstream Handoff\` section when present.
44
+ 7. Confirm context readiness: upstream artifact freshness, required context, canonical template shape, relevant in-repo/reference patterns, and unresolved blockers are known. If any item is missing, load it or stop before drafting.
45
+ 8. Before doing stage work, give a compact user-facing drift preamble: "Carrying forward: <1-3 bullets>. Drift since upstream: None / <specific drift>. Recommendation: continue / re-scope."
46
+ 9. If you change an upstream decision, record an explicit drift reason in the current artifact before continuing.
47
+ 10. Confirm stage inputs:
53
48
  ${inputs}
54
- 10. Confirm required context:
49
+ 11. Confirm required context:
55
50
  ${requiredContext}
56
- 11. Use the injected knowledge digest from session-start; only fall back to full
57
- \`.cclaw/knowledge.jsonl\` when the digest is insufficient.
51
+ 12. Use the injected knowledge digest; only fall back to full \`.cclaw/knowledge.jsonl\` when insufficient.
58
52
  `;
59
53
  }
60
54
  function autoSubagentDispatchBlock(stage, track) {
@@ -65,7 +59,9 @@ function autoSubagentDispatchBlock(stage, track) {
65
59
  const rows = rules
66
60
  .map((rule) => {
67
61
  const userGate = rule.requiresUserGate ? "required" : "not required";
68
- return `| ${rule.agent} | ${rule.mode} | ${userGate} | ${rule.when} | ${rule.purpose} |`;
62
+ const dispatchClass = rule.dispatchClass ?? "stage-specialist";
63
+ const returnSchema = rule.returnSchema ?? "agent-default";
64
+ return `| ${rule.agent} | ${rule.mode} | ${dispatchClass} | ${returnSchema} | ${userGate} | ${rule.when} | ${rule.purpose} |`;
69
65
  })
70
66
  .join("\n");
71
67
  const mandatory = schema.mandatoryDelegations;
@@ -73,27 +69,37 @@ function autoSubagentDispatchBlock(stage, track) {
73
69
  const delegationLogRel = `${RUNTIME_ROOT}/state/delegation-log.json`;
74
70
  const artifactRef = `${RUNTIME_ROOT}/artifacts/${schema.artifactRules.artifactFile}`;
75
71
  return `## Automatic Subagent Dispatch
76
- | Agent | Mode | User Gate | Trigger | Purpose |
77
- |---|---|---|---|---|
72
+ | Agent | Mode | Class | Return Schema | User Gate | Trigger | Purpose |
73
+ |---|---|---|---|---|---|---|
78
74
  ${rows}
79
- Mandatory: ${mandatoryList}. Record completion/waiver in \`${delegationLogRel}\` before completion.
75
+ Mandatory: ${mandatoryList}. Record scheduled/completed/waived lifecycle rows in \`${delegationLogRel}\` before completion.
80
76
  ### Harness Dispatch Contract
81
- Use true harness dispatch: Claude native Task, Cursor generic dispatch, OpenCode \`.opencode/agents/<agent>.md\`, Codex \`.codex/agents/<agent>.toml\`. Run independent read-only/review agents in parallel where safe, write evidence into \`${artifactRef}\`, then append \`${delegationLogRel}\` rows with matching \`fulfillmentMode: "isolated"\` or \`"generic-dispatch"\`. Do not collapse OpenCode or Codex to role-switch by default; role-switch is degraded fallback and must carry non-empty \`evidenceRefs\`. Missing evidence blocks completion.
77
+ Use true harness dispatch: Claude native Task, Cursor generic dispatch, OpenCode \`.opencode/agents/<agent>.md\`, Codex \`.codex/agents/<agent>.toml\`. Run independent read-only/review agents in parallel where safe, write evidence into \`${artifactRef}\`, then append \`${delegationLogRel}\` rows with matching \`fulfillmentMode: "isolated"\` or \`"generic-dispatch"\`. Each dispatched worker should have a scheduled row and a terminal row sharing \`spanId\`; stale scheduled spans block completion. Do not collapse OpenCode or Codex to role-switch by default; role-switch is degraded fallback and must carry non-empty \`evidenceRefs\`. Missing evidence blocks completion.
82
78
  `;
83
79
  }
84
80
  function researchPlaybooksBlock(playbooks) {
85
81
  if (playbooks.length === 0)
86
82
  return "";
87
83
  const rows = playbooks
88
- .map((playbook) => `- \`${RUNTIME_ROOT}/skills/${playbook}\``)
89
- .join("\n");
84
+ .map((playbook) => `\`${RUNTIME_ROOT}/skills/${playbook}\``)
85
+ .join("; ");
90
86
  return `## Research Playbooks
91
-
92
- Use these in-thread research procedures before locking this stage. They are
93
- playbooks (not delegated personas), so execute them in the primary agent context
94
- and record outcomes in the stage artifact when relevant.
95
-
96
- ${rows}
87
+ Execute in primary agent context before locking the stage; record outcomes in the artifact when relevant: ${rows}.
88
+ `;
89
+ }
90
+ function referencePatternsBlock(stage) {
91
+ const patterns = referencePatternsForStage(stage);
92
+ if (patterns.length === 0)
93
+ return "";
94
+ const summaries = patterns
95
+ .map((pattern) => {
96
+ const contract = pattern.contracts.find((item) => item.stage === stage);
97
+ const sections = contract ? contract.artifactSections.join(", ") : "n/a";
98
+ return `${pattern.title} (sections: ${sections})`;
99
+ })
100
+ .join("; ");
101
+ return `## Reference Patterns
102
+ Prompt-only; no runtime/delegation changes. These compact pattern titles come from the internal registry; use the behavior and artifact sections, not the source project history. Use: ${summaries}.
97
103
  `;
98
104
  }
99
105
  function reviewSectionsBlock(sectionsInput) {
@@ -389,9 +395,8 @@ ${conversationLanguagePolicyMarkdown()}
389
395
  ${philosophy.purpose}
390
396
 
391
397
  ## Complexity Tier
392
- - Active tier: \`${schema.complexityTier}\`
393
- - Scale-to-complexity rule: execute required gates and artifact sections, but keep optional/deep sections compact unless risk, novelty, or configuration triggers them. Do not mechanically expand lightweight work into a strategy workshop.
394
- - Mandatory delegations at this tier: ${mandatoryDelegationSummary}
398
+ - Active tier: \`${schema.complexityTier}\`; mandatory delegations: ${mandatoryDelegationSummary}
399
+ - Scale-to-complexity: execute required gates/sections; keep optional/deep sections compact unless risk, novelty, or config triggers them.
395
400
  - Track render context: \`${trackContext.track}\` (${trackContext.usesPlanTerminology ? "plan-first wording" : "acceptance-first wording"})
396
401
 
397
402
  ## When to Use
@@ -406,14 +411,14 @@ ${mergedAntiPatterns(philosophy, executionModel)}
406
411
 
407
412
  ## Process
408
413
 
409
- This is the stage **state machine** the canonical ordered flow. For every detailed step, gate, and wording, follow the Checklist below; this diagram is the map, not the territory.
410
-
414
+ Stage state machine (map only; Checklist is authoritative):
411
415
  ${processFlowMermaid.length > 0 ? processFlowMermaid : "```mermaid\nflowchart TD\n S1[\"Execute Checklist\"] --> S2[\"Satisfy required gates\"] --> S3[\"Verify before closeout\"]\n```"}
412
416
 
413
417
  ${platformNotesBlock}${contextLoadingBlock(stage, artifactRules.crossStageTrace, executionModel)}
414
418
  ${autoSubagentDispatchBlock(stage, track)}
415
419
  ${stackAwareReviewRoutingBlock(stage)}
416
420
  ${researchPlaybooksBlock(executionModel.researchPlaybooks ?? [])}
421
+ ${referencePatternsBlock(stage)}
417
422
 
418
423
  ## Checklist
419
424
 
@@ -14,6 +14,11 @@ ${conversationLanguagePolicyMarkdown()}
14
14
  harvest learnings, then use \`/cc-next\` for progression.
15
15
  - Do not create separate protocol files.
16
16
 
17
+ ## Context readiness
18
+
19
+ - Before drafting, know the upstream artifact freshness, required template shape, relevant code/reference patterns, and unresolved blockers.
20
+ - If any item is missing, load it or stop with a blocker instead of inventing content.
21
+
17
22
  ## Shared decision protocol
18
23
 
19
24
  - Ask only decision-changing questions.
@@ -28,7 +33,7 @@ Use this same closeout menu for every stage:
28
33
  - **A) Advance** — run \`/cc-next\` and continue the critical path; after \`ship\`, the same command drives \`retro -> compound -> archive\`.
29
34
  - **B) Revise this stage** — stay on current stage and apply feedback.
30
35
  - **C) Pause / park** — run \`/cc-view status\`, then stop and resume later.
31
- - **D) Rewind** — run \`npx cclaw-cli internal rewind <target-stage> "<reason>"\` as a support/runtime repair action.
36
+ - **D) Rewind** — run \`npx cclaw-cli internal rewind <target-stage> "<reason>"\` as the managed support/runtime repair action; after redoing the target stage, run \`npx cclaw-cli internal rewind --ack <target-stage>\` to clear the stale marker.
32
37
  - **E) Abandon** — only when the user explicitly wants to end a non-ship active run early, archive with \`npx cclaw-cli archive --skip-retro --retro-reason="<reason>"\`. Once in post-ship closeout, continue \`/cc-next\` through retro/compound/archive instead.
33
38
 
34
39
  Recommendation defaults:
@@ -37,6 +42,12 @@ Recommendation defaults:
37
42
  - Completion status \`DONE_WITH_CONCERNS\` -> recommend **B**.
38
43
  - Completion status \`BLOCKED\` -> recommend **B** or **C**.
39
44
 
45
+ ## Iterate / Victory Detector
46
+
47
+ - Iterate while a required gate, artifact section, or fresh evidence item is missing.
48
+ - Stop only when the stage-specific Victory Detector passes or a named blocker is recorded.
49
+ - Do not use vague closeout wording such as \`looks good\`, \`done enough\`, or \`all set\` without the detector evidence.
50
+
40
51
  ## Completion status vocabulary
41
52
 
42
53
  - \`DONE\` — all required gates and checks satisfied.
@@ -63,9 +74,12 @@ Rollback / fallback: <if decision proves wrong>
63
74
 
64
75
  Before closeout, fill the artifact \`## Learnings\` section (do not write
65
76
  \`.cclaw/knowledge.jsonl\` by hand):
66
- - \`- None this stage.\` when nothing reusable emerged.
77
+ - \`- None this stage.\` only when nothing reusable emerged.
67
78
  - Or 1-3 JSON bullets with required keys \`type\`, \`trigger\`, \`action\`,
68
79
  \`confidence\` (optional fields may mirror knowledge.jsonl schema keys).
80
+ - For meaningful \`design\`, \`tdd\`, or \`review\` work, prefer a small JSON
81
+ learning over \`None\` when you made a reusable decision, found a testing
82
+ pattern, or caught a review/security issue.
69
83
  During \`node .cclaw/hooks/stage-complete.mjs <stage>\`, cclaw validates those
70
84
  bullets, appends unique entries to \`.cclaw/knowledge.jsonl\`, and stamps a
71
85
  harvest marker in the artifact.
@@ -78,7 +92,9 @@ Track policy:
78
92
  - \`quick\`: recommended only.
79
93
 
80
94
  \`- None this stage.\` is acceptable only when the stage produced no reusable
81
- insight (for example, purely mechanical edits with no new decisions).
95
+ insight (for example, purely mechanical edits with no new decisions). If unsure,
96
+ record a concise \`lesson\` with \`"confidence":"medium"\` instead of dropping
97
+ operator knowledge.
82
98
 
83
99
  ## Progressive disclosure baseline
84
100
 
@@ -23,11 +23,23 @@ export interface StageStackAwareReviewRoute {
23
23
  signals: string[];
24
24
  focus: string;
25
25
  }
26
+ export interface StageDelegationDispatchRule {
27
+ agent: string;
28
+ mode: "mandatory" | "proactive";
29
+ when: string;
30
+ purpose: string;
31
+ requiresUserGate: boolean;
32
+ requiredAtTier?: StageComplexityTier;
33
+ dispatchClass: NonNullable<StageAutoSubagentDispatch["dispatchClass"]>;
34
+ returnSchema: NonNullable<StageAutoSubagentDispatch["returnSchema"]>;
35
+ skill?: string;
36
+ }
26
37
  export interface StageDelegationSummary {
27
38
  stage: FlowStage;
28
39
  mandatoryAgents: string[];
29
40
  proactiveAgents: string[];
30
41
  primaryAgents: string[];
42
+ dispatchRules: StageDelegationDispatchRule[];
31
43
  stackAwareRoutes: StageStackAwareReviewRoute[];
32
44
  }
33
45
  export declare function reviewStackAwareRoutes(): StageStackAwareReviewRoute[];
@@ -81,6 +81,66 @@ function dedupeAgentsInOrder(agents) {
81
81
  }
82
82
  return out;
83
83
  }
84
+ function defaultReturnSchemaForAgent(agent) {
85
+ switch (agent) {
86
+ case "researcher":
87
+ return "research-return";
88
+ case "architect":
89
+ return "architecture-return";
90
+ case "spec-validator":
91
+ return "spec-validation-return";
92
+ case "slice-implementer":
93
+ return "worker-return";
94
+ case "performance-reviewer":
95
+ return "performance-return";
96
+ case "compatibility-reviewer":
97
+ return "compatibility-return";
98
+ case "observability-reviewer":
99
+ return "observability-return";
100
+ case "release-reviewer":
101
+ return "release-return";
102
+ case "planner":
103
+ return "planning-return";
104
+ case "product-manager":
105
+ return "product-return";
106
+ case "critic":
107
+ return "critic-return";
108
+ case "reviewer":
109
+ return "review-return";
110
+ case "security-reviewer":
111
+ return "security-return";
112
+ case "test-author":
113
+ return "tdd-return";
114
+ case "doc-updater":
115
+ return "docs-return";
116
+ case "fixer":
117
+ return "fixer-return";
118
+ case "implementer":
119
+ return "worker-return";
120
+ }
121
+ }
122
+ function dispatchClassForRow(row) {
123
+ if (row.dispatchClass)
124
+ return row.dispatchClass;
125
+ if (row.agent === "implementer" || row.agent === "fixer" || row.agent === "slice-implementer")
126
+ return "worker";
127
+ return row.skill?.includes("review") || row.agent === "reviewer" || row.agent === "security-reviewer" || row.agent.endsWith("-reviewer")
128
+ ? "review-lens"
129
+ : "stage-specialist";
130
+ }
131
+ function delegationDispatchRule(row) {
132
+ return {
133
+ agent: row.agent,
134
+ mode: row.mode,
135
+ when: row.when,
136
+ purpose: row.purpose,
137
+ requiresUserGate: row.requiresUserGate,
138
+ requiredAtTier: row.requiredAtTier,
139
+ dispatchClass: dispatchClassForRow(row),
140
+ returnSchema: row.returnSchema ?? defaultReturnSchemaForAgent(row.agent),
141
+ skill: row.skill
142
+ };
143
+ }
84
144
  /**
85
145
  * Canonical delegation summary derived from STAGE_AUTO_SUBAGENT_DISPATCH.
86
146
  *
@@ -106,6 +166,7 @@ export function stageDelegationSummary(complexityTier = "standard") {
106
166
  mandatoryAgents,
107
167
  proactiveAgents,
108
168
  primaryAgents,
169
+ dispatchRules: eligibleRows.map(delegationDispatchRule),
109
170
  stackAwareRoutes: stackAwareRoutesForStage(stage)
110
171
  };
111
172
  });
@@ -361,23 +422,25 @@ const STAGE_AUTO_SUBAGENT_DISPATCH = {
361
422
  brainstorm: [
362
423
  {
363
424
  agent: "product-manager",
364
- mode: "proactive",
365
- when: "When product value, persona/JTBD, success metric, or why-now framing is ambiguous.",
425
+ mode: "mandatory",
426
+ requiredAtTier: "standard",
427
+ when: "Always for standard/deep brainstorm to validate value, persona/JTBD, success metric, and why-now framing.",
366
428
  purpose: "Pressure-test problem/value fit and produce product-discovery evidence for the Problem Decision Record.",
367
429
  requiresUserGate: false
368
430
  },
369
431
  {
370
432
  agent: "critic",
371
- mode: "proactive",
372
- when: "When the premise may be wrong, cheaper alternatives exist, or the do-nothing path could be acceptable.",
433
+ mode: "mandatory",
434
+ requiredAtTier: "standard",
435
+ when: "Always for standard/deep brainstorm to challenge the premise, do-nothing path, and higher-upside alternatives.",
373
436
  purpose: "Attack assumptions and surface non-goals before direction approval.",
374
437
  requiresUserGate: false
375
438
  },
376
439
  {
377
- agent: "planner",
440
+ agent: "researcher",
378
441
  mode: "proactive",
379
- when: "When request is ambiguous, multi-surface, or staged feasibility is unclear.",
380
- purpose: "Map scope and alternatives before direction lock.",
442
+ when: "When repository, market, docs, or prior-art context changes the approach set.",
443
+ purpose: "Provide search-before-read summaries and context-readiness evidence before large reads or decisions.",
381
444
  requiresUserGate: false
382
445
  }
383
446
  ],
@@ -392,11 +455,19 @@ const STAGE_AUTO_SUBAGENT_DISPATCH = {
392
455
  },
393
456
  {
394
457
  agent: "critic",
395
- mode: "proactive",
396
- when: "When selecting SELECTIVE EXPANSION, SCOPE EXPANSION, or SCOPE REDUCTION, or when boundaries feel soft.",
458
+ mode: "mandatory",
459
+ requiredAtTier: "standard",
460
+ when: "Always during scope shaping for standard/deep work.",
397
461
  purpose: "Test whether the selected scope mode is too timid, too broad, or hiding a smaller useful slice.",
398
462
  requiresUserGate: false
399
463
  },
464
+ {
465
+ agent: "researcher",
466
+ mode: "proactive",
467
+ when: "When churn, prior attempts, reference patterns, or external constraints may change scope boundaries.",
468
+ purpose: "Summarize search/context findings before the scope contract locks accepted/rejected/deferred ideas.",
469
+ requiresUserGate: false
470
+ },
400
471
  {
401
472
  agent: "product-manager",
402
473
  mode: "proactive",
@@ -407,13 +478,21 @@ const STAGE_AUTO_SUBAGENT_DISPATCH = {
407
478
  ],
408
479
  design: [
409
480
  {
410
- agent: "planner",
481
+ agent: "architect",
411
482
  mode: "mandatory",
412
483
  requiredAtTier: "standard",
413
484
  when: "Always during design lock.",
414
485
  purpose: "Stress architecture boundaries, dependency graph, critical path, and spec handoff.",
415
486
  requiresUserGate: false
416
487
  },
488
+ {
489
+ agent: "test-author",
490
+ mode: "mandatory",
491
+ requiredAtTier: "standard",
492
+ when: "Always during design lock.",
493
+ purpose: "Check test diagram mapping, RED expressibility, assertion quality, and verification routes before implementation.",
494
+ requiresUserGate: false
495
+ },
417
496
  {
418
497
  agent: "critic",
419
498
  mode: "proactive",
@@ -421,6 +500,13 @@ const STAGE_AUTO_SUBAGENT_DISPATCH = {
421
500
  purpose: "Produce a shadow alternative, switch trigger, and cheaper-path challenge for the engineering lock.",
422
501
  requiresUserGate: false
423
502
  },
503
+ {
504
+ agent: "researcher",
505
+ mode: "proactive",
506
+ when: "When framework/library docs, repo graph context, or reference contracts may change the design.",
507
+ purpose: "Run search-before-read context synthesis before architecture locks.",
508
+ requiresUserGate: false
509
+ },
424
510
  {
425
511
  agent: "security-reviewer",
426
512
  mode: "proactive",
@@ -429,26 +515,36 @@ const STAGE_AUTO_SUBAGENT_DISPATCH = {
429
515
  requiresUserGate: false
430
516
  },
431
517
  {
432
- agent: "test-author",
518
+ agent: "compatibility-reviewer",
519
+ mode: "proactive",
520
+ requiredAtTier: "lightweight",
521
+ when: "When public API, config, persisted data, CLI, generated clients, or cross-version behavior can change.",
522
+ purpose: "Identify backward-compatibility and migration hazards before spec/plan.",
523
+ requiresUserGate: false
524
+ },
525
+ {
526
+ agent: "observability-reviewer",
433
527
  mode: "proactive",
434
- when: "When testability, failure/rescue behavior, or verification evidence is unclear.",
435
- purpose: "Check that the design can produce concrete RED/GREEN/REFACTOR and rollout verification evidence.",
528
+ requiredAtTier: "lightweight",
529
+ when: "When runtime/debuggability, rollout, failure detection, or supportability matters.",
530
+ purpose: "Validate logs/metrics/traces, alerting, and rescue-path visibility before implementation.",
436
531
  requiresUserGate: false
437
532
  }
438
533
  ],
439
534
  spec: [
440
535
  {
441
- agent: "planner",
442
- mode: "proactive",
443
- when: "When acceptance criteria are unclear or constraints conflict.",
444
- purpose: "Normalize measurable criteria and testability mapping.",
536
+ agent: "spec-validator",
537
+ mode: "mandatory",
538
+ requiredAtTier: "standard",
539
+ when: "Always for standard/deep specs before plan handoff.",
540
+ purpose: "Validate measurability, edge cases, assumptions, and AC-to-testability mapping.",
445
541
  requiresUserGate: false
446
542
  },
447
543
  {
448
- agent: "reviewer",
544
+ agent: "test-author",
449
545
  mode: "proactive",
450
- when: "When acceptance criteria and edge cases are drafted and need independent validation before plan stage.",
451
- purpose: "Independent review of spec against measurability, testability, and completeness before locking the contract for plan.",
546
+ when: "When acceptance criteria need testability review or RED expressibility is uncertain.",
547
+ purpose: "Confirm likely test levels, commands/manual evidence, and assertion surfaces are concrete.",
452
548
  requiresUserGate: false
453
549
  }
454
550
  ],
@@ -458,7 +554,14 @@ const STAGE_AUTO_SUBAGENT_DISPATCH = {
458
554
  mode: "mandatory",
459
555
  requiredAtTier: "standard",
460
556
  when: "Always when producing execution slices.",
461
- purpose: "Create dependency-aware task graph with verification steps.",
557
+ purpose: "Create dependency-aware executable packets with expected failing test, passing command, stop condition, and verification evidence.",
558
+ requiresUserGate: false
559
+ },
560
+ {
561
+ agent: "researcher",
562
+ mode: "proactive",
563
+ when: "When plan tasks touch unfamiliar areas or reference-pattern adoption needs source verification.",
564
+ purpose: "Confirm context/search evidence before plan packets rely on discovered patterns.",
462
565
  requiresUserGate: false
463
566
  }
464
567
  ],
@@ -468,10 +571,25 @@ const STAGE_AUTO_SUBAGENT_DISPATCH = {
468
571
  mode: "mandatory",
469
572
  requiredAtTier: "lightweight",
470
573
  when: "Always during the TDD cycle.",
471
- purpose: "Own phase-specific RED/GREEN/REFACTOR evidence for each slice: failing tests before production writes, minimal GREEN implementation, then behavior-preserving refactor notes.",
574
+ purpose: "Own RED quality and per-slice RED/GREEN/REFACTOR evidence: failing tests before production writes, minimal GREEN implementation, then behavior-preserving refactor notes.",
472
575
  requiresUserGate: false,
473
576
  skill: "tdd-cycle-evidence"
474
577
  },
578
+ {
579
+ agent: "slice-implementer",
580
+ mode: "proactive",
581
+ requiredAtTier: "lightweight",
582
+ when: "When a bounded GREEN/REFACTOR slice has non-overlapping file ownership and a clear RED failure.",
583
+ purpose: "Implement the minimal passing slice inside explicit file boundaries and return strict worker evidence.",
584
+ requiresUserGate: false
585
+ },
586
+ {
587
+ agent: "reviewer",
588
+ mode: "proactive",
589
+ when: "When per-slice review triggers fire or assertion quality needs an independent read-only overseer.",
590
+ purpose: "Read-only overseer pass for slice spec fit, assertion quality, and simpler alternatives.",
591
+ requiresUserGate: false
592
+ },
475
593
  {
476
594
  agent: "doc-updater",
477
595
  mode: "proactive",
@@ -486,7 +604,7 @@ const STAGE_AUTO_SUBAGENT_DISPATCH = {
486
604
  mode: "mandatory",
487
605
  requiredAtTier: "lightweight",
488
606
  when: "Always in review stage.",
489
- purpose: "Layer 1 spec compliance plus integrated Layer 2 review across correctness, performance, architecture, and external-safety tags with source-tagged findings.",
607
+ purpose: "Layer 1 spec compliance plus integrated Layer 2 review across correctness, architecture, and external-safety tags with source-tagged findings.",
490
608
  requiresUserGate: false,
491
609
  skill: "review-spec-pass"
492
610
  },
@@ -494,11 +612,35 @@ const STAGE_AUTO_SUBAGENT_DISPATCH = {
494
612
  agent: "security-reviewer",
495
613
  mode: "mandatory",
496
614
  requiredAtTier: "lightweight",
497
- when: "Always in review stage. Even when no trust boundaries changed, produce an explicit 'no-change' security attestation.",
498
- purpose: "Guarantee a dedicated security pass on every diff: auth, input validation, secrets, injection, privilege, and blast-radius review are never opt-in. MUST load the `security-audit` skill and run a pattern-based sweep across the diff scope and touched modules in addition to the per-diff Layer 2 security checklist.",
615
+ when: "Always in review stage. Even when no trust boundaries changed, produce an explicit no-change/no-impact security attestation.",
616
+ purpose: "Guarantee a dedicated security pass on every diff: auth, input validation, secrets, injection, privilege, and blast-radius review are never opt-in.",
499
617
  requiresUserGate: false,
500
618
  skill: "security-audit"
501
619
  },
620
+ {
621
+ agent: "performance-reviewer",
622
+ mode: "proactive",
623
+ requiredAtTier: "lightweight",
624
+ when: "When hot paths, IO, data volume, rendering, caching, or algorithmic cost can move.",
625
+ purpose: "Run a focused performance lens and report evidence-backed regressions or no-impact rationale.",
626
+ requiresUserGate: false
627
+ },
628
+ {
629
+ agent: "compatibility-reviewer",
630
+ mode: "proactive",
631
+ requiredAtTier: "lightweight",
632
+ when: "When public API, CLI/config, persisted data, generated clients, or dependency versions change.",
633
+ purpose: "Check compatibility, migrations, and consumer-facing contract stability.",
634
+ requiresUserGate: false
635
+ },
636
+ {
637
+ agent: "observability-reviewer",
638
+ mode: "proactive",
639
+ requiredAtTier: "lightweight",
640
+ when: "When failure diagnosis, logging/metrics/traces, rollout, or operational support matters.",
641
+ purpose: "Check observability and supportability evidence against the design/review artifact.",
642
+ requiresUserGate: false
643
+ },
502
644
  {
503
645
  agent: "reviewer",
504
646
  mode: "proactive",
@@ -511,7 +653,7 @@ const STAGE_AUTO_SUBAGENT_DISPATCH = {
511
653
  agent: "reviewer",
512
654
  mode: "proactive",
513
655
  when: "When external reviewer comments, bot findings, or CI annotations are present after the initial review pass.",
514
- purpose: "Run the receiving-code-review workflow so every incoming feedback item gets an explicit disposition with evidence, and the queue is mirrored into review artifacts.",
656
+ purpose: "Run the receiving-code-review workflow so every incoming feedback item gets an explicit disposition with evidence.",
515
657
  requiresUserGate: false,
516
658
  skill: "receiving-code-review"
517
659
  },
@@ -526,10 +668,17 @@ const STAGE_AUTO_SUBAGENT_DISPATCH = {
526
668
  ],
527
669
  ship: [
528
670
  {
529
- agent: "doc-updater",
671
+ agent: "release-reviewer",
530
672
  mode: "mandatory",
531
673
  requiredAtTier: "lightweight",
532
674
  when: "Always in ship stage.",
675
+ purpose: "Run release readiness, finalization mode, rollback, evidence freshness, and victory-detector checks before archive/ship.",
676
+ requiresUserGate: false
677
+ },
678
+ {
679
+ agent: "doc-updater",
680
+ mode: "proactive",
681
+ when: "When release notes, migrations, public behavior, CLI/config, or docs changed.",
533
682
  purpose: "Ensure release notes and docs reflect actual shipped behavior.",
534
683
  requiresUserGate: false
535
684
  },
@@ -661,5 +810,12 @@ export function stageTrackRenderContext(track = "standard") {
661
810
  return trackRenderContext(track);
662
811
  }
663
812
  export function stageAutoSubagentDispatch(stage) {
664
- return STAGE_AUTO_SUBAGENT_DISPATCH[stage];
813
+ return STAGE_AUTO_SUBAGENT_DISPATCH[stage].map((row) => {
814
+ const normalized = delegationDispatchRule(row);
815
+ return {
816
+ ...row,
817
+ dispatchClass: normalized.dispatchClass,
818
+ returnSchema: normalized.returnSchema
819
+ };
820
+ });
665
821
  }
@@ -1,5 +1,6 @@
1
1
  import { SHIP_FINALIZATION_MODES } from "../../../constants.js";
2
2
  import { renderTrackTerminology, trackRenderContext } from "../../track-render-context.js";
3
+ import { referencePatternPolicyNeedles } from "../../reference-patterns.js";
3
4
  const STAGE_POLICY_NEEDLES = {
4
5
  brainstorm: [
5
6
  "Explore project context",
@@ -64,10 +65,10 @@ const STAGE_POLICY_NEEDLES = {
64
65
  ]
65
66
  };
66
67
  /**
   * Builds the list of policy "needle" strings for a stage.
   *
   * Reading this hunk: `-` lines are the 0.51.24 body, `+` lines are the
   * 0.51.25 body.
   *   - 0.51.24 read `STAGE_POLICY_NEEDLES[stage]` and returned a copy of it.
   *   - 0.51.25 builds a new array from `STAGE_POLICY_NEEDLES[stage]` followed
   *     by `referencePatternPolicyNeedles(stage)` and returns that array.
   *
   * In both versions, when `stage` is `"tdd"` and the track's render context
   * does not use plan terminology, every needle is passed through
   * `renderTrackTerminology` before being returned.
   *
   * NOTE(review): both spreads require an iterable; presumably every stage has
   * a `STAGE_POLICY_NEEDLES` entry and `referencePatternPolicyNeedles` always
   * returns an array. Verify against those definitions.
   *
   * @param stage Stage key used for both needle lookups.
   * @param track Track name handed to `trackRenderContext`; defaults to `"standard"`.
   * @returns Array of needle strings for the stage.
   */
  export function stagePolicyNeedlesFromMetadata(stage, track = "standard") {
67
- const needles = STAGE_POLICY_NEEDLES[stage];
68
+ const needles = [...STAGE_POLICY_NEEDLES[stage], ...referencePatternPolicyNeedles(stage)];
68
69
  const renderContext = trackRenderContext(track);
69
70
  if (stage === "tdd" && !renderContext.usesPlanTerminology) {
70
71
  return needles.map((needle) => renderTrackTerminology(needle, renderContext));
71
72
  }
  // 0.51.24 copied here so callers never received the shared table array; in
  // 0.51.25 `needles` is already a freshly built array, so it is returned directly.
72
- return [...needles];
73
+ return needles;
73
74
  }
@@ -41,11 +41,12 @@ export const BRAINSTORM = {
41
41
  "**Write the Problem Decision Record** — product work captures persona/JTBD/pain/value/evidence/success/why-now/do-nothing/non-goals; technical-maintenance work captures affected operator/developer, failure mode, operational improvement, verification signal, do-nothing cost, and non-goals.",
42
42
  "**Premise check (one pass)** — answer the three gstack-style questions in the artifact body: *Right problem? Direct path? What if we do nothing?* Take a position; do not hedge.",
43
43
  "**Reframe with How Might We** — write a single `How Might We …?` line that names the user/operator, the desired outcome, and the constraint. This is the altitude check before approaches.",
44
+ "**Run Clarity Gate** — record ambiguity score (0.00-1.00), decision boundaries, reaffirmed non-goals, and residual-risk handoff before locking recommendations. If ambiguity remains high (>0.40), ask one decision-changing question before recommending.",
44
45
  "**Sharpening question discipline** — ask one decision-changing question at a time. Do not default to 3-5 batched questions; record only questions that changed the direction or a critical stop decision.",
45
46
  "**Use compact discovery for simple apps** — for concrete low-risk asks (todo app, landing page, local widget), do one context pass, compare one baseline and one challenger, then ask for one explicit approval; do not drag the user through a full workshop.",
46
47
  "**Early-exit concrete asks** — for unambiguous implementation-only requests, write a compact Problem Decision Record plus short-circuit handoff (context, approved intent, constraints, assumptions, next-stage risks) and ask for one explicit approval.",
47
48
  "**Ask only decision-changing questions** — one at a time; if answers would not change approach and are non-critical preference/default assumptions, state the assumption and continue; STOP on scope, architecture, security, data loss, public API, migration, auth/pricing, or user approval uncertainty.",
48
- "**Compare 2-3 distinct approaches with stable Role/Upside columns** — Role values are `baseline` | `challenger` | `wild-card`; Upside is `low` | `modest` | `high` | `higher`; include real trade-offs and reuse notes; include exactly one challenger with explicit `high` or `higher` upside.",
49
+ "**Compare 2-3 distinct approaches with stable Role/Upside columns** — Role values are `baseline` | `challenger` | `wild-card`; Upside is `low` | `modest` | `high` | `higher`; include real trade-offs, reuse notes, and reference-pattern source/disposition when a known pattern influenced the option; include exactly one challenger with explicit `high` or `higher` upside.",
49
50
  "**Collect reaction before recommending** — ask which option feels closest and what concern remains, then recommend based on that reaction.",
50
51
  "**Write the `Not Doing` list** — name 3-5 things this brainstorm explicitly is not committing to (vs. deferred). This protects scope from silent enlargement and the next stage from rework.",
51
52
  "**Self-review before user approval** — re-read the artifact and patch contradictions, weak trade-offs, placeholders, ambiguity, and weak handoff language. Record the result in `Self-Review Notes` using the calibrated review format: `- Status: Approved` (or `Issues Found`), `- Patches applied:` with inline note or sub-bullets, `- Remaining concerns:` with inline note or sub-bullets. Use `Patches applied: None` and `Remaining concerns: None` when there is nothing to record.",
@@ -81,8 +82,9 @@ export const BRAINSTORM = {
81
82
  "Artifact written to `.cclaw/artifacts/01-brainstorm-<slug>.md`.",
82
83
  "Project context was explored (files, docs, or recent activity referenced).",
83
84
  "Problem Decision Record includes product framing or technical-maintenance framing.",
85
+ "Clarity Gate records ambiguity score, decision boundaries, reaffirmed non-goals, and residual-risk handoff.",
84
86
  "Clarifying questions are one-at-a-time and captured only when they change a decision or stop condition.",
85
- "2-3 approaches with trade-offs are recorded, including one higher-upside challenger option.",
87
+ "2-3 approaches with trade-offs are recorded, including one higher-upside challenger option and reference-pattern source/disposition when applicable.",
86
88
  "User reaction to approaches is captured before final recommendation.",
87
89
  "Final recommendation explicitly reflects user reaction.",
88
90
  "Selected Direction includes the handoff to the track-aware next stage: scope on standard, spec on medium when scope/design are skipped.",
@@ -131,11 +133,13 @@ export const BRAINSTORM = {
131
133
  { section: "Problem Decision Record", required: true, validationRule: "Must include either product framing fields (persona/JTBD/pain/value/evidence/success/why-now/do-nothing/non-goals) or technical-maintenance fields (operator/developer, failure mode, operational improvement, verification signal, do-nothing cost, non-goals)." },
132
134
  { section: "Premise Check", required: false, validationRule: "Recommended: explicit answers to `Right problem?`, `Direct path?`, `What if we do nothing?` — take a position, do not hedge." },
133
135
  { section: "How Might We", required: false, validationRule: "Recommended: a single `How Might We …?` line naming the user, the outcome, and the binding constraint." },
136
+ { section: "Clarity Gate", required: false, validationRule: "Recommended before recommendation lock: include ambiguity score (0.00-1.00), decision boundaries, reaffirmed non-goals, and residual-risk handoff for scope." },
134
137
  { section: "Sharpening Questions", required: false, validationRule: "Recommended only when needed: one decision-changing question per turn with explicit `Decision impact`; compact tasks may record `None - early exit` with rationale." },
135
138
  { section: "Clarifying Questions", required: false, validationRule: "Must capture question, answer, and decision impact for each clarifying question." },
136
139
  { section: "Approach Tier", required: true, validationRule: "Must classify depth as lite/standard/deep and explain the risk/uncertainty signal." },
137
140
  { section: "Short-Circuit Decision", required: false, validationRule: "Must include Status/Why/Scope handoff lines when short-circuit is discussed; compact stubs are valid for concrete asks." },
138
- { section: "Approaches", required: true, validationRule: "Must compare 2-3 distinct options with real trade-offs. Use the canonical `Role` column with `baseline` | `challenger` | `wild-card` and the `Upside` column with `low` | `modest` | `high` | `higher`; include exactly one challenger row with `high` or `higher` upside." },
141
+ { section: "Reference Pattern Candidates", required: false, validationRule: "Recommended when examples influence direction: list pattern/source, reusable invariant, accept/reject/defer disposition, and reason before approaches are finalized." },
142
+ { section: "Approaches", required: true, validationRule: "Must compare 2-3 distinct options with real trade-offs. Use the canonical `Role` column with `baseline` | `challenger` | `wild-card` and the `Upside` column with `low` | `modest` | `high` | `higher`; include exactly one challenger row with `high` or `higher` upside, and cite reference-pattern source/disposition when applicable." },
139
143
  { section: "Approach Reaction", required: true, validationRule: "Must appear before Selected Direction and summarize user reaction before recommendation, including `Closest option`, `Concerns`, and what changed after reaction." },
140
144
  { section: "Selected Direction", required: true, validationRule: "Must include the selected approach, explicit approval marker, rationale traceable to Approach Reaction, and scope handoff with decisions, drift, confidence, unresolved questions, risk hints, and non-goals." },
141
145
  { section: "Not Doing", required: false, validationRule: "Recommended: 3-5 explicitly non-committed items (distinct from deferred). Protects scope from silent enlargement and the next stage from rework." },