cclaw-cli 0.51.25 → 0.51.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,120 @@ import { stageExamples } from "./examples.js";
5
5
  import { reviewStackAwareRoutes, reviewStackAwareRoutingSummary, stageAutoSubagentDispatch, stageSchema, stageTrackRenderContext } from "./stage-schema.js";
6
6
  import { conversationLanguagePolicyMarkdown } from "./language-policy.js";
7
7
  import { referencePatternsForStage } from "./reference-patterns.js";
8
+ import { harnessDelegationRecipes } from "../harness-adapters.js";
8
9
  const VERIFICATION_STAGES = ["tdd", "review", "ship"];
10
+ // ---------- Cross-cutting universal mechanics (Layer 2 building blocks) ----------
11
+ //
12
+ // These are shared, structural blocks that get injected into every stage skill.
13
+ // They check structural shape, not domain content. Each has a matching linter
14
+ // rule in `src/artifact-linter.ts` so artifacts can fail when shape is missing.
15
/**
 * Response openers that count as "agreement theater".
 *
 * The list is interpolated into the Anti-sycophancy skill block and is
 * exported so other modules can share the exact same vocabulary. It is
 * frozen because it is shared module state: an accidental `push`/`sort`
 * by one importer would silently change what every other importer sees.
 *
 * @type {ReadonlyArray<string>}
 */
export const FORBIDDEN_SYCOPHANCY_PHRASES = Object.freeze([
    "you're absolutely right",
    "great point",
    "absolutely!",
    "thanks for catching",
    "thanks for the great",
    "good catch",
    "love this",
    "nailed it"
]);
25
/**
 * Placeholder tokens that plans, specs, designs, and review artifacts must
 * not contain.
 *
 * The list is interpolated into the NO PLACEHOLDERS skill block and is
 * exported for reuse. It is frozen because it is shared module state and
 * must not be mutated by an importer.
 *
 * @type {ReadonlyArray<string>}
 */
export const FORBIDDEN_PLACEHOLDER_TOKENS = Object.freeze([
    "TBD",
    "TODO",
    "FIXME",
    "implement later",
    "similar to Task",
    "add appropriate error handling",
    "add proper logging",
    "fill this in",
    "<placeholder>"
]);
36
/**
 * Regex source (pass to `new RegExp(...)`) for one calibrated finding line:
 *
 *   [P1|P2|P3] (confidence: <n>/10) <path>[:<line>] — <description>
 *
 * The confidence numerator is bounded to 0-10 (an optional leading zero is
 * tolerated) so impossible scores such as `42/10` no longer match. The added
 * group is non-capturing, so the pattern still has no capture groups.
 */
export const CONFIDENCE_FINDING_REGEX_SOURCE = "\\[P[123]\\]\\s*\\(confidence:\\s*(?:10|0?\\d)/10\\)\\s+[^\\s]+(?::\\d+)?\\s+—";
37
/**
 * Builds the static "STOP-per-issue Protocol" markdown section.
 *
 * @returns {string} Markdown text terminated by a single newline.
 */
export function stopPerIssueBlock() {
    const markdownLines = [
        "## STOP-per-issue Protocol",
        "",
        "After each critical section (premise / alternatives / mode pick / each review finding), STOP and record one decision marker before continuing:",
        "",
        "- `Q<n>:` — issue or open question",
        "- `decision:` — `accept` / `reject` / `defer` / `skip — no issues`",
        "- `rationale:` — one line, evidence-backed",
        "",
        "Do not batch decisions. Do not silently move on. The artifact MUST contain at least one `decision:` marker per critical section.",
        // Trailing empty entry yields the final newline of the section.
        ""
    ];
    return markdownLines.join("\n");
}
49
/**
 * Builds the static "Confidence Calibration" markdown section, which
 * describes the calibrated finding format and its severity/confidence rules.
 *
 * @returns {string} Markdown text terminated by a single newline.
 */
export function confidenceCalibrationBlock() {
    const heading = "## Confidence Calibration";
    const intro = "Findings, recommendations, and review notes use the calibrated finding format:";
    const findingFormat = "`[P1|P2|P3] (confidence: <n>/10) <repo-relative-path>[:<line>] — <one-line description>`";
    const rules = [
        "- `P1` blocks merge; `P2` should be addressed; `P3` is nice-to-have.",
        "- Confidence `< 7` — suppress unless severity is `P1`.",
        '- "What evidence would change this?" — every finding must answer it inline or in the next bullet.',
        '- Never assert "this is fine" without confidence; never assert confidence above `8` without a cited artifact, line, or test.'
    ];
    // Paragraphs are separated by one blank line; the rule bullets are contiguous.
    return heading + "\n\n" + intro + "\n\n" + findingFormat + "\n\n" + rules.join("\n") + "\n";
}
62
/**
 * Builds the static "Outside Voice Slot (optional)" markdown section, which
 * defines the required shape of an optional second-opinion section.
 *
 * @returns {string} Markdown text terminated by a single newline.
 */
export function outsideVoiceSlotBlock() {
    const requiredShape = [
        "- `source:` — model id, critic agent name, or human reviewer handle",
        "- `prompt:` — exact frame sent (or reference to `docs/quality-gates.md` recipe)",
        "- `tension:` — at least one disagreement with the main draft, or `none — converged`",
        "- `resolution:` — accepted / rejected / merged / deferred + one-line rationale"
    ];
    const paragraphs = [
        "## Outside Voice Slot (optional)",
        "Reserve a section titled `## Outside Voice` (or `## Outside Voice — <model/critic>`) for a second-model or fresh-context critic perspective when used. Required shape when present:",
        requiredShape.join("\n"),
        "Empty when not used; do not fabricate an outside voice."
    ];
    // Blank line between paragraphs, single newline at the very end.
    return `${paragraphs.join("\n\n")}\n`;
}
75
/**
 * Builds the "Anti-sycophancy" markdown section.
 *
 * Every entry of `FORBIDDEN_SYCOPHANCY_PHRASES` is wrapped in backticks and
 * listed, comma-separated, as a forbidden response opener.
 *
 * @returns {string} Markdown text terminated by a single newline.
 */
export function antiSycophancyBlock() {
    const quotedOpeners = [];
    for (const opener of FORBIDDEN_SYCOPHANCY_PHRASES) {
        quotedOpeners.push(`\`${opener}\``);
    }
    const markdownLines = [
        "## Anti-sycophancy",
        "",
        `Forbidden response openers when receiving review, critic output, or user feedback: ${quotedOpeners.join(", ")}.`,
        "",
        "Replace agreement theater with one of:",
        "",
        "- `Verified — <evidence>` (you actually checked)",
        "- `Disagree — <reason>` (you push back with substance)",
        "- `Investigating — <next step>` (you do not yet know)",
        "",
        "Never agree before reading the cited evidence. Never apologize for asking a clarifying question.",
        ""
    ];
    return markdownLines.join("\n");
}
90
/**
 * Builds the "NO PLACEHOLDERS Rule" markdown section.
 *
 * Every entry of `FORBIDDEN_PLACEHOLDER_TOKENS` is wrapped in backticks and
 * listed, comma-separated, inside the rule sentence.
 *
 * @returns {string} Markdown text terminated by a single newline.
 */
export function noPlaceholdersBlock() {
    const quotedTokens = [];
    for (const token of FORBIDDEN_PLACEHOLDER_TOKENS) {
        quotedTokens.push(`\`${token}\``);
    }
    const heading = "## NO PLACEHOLDERS Rule";
    const rule = `Plans, specs, designs, and review artifacts MUST NOT contain placeholder tokens: ${quotedTokens.join(", ")}. Use repo-relative paths and concrete commands; if a value is genuinely unknown, write the open question explicitly with a \`Q<n>:\` marker and a \`decision: defer — <reason>\` row instead of inserting a placeholder token.`;
    return [heading, "", rule, ""].join("\n");
}
97
/**
 * Builds the static "Watched-fail Proof" markdown section, which describes
 * the proof line required for any "the failure is real" claim.
 *
 * @returns {string} Markdown text terminated by a single newline.
 */
export function watchedFailProofBlock() {
    const paragraphs = [
        "## Watched-fail Proof",
        'Any "the failure is real" claim (failing test, broken build, regression catch, deployment fail) MUST include a watched-fail proof line in the artifact:',
        "`proof: <iso-ts> | <observed snippet — first 200 chars> | source: <command or log path>`",
        "For TDD specifically, this is the watched-RED proof and is required per new test before `stage-complete` accepts the stage."
    ];
    let markdown = "";
    paragraphs.forEach((paragraph, index) => {
        // Blank line after every paragraph except the last, which gets one newline.
        const separator = index === paragraphs.length - 1 ? "\n" : "\n\n";
        markdown += paragraph + separator;
    });
    return markdown;
}
107
/**
 * Assembles the universal "cross-cutting mechanics" sections that are
 * injected into a stage skill.
 *
 * All stages share the universal mechanics, but each stage's matching
 * linter rules decide what is mandatory vs. structural-only.
 *
 * @param {string} stage - Stage identifier (e.g. "tdd", "review", "ship").
 * @returns {string} The section blocks joined with a newline.
 */
function crossCuttingMechanicsBlock(stage) {
    const blocks = [
        stopPerIssueBlock(),
        confidenceCalibrationBlock(),
        outsideVoiceSlotBlock(),
        antiSycophancyBlock(),
        noPlaceholdersBlock()
    ];
    // The watched-fail proof applies only to the verification stages. Reuse
    // the module-level VERIFICATION_STAGES list (tdd / review / ship) rather
    // than repeating the three literals, so the two cannot drift apart.
    if (VERIFICATION_STAGES.includes(stage)) {
        blocks.push(watchedFailProofBlock());
    }
    return blocks.join("\n");
}
9
122
  function whenNotToUseBlock(items) {
10
123
  if (items.length === 0) {
11
124
  return "";
@@ -67,14 +180,26 @@ function autoSubagentDispatchBlock(stage, track) {
67
180
  const mandatory = schema.mandatoryDelegations;
68
181
  const mandatoryList = mandatory.length > 0 ? mandatory.map((a) => `\`${a}\``).join(", ") : "none";
69
182
  const delegationLogRel = `${RUNTIME_ROOT}/state/delegation-log.json`;
70
- const artifactRef = `${RUNTIME_ROOT}/artifacts/${schema.artifactRules.artifactFile}`;
183
+ const delegationEventsRel = `${RUNTIME_ROOT}/state/delegation-events.jsonl`;
71
184
  return `## Automatic Subagent Dispatch
72
185
  | Agent | Mode | Class | Return Schema | User Gate | Trigger | Purpose |
73
186
  |---|---|---|---|---|---|---|
74
187
  ${rows}
75
- Mandatory: ${mandatoryList}. Record scheduled/completed/waived lifecycle rows in \`${delegationLogRel}\` before completion.
76
- ### Harness Dispatch Contract
77
- Use true harness dispatch: Claude native Task, Cursor generic dispatch, OpenCode \`.opencode/agents/<agent>.md\`, Codex \`.codex/agents/<agent>.toml\`. Run independent read-only/review agents in parallel where safe, write evidence into \`${artifactRef}\`, then append \`${delegationLogRel}\` rows with matching \`fulfillmentMode: "isolated"\` or \`"generic-dispatch"\`. Each dispatched worker should have a scheduled row and a terminal row sharing \`spanId\`; stale scheduled spans block completion. Do not collapse OpenCode or Codex to role-switch by default; role-switch is degraded fallback and must carry non-empty \`evidenceRefs\`. Missing evidence blocks completion.
188
+ Mandatory: ${mandatoryList}. Record lifecycle rows in \`${delegationLogRel}\` and append-only \`${delegationEventsRel}\` before completion.
189
+ ### Harness Dispatch Contract — use true harness dispatch: Claude Task, Cursor generic dispatch, OpenCode \`.opencode/agents/<agent>.md\` via Task/@agent, Codex \`.codex/agents/<agent>.toml\`. Do not collapse OpenCode or Codex to role-switch by default. Worker ACK Contract: ACK must include \`spanId\`, \`dispatchId\`, \`dispatchSurface\`, \`agentDefinitionPath\`, and \`ackTs\`; never claim \`fulfillmentMode: "isolated"\` without matching lifecycle proof. Helper: \`.cclaw/hooks/delegation-record.mjs --status=<status> --span-id=<spanId> --dispatch-id=<dispatchId> --dispatch-surface=<surface> --agent-definition-path=<path> --json\`. Exact recipe: scheduled -> launched -> acknowledged -> completed with the same span; completed isolated/generic rows require a prior ACK event for that span or \`--ack-ts=<iso>\`.
190
+
191
+ ${perHarnessLifecycleRecipeBlock()}`;
192
+ }
193
/**
 * Builds the "Per-Harness Lifecycle Recipe" markdown subsection: a short
 * instruction line followed by a table with one row per recipe returned by
 * `harnessDelegationRecipes()`.
 *
 * Each recipe contributes its `harnessId`, `dispatchSurface`,
 * `agentDefinitionExample`, and `fulfillmentMode`, each wrapped in backticks.
 *
 * @returns {string} Markdown text terminated by a single newline.
 */
function perHarnessLifecycleRecipeBlock() {
    const tableRows = [];
    for (const { harnessId, dispatchSurface, agentDefinitionExample, fulfillmentMode } of harnessDelegationRecipes()) {
        const cells = [harnessId, dispatchSurface, agentDefinitionExample, fulfillmentMode]
            .map((cell) => `\`${cell}\``);
        tableRows.push(`| ${cells.join(" | ")} |`);
    }
    const markdownLines = [
        "### Per-Harness Lifecycle Recipe — placeholders only",
        "Reuse the same `<span-id>` and `<dispatch-id>` across scheduled -> launched -> acknowledged -> completed; substitute neutral tokens `<agent-name>`, `<stage>`, `<iso-ts>`, `<artifact-anchor>`. Full command sequences live in `docs/harnesses.md`.",
        "| Harness | Dispatch surface | Agent definition path | fulfillmentMode |",
        "|---|---|---|---|",
        tableRows.join("\n"),
        ""
    ];
    return markdownLines.join("\n");
}
80
205
  function researchPlaybooksBlock(playbooks) {
@@ -248,8 +373,9 @@ function completionParametersBlock(schema, track) {
248
373
  - \`completion helper\`: \`node .cclaw/hooks/stage-complete.mjs ${schema.stage}\`
249
374
  - \`completion helper with evidence\`: \`node .cclaw/hooks/stage-complete.mjs ${schema.stage} --evidence-json '{"<gate_id>":"<evidence note>"}' --passed=<gate_id>[,<gate_id>]\`
250
375
  - \`completion helper JSON diagnostics\`: append \`--json\` to receive a machine-readable validation failure summary.
376
+ - \`delegation record helper\`: \`node .cclaw/hooks/delegation-record.mjs --stage=${schema.stage} --agent=<agent> --mode=<mandatory|proactive> --status=<scheduled|launched|acknowledged|completed|failed|waived|stale> --span-id=<spanId> --dispatch-id=<dispatchId> --dispatch-surface=<surface> --agent-definition-path=<path> --json\`. \`delegation helper recipe\`: call \`--status=scheduled\`, then \`--status=launched\`, then \`--status=acknowledged\`, then \`--status=completed\` with the same \`--span-id\`, \`--dispatch-id\`, \`--dispatch-surface\`, and \`--agent-definition-path\`; completed isolated/generic rows fail unless that same span already has an acknowledged event or the completed call includes \`--ack-ts=<iso>\`. For role-switch fallback, use \`--dispatch-surface=role-switch --evidence-ref=<artifact#anchor>\` instead of pretending isolated completion.
251
377
  - Fill \`## Learnings\` before closeout: either \`- None this stage.\` or JSON bullets with required keys \`type\`, \`trigger\`, \`action\`, \`confidence\` (knowledge-schema compatible).
252
- - Record mandatory delegation completion/waiver in \`${RUNTIME_ROOT}/state/delegation-log.json\` with rationale as needed.${mandatoryAgents.length > 0 ? ` If a mandatory delegation cannot run in this harness, use \`--waive-delegation=${mandatoryAgents.join(",")} --waiver-reason="<why safe>"\` on the completion helper.` : ""}
378
+ - Record mandatory delegation lifecycle in \`${RUNTIME_ROOT}/state/delegation-log.json\` and append proof events to \`${RUNTIME_ROOT}/state/delegation-events.jsonl\`; the ledger is current state, the event log is audit proof.${mandatoryAgents.length > 0 ? ` If a mandatory delegation cannot run in this harness, use \`--waive-delegation=${mandatoryAgents.join(",")} --waiver-reason="<why safe>"\` on the completion helper.` : ""}
253
379
  - Never edit raw \`flow-state.json\` to complete a stage, even in advisory mode; that bypasses validation, gate evidence, and Learnings harvest. If the helper fails, stop and report the exact command/output instead of applying a manual state workaround.
254
380
  - Completion protocol: verify required gates, update the artifact, then use the completion helper with \`--evidence-json\` and \`--passed\` for every satisfied gate.
255
381
  `;
@@ -437,6 +563,7 @@ ${interactionFocus.length > 0 ? interactionFocus.map((item, i) => `${i + 1}. ${i
437
563
  Decision protocol: ask only decision-changing questions, record the chosen option, rationale, risk, and rollback when the stage makes a non-trivial call.
438
564
 
439
565
  ${batchExecutionModeBlock(stage, track)}
566
+ ${crossCuttingMechanicsBlock(stage)}
440
567
  ## Required Gates
441
568
  ${gateList}
442
569
 
@@ -11,7 +11,7 @@ export const BRAINSTORM = {
11
11
  philosophy: {
12
12
  hardGate: "Do NOT invoke implementation skills, write code, scaffold projects, or mutate product behavior until a concrete direction is approved by the user.",
13
13
  ironLaw: "NO ARTIFACT IS COMPLETE WITHOUT AN EXPLICITLY APPROVED DIRECTION — SILENCE IS NOT APPROVAL.",
14
- purpose: "Turn an initial idea into an approved problem frame and direction, using product or technical-maintenance discovery before proposing solutions.",
14
+ purpose: "Turn an initial idea into an approved problem frame and direction, using domain-neutral problem discovery (product, technical-maintenance, research, ops, or infrastructure framing) before proposing solutions.",
15
15
  whenToUse: [
16
16
  "Starting a new feature or behavior change",
17
17
  "Requirements are ambiguous or trade-offs are unclear",
@@ -37,8 +37,8 @@ export const BRAINSTORM = {
37
37
  executionModel: {
38
38
  checklist: [
39
39
  "**Explore project context** — inspect existing files/docs/recent activity before asking what to build; capture matching files/patterns/seeds in `Context > Discovered context` so downstream stages don't redo discovery.",
40
- "**Classify stage depth** — choose `lite` for clear low-risk tasks, `standard` for normal product/engineering changes, or `deep` for ambiguity, architecture, external dependency, security/data risk, or explicit think-bigger requests.",
41
- "**Write the Problem Decision Record** — product work captures persona/JTBD/pain/value/evidence/success/why-now/do-nothing/non-goals; technical-maintenance work captures affected operator/developer, failure mode, operational improvement, verification signal, do-nothing cost, and non-goals.",
40
+ "**Classify stage depth** — choose `lite` for clear low-risk tasks, `standard` for normal engineering/product changes, or `deep` for ambiguity, architecture, external dependency, security/data risk, or explicit think-bigger requests.",
41
+ "**Write the Problem Decision Record** — pick a free-form `Frame type` label that names how this work is framed (examples: product, technical-maintenance, research-spike, ops-incident, infrastructure), then fill the universal Framing fields: affected user/role/operator, current state/failure mode/opportunity, desired observable outcome, evidence/signal, why now, do-nothing consequence, and non-goals.",
42
42
  "**Premise check (one pass)** — answer the three gstack-style questions in the artifact body: *Right problem? Direct path? What if we do nothing?* Take a position; do not hedge.",
43
43
  "**Reframe with How Might We** — write a single `How Might We …?` line that names the user/operator, the desired outcome, and the constraint. This is the altitude check before approaches.",
44
44
  "**Run Clarity Gate** — record ambiguity score (0.00-1.00), decision boundaries, reaffirmed non-goals, and residual-risk handoff before locking recommendations. If ambiguity remains high (>0.40), ask one decision-changing question before recommending.",
@@ -81,7 +81,7 @@ export const BRAINSTORM = {
81
81
  requiredEvidence: [
82
82
  "Artifact written to `.cclaw/artifacts/01-brainstorm-<slug>.md`.",
83
83
  "Project context was explored (files, docs, or recent activity referenced).",
84
- "Problem Decision Record includes product framing or technical-maintenance framing.",
84
+ "Problem Decision Record includes a `Frame type` label and the universal Framing fields (affected user/role/operator, current state/failure mode/opportunity, desired observable outcome, evidence/signal, why now, do-nothing consequence, non-goals).",
85
85
  "Clarity Gate records ambiguity score, decision boundaries, reaffirmed non-goals, and residual-risk handoff.",
86
86
  "Clarifying questions are one-at-a-time and captured only when they change a decision or stop condition.",
87
87
  "2-3 approaches with trade-offs are recorded, including one higher-upside challenger option and reference-pattern source/disposition when applicable.",
@@ -130,7 +130,7 @@ export const BRAINSTORM = {
130
130
  },
131
131
  artifactValidation: [
132
132
  { section: "Context", required: true, validationRule: "Must reference project state and relevant existing code or patterns. A `Discovered context` subsection (or list) is recommended for downstream traceability." },
133
- { section: "Problem Decision Record", required: true, validationRule: "Must include either product framing fields (persona/JTBD/pain/value/evidence/success/why-now/do-nothing/non-goals) or technical-maintenance fields (operator/developer, failure mode, operational improvement, verification signal, do-nothing cost, non-goals)." },
133
+ { section: "Problem Decision Record", required: true, validationRule: "Must include a free-form `Frame type` label (examples only: product, technical-maintenance, research-spike, ops-incident, infrastructure) and the universal Framing fields: affected user/role/operator, current state/failure mode/opportunity, desired observable outcome, evidence/signal, why now, do-nothing consequence, non-goals. The linter checks that the section has meaningful content; the field labels themselves are the structural contract." },
134
134
  { section: "Premise Check", required: false, validationRule: "Recommended: explicit answers to `Right problem?`, `Direct path?`, `What if we do nothing?` — take a position, do not hedge." },
135
135
  { section: "How Might We", required: false, validationRule: "Recommended: a single `How Might We …?` line naming the user, the outcome, and the binding constraint." },
136
136
  { section: "Clarity Gate", required: false, validationRule: "Recommended before recommendation lock: include ambiguity score (0.00-1.00), decision boundaries, reaffirmed non-goals, and residual-risk handoff for scope." },
@@ -5,6 +5,12 @@ function flowStatePath() {
5
5
  function delegationLogPath() {
6
6
  return `${RUNTIME_ROOT}/state/delegation-log.json`;
7
7
  }
8
/**
 * Returns the location of the delegation events log (JSON Lines) under the
 * runtime root's `state` directory.
 *
 * @returns {string} `<RUNTIME_ROOT>/state/delegation-events.jsonl`
 */
function delegationEventsPath() {
    const eventsFileName = "delegation-events.jsonl";
    return `${RUNTIME_ROOT}/state/${eventsFileName}`;
}
11
/**
 * Returns the location of the subagents state file under the runtime root's
 * `state` directory.
 *
 * @returns {string} `<RUNTIME_ROOT>/state/subagents.json`
 */
function subagentsPath() {
    const subagentsFileName = "subagents.json";
    return `${RUNTIME_ROOT}/state/${subagentsFileName}`;
}
8
14
  function knowledgePath() {
9
15
  return `${RUNTIME_ROOT}/knowledge.jsonl`;
10
16
  }
@@ -27,7 +33,7 @@ advancing or mutating anything. Safe to run at any point. The snapshot reflects:
27
33
  - progress across stages with per-stage markers,
28
34
  - gate coverage,
29
35
  - mandatory delegations with **fulfillmentMode** (isolated / generic-dispatch /
30
- role-switch) plus explicit waived status and evidence gate,
36
+ role-switch), dispatch proof fields, explicit waived status, and evidence gate,
31
37
  - **closeout substate** after ship (retro → compound → archive),
32
38
  - **harness parity row** (tier + fallback) for the active harness set.
33
39
 
@@ -85,7 +91,7 @@ a read-only command.
85
91
  \`Current\`, \`Stage\`, \`Gates\`, \`Delegations\`, \`Blocked by\`, \`Next\`, \`Evidence needed\`.
86
92
  - When blocked, include a plain-English action block:
87
93
  \`Current: <stage or closeout substate>\`; \`Blocked by: <gate/delegation/blocker code>\`; \`Next: <exact command or managed remediation>\`; \`Evidence needed: <artifact/test/review/delegation evidence>\`.
88
- - Report counts, not full artifact contents.
94
+ - Report counts, not full artifact contents. Include active subagent count from \`${subagentsPath()}\` and proof gaps from \`${delegationEventsPath()}\` when present.
89
95
  - If any data source is missing or corrupt, say so explicitly rather than guessing.
90
96
  - Include \`/cc-view tree\` for deep structure and \`/cc-view diff\` for before/after map in the final line.
91
97
 
@@ -175,6 +175,10 @@ Borrow the good part of Team/Ruflo-style orchestration without adding a swarm ru
175
175
  - **Checkpoint before synthesis.** Each agent returns status, files inspected/changed, evidence, and blockers before the parent acts.
176
176
  - **Consensus is for hard calls only.** Use two reviewers when severity or architecture is disputed; otherwise one evidence-backed reviewer is enough.
177
177
 
178
+ ## Parallelization Decision Gate
179
+
180
+ Before parallel dispatch, answer yes to all gates: tasks are independent, write sets do not overlap, outputs can be reconciled by evidence, and failure in one lane will not invalidate hidden assumptions in another. If any answer is no, serialize. Coder/overseer work is contract-first: the coder implements only the pasted contract, the overseer reads code and verifies acceptance evidence before the controller marks work complete.
181
+
178
182
  ## When to Use
179
183
 
180
184
  - Mid/large plans with multiple discrete tasks, dependencies, or risky overlap.
@@ -1013,9 +1017,9 @@ Two patterns (skills under \`.cclaw/skills/\`):
1013
1017
  - **SDD** (subagent-driven-development): sequential implementer→reviewer loops. Paste self-contained task text; never point subagents at plan files.
1014
1018
  - **Parallel Agents** (dispatching-parallel-agents): parallel review/analysis lenses. Never parallelize implementers on same codebase.
1015
1019
 
1016
- Status contract: DONE | DONE_WITH_CONCERNS | NEEDS_CONTEXT | BLOCKED. Worker returns must use the strict JSON schemas in \`subagent-driven-development\`.
1020
+ Status contract: ACK first, then DONE | DONE_WITH_CONCERNS | NEEDS_CONTEXT | BLOCKED. Worker returns must use the strict JSON schemas in \`subagent-driven-development\` and include matching spanId+dispatchId proof.
1017
1021
 
1018
- - Controller sequentially dispatches **implementer → reviewer** loops per task.
1022
+ - Controller sequentially dispatches **implementer → reviewer** loops per task and records lifecycle events in \`.cclaw/state/delegation-events.jsonl\`.
1019
1023
  - HARD-GATE: paste **self-contained task text**; never point subagents at plan files to “discover” scope.
1020
1024
  - **Review fixers** are **fresh agents** after failed review passes — avoids parent-context pollution.
1021
1025
  - **Machine-only flow checks auto-dispatch** by stage (design/plan/tdd/review/ship) without asking the user to trigger each specialist manually.