npm - cclaw-cli - Versions diffs - 8.2.0 → 8.4.0 - Mend

cclaw-cli 8.2.0 → 8.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/README.md +33 -4
package/dist/constants.d.ts +1 -1
package/dist/constants.js +1 -1
package/dist/content/antipatterns.d.ts +1 -1
package/dist/content/antipatterns.js +24 -0
package/dist/content/skills.js +560 -42
package/dist/content/specialist-prompts/architect.d.ts +1 -1
package/dist/content/specialist-prompts/architect.js +8 -1
package/dist/content/specialist-prompts/brainstormer.d.ts +1 -1
package/dist/content/specialist-prompts/brainstormer.js +3 -0
package/dist/content/specialist-prompts/planner.d.ts +1 -1
package/dist/content/specialist-prompts/planner.js +48 -2
package/dist/content/specialist-prompts/reviewer.d.ts +1 -1
package/dist/content/specialist-prompts/reviewer.js +185 -42
package/dist/content/specialist-prompts/security-reviewer.d.ts +1 -1
package/dist/content/specialist-prompts/security-reviewer.js +3 -0
package/dist/content/specialist-prompts/slice-builder.d.ts +1 -1
package/dist/content/specialist-prompts/slice-builder.js +5 -2
package/dist/content/start-command.js +266 -27
package/dist/flow-state.d.ts +20 -1
package/dist/flow-state.js +44 -2
package/dist/types.d.ts +40 -0
package/dist/types.js +14 -0
package/package.json +1 -1

package/dist/content/start-command.js CHANGED Viewed

@@ -1,7 +1,30 @@
 import { CORE_AGENTS } from "./core-agents.js";
 import { ironLawsMarkdown } from "./iron-laws.js";
 const SPECIALIST_LIST = CORE_AGENTS.map((agent) => `- **${agent.id}** (${agent.modes.join(" / ")}) — ${agent.description}`).join("\n");
-const TRIAGE_BLOCK_EXAMPLE = `\`\`\`
+const TRIAGE_ASK_EXAMPLE = `\`\`\`
+askUserQuestion(
+  prompt: "Triage — Complexity: small/medium (high). Recommended: plan → build → review → ship. Why: 3 modules, ~150 LOC, no auth touch. AC mode: soft. Pick a path.",
+  options: [
+    "Proceed as recommended",
+    "Switch to trivial (inline edit + commit, skip plan/review)",
+    "Escalate to large-risky (add brainstormer/architect, strict AC, parallel slices)",
+    "Custom (let me edit complexity / acMode / path)"
+  ],
+  multiSelect: false
+)
+# After the user picks, ask the second question:
+askUserQuestion(
+  prompt: "Run mode for this flow?",
+  options: [
+    "Step (default) — pause after every stage; I type \\"continue\\" to advance",
+    "Auto — chain plan → build → review → ship; stop only on block findings or security flag"
+  ],
+  multiSelect: false
+)
+\`\`\``;
+const TRIAGE_FALLBACK_EXAMPLE = `\`\`\`
 Triage
 ─ Complexity: small/medium  (confidence: high)
 ─ Recommended path: plan → build → review → ship
@@ -12,6 +35,12 @@ Triage
 [2] Switch to trivial (inline edit + commit, skip plan/review)
 [3] Escalate to large-risky (add brainstormer/architect, strict AC, parallel slices)
 [4] Custom (let me edit complexity / acMode / path)
+\`\`\`
+\`\`\`
+Run mode
+[s] Step — pause after every stage (default)
+[a] Auto — chain stages; stop only on block findings or security flag
 \`\`\``;
 const TRIAGE_PERSIST_EXAMPLE = `\`\`\`json
 {
@@ -21,7 +50,8 @@ const TRIAGE_PERSIST_EXAMPLE = `\`\`\`json
     "path": ["plan", "build", "review", "ship"],
     "rationale": "3 modules, ~150 LOC, no auth touch.",
     "decidedAt": "2026-05-08T12:34:56Z",
-    "userOverrode": false
+    "userOverrode": false,
+    "runMode": "step"
   }
 }
 \`\`\``;
@@ -62,6 +92,7 @@ Stage: <stage>  ✅ complete  |  ⏸ paused  |  ❌ blocked
 Artifact: .cclaw/flows/<slug>/<stage>.md
 What changed: <one sentence; e.g. "5 testable conditions written" or "AC-1 RED+GREEN+REFACTOR committed">
 Open findings: <0 outside review; integer in review>
+Confidence: <high | medium | low>
 Recommended next: <continue | review-pause | fix-only | cancel>
 \`\`\``;
 export const START_COMMAND_BODY = `# /cc — cclaw orchestrator
@@ -70,15 +101,16 @@ You are the **cclaw orchestrator**. Your job is to *coordinate*: detect what flo
 User input: ${"`{{TASK}}`"}.
-The flow has five hops, in order:
+The flow has six hops, in order:
 1. **Detect** — fresh \`/cc\` or resume?
 2. **Triage** — only on fresh starts; classify and confirm with the user.
-3. **Dispatch** — for each stage on the chosen path, hand off to a sub-agent.
-4. **Pause** — after each stage, summarise and wait for "continue" / "show" / "cancel".
-5. **Ship** — last hop on \`small/medium\` and \`large-risky\` paths; \`trivial\` skips this.
+3. **Pre-flight (Hop 2.5)** — only on fresh starts AND only when the path is not \`inline\`; surface 3-7 assumptions; user confirms before any specialist runs.
+4. **Dispatch** — for each stage on the chosen path, hand off to a sub-agent.
+5. **Pause** — after each stage, summarise and wait for "continue" / "show" / "cancel".
+6. **Ship + Compound** — last hops on \`small/medium\` and \`large-risky\` paths; \`trivial\` skips both.
-Skipping any hop is a bug; the gates downstream will fail. Read \`triage-gate.md\`, \`flow-resume.md\`, \`tdd-cycle.md\` (active during build), and \`ac-traceability.md\` (active in strict mode) before starting.
+Skipping any hop is a bug; the gates downstream will fail. Read \`triage-gate.md\`, \`pre-flight-assumptions.md\`, \`flow-resume.md\`, \`tdd-cycle.md\` (active during build), and \`ac-traceability.md\` (active in strict mode) before starting.
 ## Hop 1 — Detect
@@ -101,21 +133,29 @@ Do not auto-delete state. Do not hand-edit the JSON.
 ## Hop 2 — Triage (fresh starts only)
-Run the \`triage-gate.md\` skill. The output is a single fenced block followed by four numbered options:
+Run the \`triage-gate.md\` skill. **Use the harness's structured question tool** (\`AskUserQuestion\` in Claude Code, \`AskQuestion\` in Cursor, the "ask" content block in OpenCode, \`prompt\` in Codex). Two questions, in order:
+${TRIAGE_ASK_EXAMPLE}
+The first question's prompt MUST embed the four heuristic facts (complexity + confidence, recommended path, why, AC mode) so the user can decide without reading another block. Keep it under 280 characters; truncate the rationale before truncating the facts.
-${TRIAGE_BLOCK_EXAMPLE}
+The second question is skipped on the trivial / inline path (no stages to chain). Default \`runMode\` is \`step\` if the user dismisses the question.
-Wait for the user's pick. Then patch \`flow-state.json\`:
+If the harness lacks a structured ask facility, fall back to the legacy form:
+${TRIAGE_FALLBACK_EXAMPLE}
+Once both answers are in, patch \`flow-state.json\`:
 ${TRIAGE_PERSIST_EXAMPLE}
-The triage decision is **immutable** for the lifetime of the flow. If the user wants a different acMode mid-flight, the path is \`/cc-cancel\` and a fresh \`/cc\` invocation.
+The triage decision is **immutable** for the lifetime of the flow. If the user wants a different acMode or runMode mid-flight, the path is \`/cc-cancel\` and a fresh \`/cc\` invocation.
-After triage, the rest of the orchestrator runs the stages listed in \`triage.path\`, in order, pausing between each.
+After triage, the rest of the orchestrator runs the stages listed in \`triage.path\`, in order. Pause behaviour between stages is controlled by \`triage.runMode\` — see Hop 4. Before the first dispatch, run **Hop 2.5 (pre-flight)** unless the path is \`inline\`.
 ### Trivial path (acMode: inline)
-\`triage.path\` is \`["build"]\`. Skip plan/review/ship. Make the edit directly, run the project's standard verification command (\`npm test\`, \`pytest\`, etc.) once if there is one, commit with plain \`git commit\`. Single message back to the user with the commit SHA. Done.
+\`triage.path\` is \`["build"]\`. Skip plan/review/ship — and skip pre-flight (Hop 2.5) along with them. Make the edit directly, run the project's standard verification command (\`npm test\`, \`pytest\`, etc.) once if there is one, commit with plain \`git commit\`. Single message back to the user with the commit SHA. Done.
 This is the only path where the orchestrator writes code itself; everything else dispatches a sub-agent.
@@ -125,7 +165,32 @@ Run the \`flow-resume.md\` skill. Render the resume summary:
 ${RESUME_SUMMARY_EXAMPLE}
-Wait for r/s/c (and n on collision). On \`r\`, jump to Hop 3 with the saved \`currentStage\`. On \`s\`, open the artifact and stop. On \`c\`, run \`/cc-cancel\` semantics (move artifacts to \`cancelled/<slug>/\`, reset state).
+Wait for r/s/c (and n on collision). On \`r\`, jump to Hop 4 with the saved \`currentStage\` — pre-flight is **not** re-run on resume; the saved \`triage.assumptions\` is read from disk. On \`s\`, open the artifact and stop. On \`c\`, run \`/cc-cancel\` semantics (move artifacts to \`cancelled/<slug>/\`, reset state).
+## Hop 2.5 — Pre-flight (fresh starts on non-inline paths)
+Run the \`pre-flight-assumptions.md\` skill. Surface 3-7 numbered assumptions covering stack, conventions, architecture defaults, and out-of-scope items. Use the harness's structured ask tool with four options (\`Proceed\` / \`Edit one\` / \`Edit several\` / \`Cancel\`); fall back to a fenced block only when no structured ask is available.
+\`\`\`
+Pre-flight — I'm about to run with these assumptions:
+1. <stack: lang version, framework, runtime>  (read from <file>)
+2. <test convention: location + filename pattern>  (read from <file or shipped slug>)
+3. <architecture default 1>
+4. <architecture default 2>
+5. <out-of-scope default>
+Correct me now or I proceed with these.
+\`\`\`
+Persist the user-confirmed list to \`flow-state.json\` under \`triage.assumptions\` (string array). The list is **immutable** for the lifetime of the flow.
+Skip rules:
+- \`triage.path == ["build"]\` (inline) → skip Hop 2.5 entirely.
+- Resume from a paused flow → skip Hop 2.5 (saved \`assumptions\` is already on disk).
+- \`flow-state.json\` already has \`triage.assumptions\` populated (mid-flight resume) → read but do not re-prompt.
+Every dispatch envelope from Hop 3 onward includes the line \`Pre-flight assumptions: see triage.assumptions in flow-state.json\`. Sub-agents read the list; planner and architect copy it verbatim into their artifacts.
 ## Hop 3 — Dispatch
@@ -175,22 +240,89 @@ The orchestrator reads only this. The full artifact stays in \`.cclaw/flows/<slu
 #### plan
 - Specialist: \`planner\`.
-- Inputs: triage decision, the user's original prompt, \`.cclaw/lib/templates/plan.md\`, and any matching shipped slug if refining.
-- Output: \`.cclaw/flows/<slug>/plan.md\` with \`status: active\`.
+- Inputs: triage decision (including \`assumptions\` from Hop 2.5), the user's original prompt, \`.cclaw/lib/templates/plan.md\`, **\`.cclaw/knowledge.jsonl\`** (append-only log of every shipped slug — planner reads up to 3 relevant prior entries and copies their lessons into the plan body), and any matching shipped slug if refining.
+- Output: \`.cclaw/flows/<slug>/plan.md\` with \`status: active\`. Includes a \`## Assumptions\` block (verbatim from triage) and a \`## Prior lessons\` block (1-3 cross-flow lessons or "No prior shipped slugs apply to this task.").
 - Soft-mode plan body: bullet list of testable conditions, no AC IDs, no commit-trace block.
 - Strict-mode plan body: AC table with IDs, verification lines, touch surfaces, parallel-build topology if it applies.
-- Slim summary: condition / AC count, max touch surface, parallel-build flag, recommended-next.
+- Slim summary: condition / AC count, max touch surface, parallel-build flag, recommended-next, prior-lesson count.
 #### build
 - Specialist: \`slice-builder\`.
 - Inputs: \`.cclaw/flows/<slug>/plan.md\`, \`.cclaw/lib/templates/build.md\`, \`.cclaw/lib/skills/tdd-cycle.md\`.
 - Output: \`.cclaw/flows/<slug>/build.md\` with TDD evidence at the granularity dictated by \`acMode\`.
-- Strict mode: full RED → GREEN → REFACTOR per AC, every commit through \`commit-helper.mjs\`. Parallel-build only if planner declared it AND \`acMode == strict\`.
-- Soft mode: one TDD cycle for the whole feature; tests under \`tests/\` mirroring the production module path; plain \`git commit\`.
+- Soft mode: one TDD cycle for the whole feature; tests under \`tests/\` mirroring the production module path; plain \`git commit\`. Sequential, single dispatch, no worktrees.
+- Strict mode, sequential: full RED → GREEN → REFACTOR per AC, every commit through \`commit-helper.mjs\`. Single \`slice-builder\` dispatch in the main working tree.
+- Strict mode, parallel: see "Parallel-build fan-out" below — only when planner declared \`topology: parallel-build\` AND ≥4 AC AND ≥2 disjoint touchSurface clusters.
 - Inline mode: not dispatched here — handled in the trivial path of Hop 2.
 - Slim summary: AC committed (strict) or conditions verified (soft), suite-status (passed / failed), open follow-ups.
+##### Parallel-build fan-out (strict mode + planner topology=parallel-build only)
+When the planner artifact declares \`topology: parallel-build\` with ≥2 slices and \`acMode == strict\`, the orchestrator fans out one \`slice-builder\` sub-agent per slice, **capped at 5**, each in its own \`git worktree\`. This is the only fan-out cclaw uses outside of \`ship\`.
+\`\`\`text
+                                  flows/<slug>/plan.md
+                                  topology: parallel-build
+                                  slices: [s-1, s-2, s-3]   (max 5)
+                                              │
+                                              ▼
+                            git worktree add .cclaw/worktrees/<slug>-s-1 -b cclaw/<slug>/s-1
+                            git worktree add .cclaw/worktrees/<slug>-s-2 -b cclaw/<slug>/s-2
+                            git worktree add .cclaw/worktrees/<slug>-s-3 -b cclaw/<slug>/s-3
+                                              │
+                          ┌───────────────────┼───────────────────┐
+                          ▼                   ▼                   ▼
+                   slice-builder         slice-builder         slice-builder
+                   (s-1; AC-1, AC-2)     (s-2; AC-3)           (s-3; AC-4, AC-5)
+                   cwd: …/<slug>-s-1      cwd: …/<slug>-s-2     cwd: …/<slug>-s-3
+                   RED→GREEN→REFACTOR     RED→GREEN→REFACTOR    RED→GREEN→REFACTOR
+                   per AC, in slice       per AC, in slice      per AC, in slice
+                          │                   │                   │
+                          └───────────────────┼───────────────────┘
+                                              ▼
+                                  reviewer (mode=integration)
+                                  reads each branch, checks
+                                  cross-slice conflicts, AC↔commit
+                                  chain across the wave
+                                              │
+                                              ▼
+                          merge cclaw/<slug>/s-1 → main, then s-2, then s-3
+                          (fast-forward when wave was clean; otherwise stop and ask)
+                                              │
+                                              ▼
+                          git worktree remove .cclaw/worktrees/<slug>-s-N (per slice)
+\`\`\`
+Dispatch envelope per slice:
+\`\`\`
+Dispatch slice-builder
+─ Stage: build
+─ Slug: <slug>
+─ Slice: s-N  (acIds: [AC-N, AC-N+1])
+─ Working tree: .cclaw/worktrees/<slug>-s-N
+─ Branch: cclaw/<slug>/s-N
+─ AC mode: strict
+─ Touch surface (only paths this slice may modify): [<paths from plan>]
+─ Output: .cclaw/flows/<slug>/build.md (append, marked with slice id)
+─ Forbidden: read or modify any path outside touch surface; read another slice's worktree mid-flight; merge or rebase
+\`\`\`
+After every slice-builder returns:
+1. Patch \`flow-state.json\` with the per-slice progress.
+2. When **every** slice has reported, dispatch \`reviewer\` mode=\`integration\` (one sub-agent, reads from each branch).
+3. On clear integration review, merge slices into main one at a time. On block, dispatch \`slice-builder\` mode=\`fix-only\` against the cited file:line refs, then re-run the integration reviewer.
+4. Worktree cleanup happens after merge; the cclaw branches stay until ship.
+Hard rules:
+- **More than 5 parallel slices is forbidden.** If planner produced >5, the planner must merge thinner slices into fatter ones before build; do not generate "wave 2".
+- Slice-builders never read each other's worktrees mid-flight. A slice that detects a conflict with another stops and raises an integration finding.
+- If the harness lacks sub-agent dispatch or worktree creation fails (non-git repo, permissions), parallel-build degrades silently to inline-sequential. Record the fallback in \`flows/<slug>/build.md\` frontmatter (\`subAgentDispatch: inline-fallback\`) — not an error.
+- \`auto\` runMode does **not** affect the integration-reviewer ask: a parallel wave that produces a block finding always asks the user before fix-only.
 #### review
 - Specialist: \`reviewer\` (mode = \`code\` for sequential build, \`integration\` for parallel-build).
@@ -202,11 +334,75 @@ The orchestrator reads only this. The full artifact stays in \`.cclaw/flows/<slu
 #### ship
-- Specialist: \`reviewer\` mode=\`release\` AND \`security-reviewer\` mode=\`threat-model\` if \`security_flag\` is true.
-- Pattern: **parallel fan-out + merge** (the only fan-out cclaw uses). Dispatch both specialists in the same message; merge their summaries in your context.
+- Specialists fanned out in parallel (the only fan-out cclaw uses outside of parallel-build):
+  - \`reviewer\` mode=\`release\` — always.
+  - \`reviewer\` mode=\`adversarial\` — **strict mode only** (see below).
+  - \`security-reviewer\` mode=\`threat-model\` — when \`security_flag\` is true.
+- Pattern: **parallel fan-out + merge** (the canonical cclaw fan-out). Dispatch all specialists in the same message; merge their summaries in your context.
 - Inputs: \`.cclaw/flows/<slug>/plan.md\`, build.md, review.md.
-- Output: \`.cclaw/flows/<slug>/ship.md\` with the go/no-go decision, AC↔commit map (strict) or condition checklist (soft), release notes, and rollback plan.
-- After ship, run the compound learning gate (Hop 5).
+- Output: \`.cclaw/flows/<slug>/ship.md\` with the go/no-go decision, AC↔commit map (strict) or condition checklist (soft), release notes, and rollback plan. Plus, in strict mode, \`.cclaw/flows/<slug>/pre-mortem.md\` written by the adversarial reviewer (see below).
+- After ship, run the compound learning gate (Hop 5).
+##### Adversarial pre-mortem (strict mode only)
+Before the ship gate finalises, the orchestrator dispatches \`reviewer\` mode=\`adversarial\` against the diff produced for this slug. The adversarial reviewer's specific job is to **think like the failure**: how would this break in production a week from now?
+The adversarial sweep produces \`.cclaw/flows/<slug>/pre-mortem.md\`:
+\`\`\`markdown
+---
+slug: <slug>
+stage: ship
+status: pre-mortem
+generated_by: reviewer mode=adversarial
+generated_at: <iso>
+---
+# Pre-mortem — <slug>
+It is now <ship-date>+7d. This change shipped, then failed. What was the failure?
+## Most likely failure modes
+1. **<class>: <one-line failure>** — trigger: <input/condition>; impact: <user-visible result>; covered by AC: <yes/no, AC-N or "no AC tests this">.
+2. **<class>: ...**
+3. ...
+## Underexplored axes
+- <axis (correctness/readability/architecture/security/perf)>: <what reviewer's code-mode pass might have missed>
+- ...
+## Recommended pre-ship actions
+- <add a regression test for failure 1: file:line>
+- <surface decision X to the user before merge>
+- <none — pre-mortem is satisfied>
+\`\`\`
+Failure classes the adversarial pass MUST consider (mark each as "covered" / "not covered" / "n/a"):
+- **data-loss** — write paths that could lose user data on rollback or partial failure;
+- **race** — concurrent operations on shared state without locking / ordering guarantees;
+- **regression** — prior-shipped behaviour an existing test does not pin;
+- **rollback impossibility** — schema migration / persisted state shape that cannot be reverted;
+- **accidental scope** — diff touches files no AC mentions;
+- **security-edge** — auth bypass, injection, leaked secret in logs, untrusted input.
+The adversarial reviewer treats every "not covered" as a finding (axis varies; severity \`required\` by default, escalated to \`critical\` for data-loss / security-edge). Findings go into the existing Concern Ledger in \`review.md\`; the pre-mortem.md is a parallel artifact summarising the adversarial pass's reasoning so the user can read a one-page rationale.
+Ship gate decision after fan-out:
+| reviewer:release | reviewer:adversarial | security-reviewer | gate |
+| --- | --- | --- | --- |
+| clear | clear | clear | clear → ship may proceed |
+| clear | block | any | block → fix-only loop or user override |
+| any | any | block | block → fix-only loop |
+| clear | warn | clear | warn → render adversarial findings, ask user |
+The adversarial pass runs **once per ship attempt**, not iteratively. If it produces \`block\`-level findings, the orchestrator dispatches \`slice-builder\` mode=\`fix-only\` and re-runs the **regular** reviewer (mode=\`code\`) to confirm the fix; the adversarial pass does not re-run unless the user explicitly requests it (the marginal value drops fast on second run).
+In \`soft\` mode the adversarial pass is **skipped** by default — the lighter-weight regular reviewer is enough for small/medium work. The user can opt in with \`/cc <task> --adversarial\` if they want the extra sweep regardless.
 ### Discovery (large-risky only)
@@ -220,6 +416,10 @@ Each step is a separate dispatch + pause + slim summary. The user can stop after
 ## Hop 4 — Pause and resume
+Pause behaviour depends on \`triage.runMode\` (default \`step\`).
+### \`step\` mode (default; safer; recommended for \`strict\` work)
 After every dispatch returns:
 1. Render the slim summary back to the user.
@@ -227,7 +427,42 @@ After every dispatch returns:
 3. Wait. Do **not** auto-advance. The user types \`continue\`, \`show\`, \`fix-only\`, or \`cancel\`.
 4. On \`continue\` → next stage in \`triage.path\`. On \`show\` → open the artifact and stop. On \`fix-only\` → re-dispatch slice-builder with mode=fix-only and the cited findings. On \`cancel\` → \`/cc-cancel\`.
-Resume from a fresh session works because everything is on disk: \`flow-state.json\` has \`currentStage\` and \`triage\`, \`flows/<slug>/*.md\` carries the artifacts. The next \`/cc\` invocation enters Hop 1 → detect → resume summary → continue from \`currentStage\`.
+### \`auto\` mode (autopilot; faster; recommended for \`inline\` / \`soft\` work)
+After every dispatch returns:
+1. Render the slim summary back to the user (one block, no prompt).
+2. **Immediately** dispatch the next stage in \`triage.path\` — no waiting, no question.
+3. Stop unconditionally only on these hard gates (autopilot **always** asks here):
+   - \`reviewer\` returned \`block\` decision (open findings) → render the findings, ask \`continue with fix-only\` / \`cancel\`.
+   - \`security-reviewer\` raised any finding → ask before proceeding.
+   - \`reviewer\` returned \`cap-reached\` (5 iterations without convergence) → ask.
+   - **A returned slim summary has \`Confidence: low\`** → ask before proceeding (covered in detail below).
+   - About to run \`ship\` (last stage in \`triage.path\`) → ask \`ship now?\` once, then proceed on confirmation. Ship is the only stage that always confirms in autopilot.
+Auto mode never silently skips a hard gate; it just removes the cosmetic pause between green stages. The user typed \`auto\` once during triage and meant it.
+### Confidence as a hard gate (both modes)
+Every slim summary carries a \`Confidence: high | medium | low\` line. The orchestrator reads it and treats it as a quality signal for the dispatch that just returned, not a prediction of the next stage:
+| Confidence | step mode | auto mode |
+| --- | --- | --- |
+| \`high\` | normal pause; render summary, ask continue | normal flow; chain to next stage |
+| \`medium\` | normal pause; render summary, mention confidence in the user-facing line ("Plan ready (medium confidence — see Notes). Continue?") | render the summary inline ("medium — see Notes"); chain anyway. The Notes line is required when confidence is medium |
+| \`low\` | hard gate. Render the summary, do **not** offer \`continue\` as a verb. Offer: \`expand <stage>\` (re-dispatch the same specialist with a richer envelope), \`show\` (open the artifact), \`override\` (acknowledge the risk and continue anyway), \`cancel\` | hard gate. Stop chaining. Render the summary, ask the same expand/show/override/cancel question. \`override\` is the only word that resumes auto-chaining |
+A specialist that returns \`Confidence: low\` MUST also write a non-empty \`Notes:\` line that explains the dimension that drove confidence down (missing input, unverified citation, partial coverage, etc.). The orchestrator surfaces that Notes line verbatim — the sub-agent is the only one with the context to explain.
+Repeated low-confidence on the same stage (the second consecutive dispatch returns low) is itself a routing signal: the orchestrator should suggest re-triage with a richer path (e.g. \`small/medium\` → \`large-risky\`) or splitting the slug, rather than dispatching the same specialist a third time.
+Override is sticky to **this stage only** — the next stage starts with the normal high-confidence-default behaviour.
+### Common rules for both modes
+Resume from a fresh session works because everything is on disk: \`flow-state.json\` has \`currentStage\`, \`triage\` (with \`runMode\`), \`flows/<slug>/*.md\` carries the artifacts. The next \`/cc\` invocation enters Hop 1 → detect → resume summary → continue from \`currentStage\` with the saved runMode.
+Resuming a paused \`auto\` flow re-enters auto mode silently. Resuming a paused \`step\` flow renders the slim summary again and waits for \`continue\`.
 ## Hop 5 — Compound (automatic)
@@ -244,8 +479,10 @@ After ship + compound, move every \`<stage>.md\` from \`flows/<slug>/\` into \`.
 ## Always-ask rules
-- Always run the triage gate on a fresh \`/cc\`. Never silently pick a path.
-- Always pause after every stage. Never auto-advance through plan → build → review without asking.
+- Always run the triage gate on a fresh \`/cc\`. Never silently pick a path. Use the harness's structured question tool, not a printed code block.
+- In \`step\` mode, always pause after every stage. Never auto-advance.
+- In \`auto\` mode, never auto-advance past a hard gate (block / cap-reached / security finding / **Confidence: low** / ship). The user opted into chaining green stages, not chaining decisions.
+- Always honour \`Confidence: low\` in the slim summary. Stop and ask, both modes. See "Confidence as a hard gate" above.
 - Always ask before \`git push\` or PR creation. Commit-helper auto-commits in strict mode; everything past commit is opt-in.
 - Always ask before deleting active artifacts (\`/cc-cancel\` is the supported way; do not \`rm\` artifacts directly).
 - Always show the slim summary back to the user; do not summarise from your own memory of the dispatch.
@@ -263,6 +500,7 @@ These skills auto-trigger during \`/cc\`. Do not re-explain them; obey them.
 - **conversation-language** — always-on; reply in the user's language but never translate \`AC-N\`, \`D-N\`, \`F-N\`, slugs, paths, frontmatter keys, mode names, or hook output.
 - **anti-slop** — always-on for any code-modifying step; bans redundant verification and environment shims.
 - **triage-gate** — Hop 2 of every fresh \`/cc\`.
+- **pre-flight-assumptions** — Hop 2.5 of every fresh non-inline \`/cc\`; surfaces 3-7 stack/convention/architecture defaults for user confirmation.
 - **flow-resume** — when \`/cc\` is invoked with no task or with an active flow.
 - **plan-authoring** — on every edit to \`.cclaw/flows/<slug>/plan.md\`.
 - **ac-traceability** — strict mode only; before every commit.
@@ -270,7 +508,8 @@ These skills auto-trigger during \`/cc\`. Do not re-explain them; obey them.
 - **refinement** — when an existing plan match is detected.
 - **parallel-build** — strict mode + planner topology=parallel-build; enforces 5-slice cap and worktree dispatch.
 - **security-review** — when the diff touches sensitive surfaces.
-- **review-loop** — wraps every reviewer / security-reviewer invocation; runs the Concern Ledger + convergence detector.
+- **review-loop** — wraps every reviewer / security-reviewer invocation; runs the Concern Ledger + Five-axis pass + convergence detector.
+- **source-driven** — strict mode only (opt-in for soft); architect/planner detect stack version, fetch official doc deep-links, cite URLs, mark UNVERIFIED when docs are missing.
 ${ironLawsMarkdown()}
 `;

package/dist/flow-state.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { type AcMode, type AcceptanceCriterionState, type BuildProfile, type DiscoverySpecialistId, type FlowStage, type RoutingClass, type TriageDecision } from "./types.js";
+import { type AcMode, type AcceptanceCriterionState, type BuildProfile, type DiscoverySpecialistId, type FlowStage, type RoutingClass, type RunMode, type TriageDecision } from "./types.js";
 export declare const FLOW_STATE_SCHEMA_VERSION = 3;
 /** v8.0–v8.1 schema. Auto-migrated to v3 on read. */
 export declare const LEGACY_V8_FLOW_STATE_SCHEMA_VERSION = 2;
@@ -28,10 +28,29 @@ export declare class LegacyFlowStateError extends Error {
 export declare function isFlowStage(value: unknown): value is FlowStage;
 export declare function isRoutingClass(value: unknown): value is RoutingClass;
 export declare function isAcMode(value: unknown): value is AcMode;
+export declare function isRunMode(value: unknown): value is RunMode;
 export declare function isDiscoverySpecialist(value: unknown): value is DiscoverySpecialistId;
 export declare function createInitialFlowState(nowIso?: string): FlowStateV82;
 /** @deprecated kept for source-level compatibility with v8.1 imports. */
 export declare const createInitialFlowStateV8: typeof createInitialFlowState;
+/**
+ * Read a triage decision's pre-flight assumptions.
+ *
+ * Returns:
+ * - `[]` when no pre-flight ran (legacy state, trivial path, or older
+ *   `step`/`auto` flow-state with no assumptions field). Callers should
+ *   treat this as "no captured assumptions, do not surface anything".
+ * - the recorded array (possibly empty if the pre-flight ran but the user
+ *   confirmed there were no assumptions to record — rare but valid).
+ */
+export declare function assumptionsOf(triage: TriageDecision | null | undefined): readonly string[];
+/**
+ * Read a triage decision's runMode with the documented default.
+ *
+ * v8.2 state files do not record runMode; treat them as `step` so existing
+ * flows keep their pause-between-stages behaviour byte-for-byte.
+ */
+export declare function runModeOf(triage: TriageDecision | null | undefined): RunMode;
 /**
  * Validate a flow-state object. Throws on hard schema errors.
  *

package/dist/flow-state.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { AC_MODES, FLOW_STAGES, ROUTING_CLASSES } from "./types.js";
+import { AC_MODES, FLOW_STAGES, ROUTING_CLASSES, RUN_MODES } from "./types.js";
 export const FLOW_STATE_SCHEMA_VERSION = 3;
 /** v8.0–v8.1 schema. Auto-migrated to v3 on read. */
 export const LEGACY_V8_FLOW_STATE_SCHEMA_VERSION = 2;
@@ -19,6 +19,9 @@ export function isRoutingClass(value) {
 export function isAcMode(value) {
     return typeof value === "string" && AC_MODES.includes(value);
 }
+export function isRunMode(value) {
+    return typeof value === "string" && RUN_MODES.includes(value);
+}
 export function isDiscoverySpecialist(value) {
     return value === "brainstormer" || value === "architect" || value === "planner";
 }
@@ -62,7 +65,8 @@ function inferTriageFromLegacy(state) {
         path: ["plan", "build", "review", "ship"],
         rationale: "Auto-migrated from cclaw 8.0/8.1 flow-state (no triage recorded; preserved as strict).",
         decidedAt: state.startedAt,
-        userOverrode: false
+        userOverrode: false,
+        runMode: "step"
     };
 }
 function assertAcArray(value) {
@@ -116,6 +120,44 @@ function assertTriageOrNull(value) {
     if (typeof triage.userOverrode !== "boolean") {
         throw new Error("triage.userOverrode must be a boolean");
     }
+    if (triage.runMode !== undefined && !isRunMode(triage.runMode)) {
+        throw new Error(`Invalid triage.runMode: ${String(triage.runMode)}`);
+    }
+    if (triage.assumptions !== undefined && triage.assumptions !== null) {
+        if (!Array.isArray(triage.assumptions)) {
+            throw new Error("triage.assumptions must be an array, null, or absent");
+        }
+        for (const entry of triage.assumptions) {
+            if (typeof entry !== "string") {
+                throw new Error("triage.assumptions entries must be strings");
+            }
+        }
+    }
+}
+/**
+ * Read a triage decision's pre-flight assumptions.
+ *
+ * Returns:
+ * - `[]` when no pre-flight ran (legacy state, trivial path, or older
+ *   `step`/`auto` flow-state with no assumptions field). Callers should
+ *   treat this as "no captured assumptions, do not surface anything".
+ * - the recorded array (possibly empty if the pre-flight ran but the user
+ *   confirmed there were no assumptions to record — rare but valid).
+ */
+export function assumptionsOf(triage) {
+    const value = triage?.assumptions;
+    if (value === null || value === undefined)
+        return [];
+    return value;
+}
+/**
+ * Read a triage decision's runMode with the documented default.
+ *
+ * v8.2 state files do not record runMode; treat them as `step` so existing
+ * flows keep their pause-between-stages behaviour byte-for-byte.
+ */
+export function runModeOf(triage) {
+    return triage?.runMode ?? "step";
 }
 /**
  * Validate a flow-state object. Throws on hard schema errors.

package/dist/types.d.ts CHANGED Viewed

@@ -41,6 +41,21 @@ export type RoutingClass = (typeof ROUTING_CLASSES)[number];
  */
 export declare const AC_MODES: readonly ["inline", "soft", "strict"];
 export type AcMode = (typeof AC_MODES)[number];
+/**
+ * How aggressively the orchestrator advances through the flow.
+ *
+ * - `step` (default): pause after every stage. The orchestrator renders the
+ *   slim summary and waits for the user to type "continue". The original
+ *   v8.2 behaviour, recommended for `strict` and unfamiliar work.
+ * - `auto`: render the slim summary and immediately dispatch the next stage
+ *   without asking. Stops only on hard gates (block findings, security flag,
+ *   ship). Recommended for `inline` / `soft` work the user has already
+ *   scoped tightly.
+ *
+ * Selected at the triage gate; user can override per flow.
+ */
+export declare const RUN_MODES: readonly ["step", "auto"];
+export type RunMode = (typeof RUN_MODES)[number];
 /**
  * Decision recorded at the triage gate that opens every new flow.
  * Persisted in flow-state.json so resumes never re-trigger triage.
@@ -56,6 +71,31 @@ export interface TriageDecision {
     decidedAt: string;
     /** Did the user override the orchestrator's recommendation? */
     userOverrode: boolean;
+    /**
+     * Step-by-step (default) or autopilot. Persisted across resumes so the
+     * user only picks once per flow.
+     *
+     * Optional in TypeScript so v8.2 state files (which lack `runMode`) still
+     * validate; readers MUST default to `step` when it is absent.
+     */
+    runMode?: RunMode;
+    /**
+     * Pre-flight assumptions surfaced at Hop 2.5 (between triage and first
+     * dispatch). Each entry is one short sentence the orchestrator was about
+     * to silently default to (stack pick, lib version, file layout, target
+     * platform, code-style preference). The user either acknowledged or
+     * corrected these before any sub-agent ran.
+     *
+     * Optional and skipped entirely on the inline path. On soft/strict, the
+     * pre-flight skill writes 3-7 entries here; subsequent flows in the same
+     * project may seed defaults from the most recent shipped slug's
+     * `assumptions:` block.
+     *
+     * Reading rule: `null` or absent means "no pre-flight ran" (legacy state
+     * or trivial path). An empty array means "ran and the user confirmed that
+     * no assumptions are needed", which is rare but valid.
+     */
+    assumptions?: string[] | null;
 }
 export interface CliContext {
     cwd: string;

package/dist/types.js CHANGED Viewed

@@ -21,3 +21,17 @@ export const ROUTING_CLASSES = ["trivial", "small-medium", "large-risky"];
  * Selected at the triage gate; user can override.
  */
 export const AC_MODES = ["inline", "soft", "strict"];
+/**
+ * How aggressively the orchestrator advances through the flow.
+ *
+ * - `step` (default): pause after every stage. The orchestrator renders the
+ *   slim summary and waits for the user to type "continue". The original
+ *   v8.2 behaviour, recommended for `strict` and unfamiliar work.
+ * - `auto`: render the slim summary and immediately dispatch the next stage
+ *   without asking. Stops only on hard gates (block findings, security flag,
+ *   ship). Recommended for `inline` / `soft` work the user has already
+ *   scoped tightly.
+ *
+ * Selected at the triage gate; user can override per flow.
+ */
+export const RUN_MODES = ["step", "auto"];

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "cclaw-cli",
-  "version": "8.2.0",
+  "version": "8.4.0",
   "description": "Lightweight harness-first flow toolkit for coding agents",
   "type": "module",
   "bin": {