@mmerterden/multi-agent-pipeline 10.7.2 → 10.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/CHANGELOG.md +33 -2
  2. package/README.md +20 -4
  3. package/docs/adr/0001-three-model-triage.md +2 -2
  4. package/docs/adr/0007-multi-tool-adapter-framework.md +1 -1
  5. package/docs/adr/README.md +2 -2
  6. package/docs/architecture.md +14 -14
  7. package/docs/features.md +22 -21
  8. package/docs/performance.md +3 -3
  9. package/index.js +3 -7
  10. package/install/templates/copilot-instructions.md +2 -2
  11. package/package.json +2 -5
  12. package/pipeline/agents/dev-critic.md +1 -1
  13. package/pipeline/agents/task-clarifier.md +1 -3
  14. package/pipeline/claude-md-template.md +1 -1
  15. package/pipeline/commands/multi-agent/dev-autopilot.md +1 -1
  16. package/pipeline/commands/multi-agent/finish.md +2 -2
  17. package/pipeline/commands/multi-agent/help.md +12 -12
  18. package/pipeline/commands/multi-agent/local.md +1 -1
  19. package/pipeline/commands/multi-agent/refs/features/dev-critic.md +1 -1
  20. package/pipeline/commands/multi-agent/refs/features/model-fallback.md +7 -3
  21. package/pipeline/commands/multi-agent/refs/features/shadow-git.md +1 -1
  22. package/pipeline/commands/multi-agent/refs/knowledge.md +1 -1
  23. package/pipeline/commands/multi-agent/refs/phases/log-format.md +1 -1
  24. package/pipeline/commands/multi-agent/refs/phases/modes.md +1 -1
  25. package/pipeline/commands/multi-agent/refs/phases/phase-0-init.md +1 -1
  26. package/pipeline/commands/multi-agent/refs/phases/phase-1-analysis.md +2 -2
  27. package/pipeline/commands/multi-agent/refs/phases/phase-2-planning.md +3 -3
  28. package/pipeline/commands/multi-agent/refs/phases/phase-3-dev.md +2 -2
  29. package/pipeline/commands/multi-agent/refs/phases/phase-4-review.md +18 -18
  30. package/pipeline/commands/multi-agent/refs/progress-contract.md +1 -1
  31. package/pipeline/commands/multi-agent/refs/tracker-contract.md +1 -3
  32. package/pipeline/commands/multi-agent/review.md +8 -8
  33. package/pipeline/commands/multi-agent/sync.md +3 -3
  34. package/pipeline/commands/multi-agent.md +7 -7
  35. package/pipeline/lib/plan-todos.sh +2 -5
  36. package/pipeline/lib/post-pr-review.sh +2 -2
  37. package/pipeline/lib/review-watch.sh +2 -6
  38. package/pipeline/lib/shadow-git.sh +3 -5
  39. package/pipeline/schemas/agent-state.schema.json +1 -1
  40. package/pipeline/schemas/clarify-output.schema.json +1 -1
  41. package/pipeline/schemas/plan-todos.schema.json +1 -1
  42. package/pipeline/schemas/prefs.schema.json +8 -8
  43. package/pipeline/schemas/reviewer-output.schema.json +1 -1
  44. package/pipeline/schemas/triage-output.schema.json +2 -2
  45. package/pipeline/scripts/README.md +1 -2
  46. package/pipeline/scripts/cost-budget-check.mjs +1 -1
  47. package/pipeline/scripts/cost-table.json +7 -0
  48. package/pipeline/scripts/fixtures/install-layout.tsv +5 -5
  49. package/pipeline/scripts/smoke-review-watch.sh +2 -2
  50. package/pipeline/scripts/smoke-shadow-git.sh +1 -1
  51. package/pipeline/scripts/uninstall.mjs +53 -57
  52. package/pipeline/skills/shared/core/multi-agent/SKILL.md +11 -11
  53. package/pipeline/skills/shared/core/multi-agent-dev-autopilot/SKILL.md +1 -1
  54. package/pipeline/skills/shared/core/multi-agent-finish/SKILL.md +1 -1
  55. package/pipeline/skills/shared/core/multi-agent-help/SKILL.md +8 -8
  56. package/pipeline/skills/shared/core/multi-agent-review/SKILL.md +5 -5
  57. package/pipeline/skills/shared/core/multi-agent-sync/SKILL.md +7 -5
  58. package/pipeline/scripts/smoke-readme-counts.sh +0 -120
@@ -508,7 +508,7 @@ Log: `Phase 0 Step 7: taskType = {component|bugfix|feature|refactor|chore}`
508
508
 
509
509
  **Cost:** ~$0.0025 per Haiku call. The pipeline's other expensive phases (Phase 4 reviewers, Phase 3 Sonnet codegen) far outweigh this - the value is avoiding the ~30 min wasted when Phase 3 builds the wrong thing because Phase 0 didn't ask.
510
510
 
511
- **Reference:** see `pipeline/agents/task-clarifier.md` for the full scoring rubric, question quality rules, and prior-art citations (Devin Knowledge / Cursor Plan Mode).
511
+ **Reference:** see `pipeline/agents/task-clarifier.md` for the full scoring rubric and question-quality rules.
512
512
 
513
513
  **Why this fits Phase 0 (not a new phase):** clarification doesn't change what code gets written - it changes what gets understood before code is written. Phase 0 already collects identity / project / branch / maturity; ambiguity scoring fits naturally as the last contextual gate.
514
514
 
@@ -1,6 +1,6 @@
1
- ### Phase 1: Analysis (Opus)
1
+ ### Phase 1: Analysis (Fable)
2
2
 
3
- > **TLDR** - Opus-driven codebase exploration. Detects if the issue is already fixed (git blame, closed PRs), then launches parallel Explore sub-agents to map the affected code paths. Outputs: impact analysis, stack detection (auto-selects platform guide), relevant files, risk areas. Feeds Phase 2 planning.
3
+ > **TLDR** - Fable-driven codebase exploration (Opus when the fallback ladder engages). Detects if the issue is already fixed (git blame, closed PRs), then launches parallel Explore sub-agents to map the affected code paths. Outputs: impact analysis, stack detection (auto-selects platform guide), relevant files, risk areas. Feeds Phase 2 planning.
4
4
 
5
5
  <!-- progress-contract: applied -->
6
6
  Progress emission per `refs/progress-contract.md` - lines for each Explore dispatch, each finish, analyst synthesis start, `analysis.json` write.
@@ -1,6 +1,6 @@
1
- ### Phase 2: Planning (Opus)
1
+ ### Phase 2: Planning (Fable)
2
2
 
3
- > **TLDR** - Opus decomposes the analysis into concrete tasks with file-level targets, risk grading, and architecture review. Before Phase 3 a **Plan Approval Gate** runs in normal mode: if the Jira/issue description is ambiguous the orchestrator asks the user structured clarification questions (max 2 rounds) - once scope is clear it renders the plan and loops on free-text edit requests until the user approves or aborts. The gate is **skipped entirely** for `--dev`, `autopilot`, and `--dev autopilot` (their speed/zero-interaction contracts are preserved).
3
+ > **TLDR** - Fable (Opus when the fallback ladder engages) decomposes the analysis into concrete tasks with file-level targets, risk grading, and architecture review. Before Phase 3 a **Plan Approval Gate** runs in normal mode: if the Jira/issue description is ambiguous the orchestrator asks the user structured clarification questions (max 2 rounds) - once scope is clear it renders the plan and loops on free-text edit requests until the user approves or aborts. The gate is **skipped entirely** for `--dev`, `autopilot`, and `--dev autopilot` (their speed/zero-interaction contracts are preserved).
4
4
 
5
5
  <!-- progress-contract: applied -->
6
6
  Progress emission per `refs/progress-contract.md` - lines for plan-draft start, clarification-ask, clarification-answer, plan render, plan-edit-request, plan-approved, plan-aborted.
@@ -124,7 +124,7 @@ Log: "Phase 2: Plan - {N} tasks created, {M} with architecture review, validat
124
124
 
125
125
  #### Step 4.5 - Emit Plan Todo List (opt-in)
126
126
 
127
- **Gated by `prefs.global.planTodos.enabled`** (default: `false`). When enabled, after the planning-output JSON validates and BEFORE the approval gate, transform `tasks[]` into a structured Todo list conforming to `pipeline/schemas/plan-todos.schema.json` and persist into `agent-state.plan`. Pattern source: Windsurf Cascade's always-visible Todo list (<https://docs.windsurf.com/windsurf/cascade>) and Cursor Plan Mode's reviewable plan (<https://cursor.com/docs/agent/planning>).
127
+ **Gated by `prefs.global.planTodos.enabled`** (default: `false`). When enabled, after the planning-output JSON validates and BEFORE the approval gate, transform `tasks[]` into a structured Todo list conforming to `pipeline/schemas/plan-todos.schema.json` and persist into `agent-state.plan`. The plan is rendered as a live, always-visible Todo list.
128
128
 
129
129
  ```bash
130
130
  TODO_BLOB=$(jq '
@@ -41,7 +41,7 @@ Phase 3 consumes the Phase 2 output object conforming to `pipeline/schemas/plann
41
41
 
42
42
  **Plan Todo iteration (opt-in)**: gated by `prefs.global.planTodos.enabled` (default: `false`). When enabled and Phase 2 Step 4.5 emitted a `plan.todos[]`, Phase 3 iterates via `pipeline/lib/plan-todos.sh next/start/complete/fail` instead of walking `tasks[]` directly. When disabled, the loop walks `tasks[]` from `planning-output` - TDD contract is unchanged. Full helper loop + state semantics: `refs/features/plan-todos.md`.
43
43
 
44
- **Shadow-Git checkpoints (opt-in)**: gated by `prefs.global.shadowGit.enabled` (default: `false`). When enabled, the orchestrator snapshots the worktree via `pipeline/lib/shadow-git.sh` so sub-phase rollback is possible without touching the project's real `.git` history. Cline-style. Lifecycle: `shadow-git.sh init` (Phase 0 baseline), `shadow-git.sh snapshot` (per step after `plan-todos complete`), `shadow-git.sh restore <sha> --files` (rollback). Modes: `per-todo-step` (default) or `per-tool-call`. Full wiring + storage cap: `refs/features/shadow-git.md`.
44
+ **Shadow-Git checkpoints (opt-in)**: gated by `prefs.global.shadowGit.enabled` (default: `false`). When enabled, the orchestrator snapshots the worktree via `pipeline/lib/shadow-git.sh` so sub-phase rollback is possible without touching the project's real `.git` history. Lifecycle: `shadow-git.sh init` (Phase 0 baseline), `shadow-git.sh snapshot` (per step after `plan-todos complete`), `shadow-git.sh restore <sha> --files` (rollback). Modes: `per-todo-step` (default) or `per-tool-call`. Full wiring + storage cap: `refs/features/shadow-git.md`.
45
45
 
46
46
  #### Component tasks - delegated dispatch (taskType === "component")
47
47
 
@@ -51,7 +51,7 @@ For non-component taskTypes (`bugfix`, `feature`, `refactor`, `chore`), continue
51
51
 
52
52
  #### Re-entry from Phase 4 triage
53
53
 
54
- Phase 3 runs twice in the pipeline lifetime: first for initial development, then optionally for rework after Phase 4 review. **Phase 3 never acts on raw reviewer output.** It only consumes `triage.accepted` findings - Opus triage in Phase 4 already filtered false-positives, deferred out-of-scope items, and rejected noise.
54
+ Phase 3 runs up to twice in the pipeline lifetime: first for initial development, then optionally for rework after Phase 4 review. **Phase 3 never acts on raw reviewer output.** It only consumes `triage.accepted` findings - Fable triage in Phase 4 has already filtered false-positives, deferred out-of-scope items, and rejected noise.
55
55
 
56
56
  When re-entering from Phase 4:
57
57
 
@@ -1,6 +1,6 @@
1
1
  ### Phase 4: Review (deterministic gates + parallel + triage)
2
2
 
3
- > **TLDR** - Three-stage review. Stage 1: deterministic gates (build + lint + test + secret scan) that MUST pass. Stage 2: AI models in parallel - reviewer set is **CLI-aware**: Claude Code dispatches 2 reviewers (Opus + Sonnet); Copilot CLI dispatches 3 reviewers (GPT-5.4 + Opus + Sonnet). Stage 3: Opus triage - evaluates raw findings, filters false-positives/out-of-scope, keeps only actionable items. Only triage-accepted blocking items loop back to Phase 3.
3
+ > **TLDR** - Three-stage review. Stage 1: deterministic gates (build + lint + test + secret scan) that MUST pass. Stage 2: AI models in parallel - reviewer set is **CLI-aware**: Claude Code dispatches 2 reviewers (Fable + Sonnet); Copilot CLI dispatches 3 reviewers (GPT-5.4 + Opus + Sonnet — Fable 5 is not offered on Copilot CLI). Stage 3: Fable triage (Opus on Copilot CLI) - evaluates raw findings, filters false-positives/out-of-scope, keeps only actionable items. Only triage-accepted blocking items loop back to Phase 3.
4
4
 
5
5
  <!-- progress-contract: applied -->
6
6
  Progress emission per `refs/progress-contract.md` - lines for each gate, each reviewer dispatch + finish, triage start, triage verdict, fix dispatch.
@@ -181,17 +181,17 @@ Launch Agent instances **in parallel** using the shared `code-reviewer` subagent
181
181
 
182
182
  | Reviewer | subagent_type | Model | Focus | Skills Referenced | Where it runs |
183
183
  | ---------- | ----------------- | ------------------- | --------------------------------- | --------------------------------------------- | -------------------- |
184
- | Reviewer 1 | `code-reviewer` | `claude-opus-4.6` | Deep security + architecture | `api-security-best-practices`, `architecture` | Both CLIs |
184
+ | Reviewer 1 | `code-reviewer` | `claude-fable-5` (Claude Code) / `claude-opus-4-8` (Copilot CLI) | Deep security + architecture | `api-security-best-practices`, `architecture` | Both CLIs |
185
185
  | Reviewer 2 | `code-reviewer` | `gpt-5.4` | Edge cases, different perspective | cross-model diversity | **Copilot CLI only** |
186
- | Reviewer 3 | `code-reviewer` | `claude-sonnet-4.6` | Quality + correctness + naming | `clean-code`, stack-specific skill | Both CLIs |
186
+ | Reviewer 3 | `code-reviewer` | `claude-sonnet-4-6` | Quality + correctness + naming | `clean-code`, stack-specific skill | Both CLIs |
187
187
 
188
188
  Each reviewer inherits the `code-reviewer` agent's focus areas (Security, Architecture, Quality, Performance) and output contract. The orchestrator overrides only the model and the stack-specific skill per-reviewer - no prompt duplication.
189
189
 
190
- **Model override wiring:** `code-reviewer.md` declares `preferredModel: fable`, so Reviewer 1 uses the persona default (Fable 5). Reviewer 2 (Copilot-only, `gpt-5.4`) and Reviewer 3 (`claude-sonnet-4.6`) set `PHASE_MODEL_OVERRIDE=<model>` before dispatch - the orchestrator exports `CLAUDE_CODE_SUBAGENT_MODEL` on Claude Code, or passes `--model` on Copilot CLI. Full precedence rule: `skills/shared/core/multi-agent/SKILL.md#agent-dispatch--per-persona-model-routing-v610`. Fable dispatches are subject to the fallback contract (`refs/features/model-fallback.md`): dispatch-error retry walks `fable -> opus -> sonnet` and budget-ceiling downgrade.
190
+ **Model override wiring:** `code-reviewer.md` declares `preferredModel: fable`, so Reviewer 1 uses the persona default (Fable 5). Reviewer 2 (Copilot-only, `gpt-5.4`) and Reviewer 3 (`claude-sonnet-4-6`) set `PHASE_MODEL_OVERRIDE=<model>` before dispatch - the orchestrator exports `CLAUDE_CODE_SUBAGENT_MODEL` on Claude Code, or passes `--model` on Copilot CLI. Full precedence rule: `skills/shared/core/multi-agent/SKILL.md#agent-dispatch--per-persona-model-routing-v610`. Fable dispatches are subject to the fallback contract (`refs/features/model-fallback.md`): a dispatch-error retry that walks `fable -> opus -> sonnet`, plus a budget-ceiling downgrade.
191
191
 
192
192
  **Stack-specific skills loaded per reviewer** (from Phase 1 `detectedStack`). On Claude Code, Reviewer 2 (GPT-5.4) is not dispatched - its skill column is ignored. On Copilot CLI all three columns are used.
193
193
 
194
- | Stack | Reviewer 1 (Opus) | Reviewer 2 (GPT-5.4 - Copilot CLI only) | Reviewer 3 (Sonnet) |
194
+ | Stack | Reviewer 1 (Fable / Opus on Copilot) | Reviewer 2 (GPT-5.4 - Copilot CLI only) | Reviewer 3 (Sonnet) |
195
195
  |-------|-------------------|-----------------------------------------|---------------------|
196
196
  | iOS/Swift | `ios-security`, `swiftui-performance`, `hig-patterns` | `swift-concurrency`, `ios-accessibility` | `swiftui-pro`, `swift-testing` |
197
197
  | Android/Kotlin | `android-security`, `android-performance` | `compose-testing`, `android-architecture` | `compose-components`, `kotlin-coroutines-expert` |
@@ -204,11 +204,11 @@ Skills are injected into reviewer prompt context - the reviewer uses them as r
204
204
 
205
205
  **iOS/Swift - interaction & convention skills (conditional).** When the diff touches SwiftUI UI files (`*View.swift`, `*Screen.swift`, `*Configuration.swift`, `*+Modifiers.swift`), additionally inject the relevant `figma-common` convention skills as reference for the iOS reviewers: `figma-navigation`, `figma-overlays`, `figma-bottom-sheets` (interaction: emit-intent vs self-route/self-present; native-SwiftUI-first vs the project's `ui.*` custom system), and the enriched `figma-to-swiftui` accessibility rules (minimalism). These back the Step 1.5 iOS convention checks. Generic across SwiftUI projects - not tied to any one app. Omit when the diff has no SwiftUI UI changes (keeps the reviewer prompt lean).
206
206
 
207
- **Dispatch timeout (required, mirrors triage 3.3).** Reviewers run in parallel and triage waits on all of them, so one stalled reviewer hangs the phase. Bound each reviewer dispatch by `REVIEWER_TIMEOUT_SECONDS` (default 180). If a reviewer has not returned by the budget: log `review.reviewer_timeout reviewer=<name>`, treat that reviewer as absent, and proceed to triage with the reviewers that did return. The merged-findings count and `consensus.reviewerCount` reflect only the reviewers that returned. If **zero** reviewers return, retry the Opus reviewer once; on a second total failure HALT with `ERR: no reviewer returned within ${REVIEWER_TIMEOUT_SECONDS}s; resume with /multi-agent:resume #N.`. The Step 2.5 rebuttal round uses the same per-dispatch timeout. Never block indefinitely on a slow or dead reviewer dispatch.
207
+ **Dispatch timeout (required, mirrors triage 3.3).** Reviewers run in parallel and triage waits on all of them, so one stalled reviewer hangs the phase. Bound each reviewer dispatch by `REVIEWER_TIMEOUT_SECONDS` (default 180). If a reviewer has not returned by the budget: log `review.reviewer_timeout reviewer=<name>`, treat that reviewer as absent, and proceed to triage with the reviewers that did return. The merged-findings count and `consensus.reviewerCount` reflect only the reviewers that returned. If **zero** reviewers return, retry Reviewer 1 once; on a second total failure HALT with `ERR: no reviewer returned within ${REVIEWER_TIMEOUT_SECONDS}s; resume with /multi-agent:resume #N.`. The Step 2.5 rebuttal round uses the same per-dispatch timeout. Never block indefinitely on a slow or dead reviewer dispatch.
208
208
 
209
209
  #### Output contract - reviewer step
210
210
 
211
- Step 2 produces N reviewer-output objects (one per dispatched reviewer), each conforming to `pipeline/schemas/reviewer-output.schema.json`. They are persisted to `state.reviewIterations[<iteration>].reviewers[]` and consumed by Step 3 (Opus triage) - never by Phase 6 directly. The triage step (below) is the producer of the only review artifact Phase 6 reads, conforming to `pipeline/schemas/triage-output.schema.json`.
211
+ Step 2 produces N reviewer-output objects (one per dispatched reviewer), each conforming to `pipeline/schemas/reviewer-output.schema.json`. They are persisted to `state.reviewIterations[<iteration>].reviewers[]` and consumed by Step 3 (Fable triage) - never by Phase 6 directly. The triage step (below) is the producer of the only review artifact Phase 6 reads, conforming to `pipeline/schemas/triage-output.schema.json`.
212
212
 
213
213
  **Subagent return format** - each reviewer returns JSON conforming to `pipeline/schemas/reviewer-output.schema.json`:
214
214
 
@@ -248,9 +248,9 @@ Exit 0 = valid. Exit 2 = contradiction (approved=true with blocking findings) -
248
248
 
249
249
  **Off by default reason:** mixed-verdict cases are ~8% of runs in practice; the extra ~$0.20-$0.50 per run isn't worth automating for users who'd rather let triage resolve it cleanly. Users with high-stakes tasks (security-critical, release branches) can flip the flag.
250
250
 
251
- #### Step 3 - Opus Triage (filter before acting)
251
+ #### Step 3 - Fable Triage (filter before acting)
252
252
 
253
- **CRITICAL**: Reviewer findings are **raw signals**, not commands. Never auto-loop on every "blocking" tag - reviewers hallucinate, misread scope, or repeat each other. Run Opus triage to evaluate merged findings against task scope.
253
+ **CRITICAL**: Reviewer findings are **raw signals**, not commands. Never auto-loop on every "blocking" tag - reviewers hallucinate, misread scope, or repeat each other. Run Fable triage (Opus on Copilot CLI) to evaluate merged findings against task scope.
254
254
 
255
255
  ##### 3.1 Short-circuit: no findings
256
256
 
@@ -258,7 +258,7 @@ If merged findings `length === 0`, **skip triage**: write empty result `{"accept
258
258
 
259
259
  ##### 3.2 Launch triage agent
260
260
 
261
- Launch **1 Agent** (subagent_type: `general-purpose`, model: `opus`) with:
261
+ Launch **1 Agent** (subagent_type: `general-purpose`, model: `fable` on Claude Code / `opus` on Copilot CLI) with:
262
262
 
263
263
  - Raw findings from Reviewer 1 + Reviewer 2 (merged JSON)
264
264
  - Task scope (Phase 1 analysis summary + Phase 2 plan)
@@ -307,11 +307,11 @@ Step 3 produces a single triage-output object conforming to `pipeline/schemas/tr
307
307
 
308
308
  Return ONLY valid JSON conforming to pipeline/schemas/triage-output.schema.json:
309
309
  {
310
- "accepted": [{ "severity": "blocking|important|suggestion", "file": "...", "line": N, "issue": "...", "fix": "...", "reviewer": "opus|sonnet" }],
310
+ "accepted": [{ "severity": "blocking|important|suggestion", "file": "...", "line": N, "issue": "...", "fix": "...", "reviewer": "fable|opus|sonnet|gpt" }],
311
311
  "deferred": [{ "finding": {...}, "reason": "..." }],
312
312
  "rejected": [{ "finding": {...}, "reason": "..." }],
313
313
  "approved": true|false, // true if no accepted blocking items remain
314
- "consensus": { "reviewerCount": N, "verdict": "unanimous-pass|unanimous-block|split|unverified", "disagreements": [{ "file": "...", "line": N, "issue": "...", "note": "Opus blocking, Sonnet approved" }] } // optional, see 3.6
314
+ "consensus": { "reviewerCount": N, "verdict": "unanimous-pass|unanimous-block|split|unverified", "disagreements": [{ "file": "...", "line": N, "issue": "...", "note": "Fable blocking, Sonnet approved" }] } // optional, see 3.6
315
315
  }
316
316
  ```
317
317
 
@@ -352,12 +352,12 @@ Failure fallback (timeout >120s, or agent crash before any JSON is produced): re
352
352
  Emit metrics per review pass for Phase 7 cost rollup:
353
353
 
354
354
  ```bash
355
- LOG_METRIC_FORWARD_TO_TRACKER=1 pipeline/scripts/log-metric.sh "$TASK_ID" 4 review.reviewer_call model=opus duration_ms=$OPUS_DURATION tokens_in=$OPUS_IN tokens_out=$OPUS_OUT
355
+ LOG_METRIC_FORWARD_TO_TRACKER=1 pipeline/scripts/log-metric.sh "$TASK_ID" 4 review.reviewer_call model=fable duration_ms=$R1_DURATION tokens_in=$R1_IN tokens_out=$R1_OUT # model=opus on Copilot CLI
356
356
  # GPT-5.4 metric emitted only on Copilot CLI (skip on Claude Code):
357
357
  [ "${CLI_HOST:-claude}" = "copilot" ] && \
358
358
  LOG_METRIC_FORWARD_TO_TRACKER=1 pipeline/scripts/log-metric.sh "$TASK_ID" 4 review.reviewer_call model=gpt-5.4 duration_ms=$GPT_DURATION tokens_in=$GPT_IN tokens_out=$GPT_OUT
359
359
  LOG_METRIC_FORWARD_TO_TRACKER=1 pipeline/scripts/log-metric.sh "$TASK_ID" 4 review.reviewer_call model=sonnet duration_ms=$SONNET_DURATION tokens_in=$SONNET_IN tokens_out=$SONNET_OUT
360
- LOG_METRIC_FORWARD_TO_TRACKER=1 pipeline/scripts/log-metric.sh "$TASK_ID" 4 review.triage_call model=opus duration_ms=$TRIAGE_DURATION tokens_in=$TRIAGE_IN tokens_out=$TRIAGE_OUT
360
+ LOG_METRIC_FORWARD_TO_TRACKER=1 pipeline/scripts/log-metric.sh "$TASK_ID" 4 review.triage_call model=fable duration_ms=$TRIAGE_DURATION tokens_in=$TRIAGE_IN tokens_out=$TRIAGE_OUT
361
361
  pipeline/scripts/log-metric.sh "$TASK_ID" 4 review.completed raw_count=$RAW accepted=$ACC deferred=$DEF rejected=$REJ approved=$APPROVED duration_ms=$DURATION
362
362
  ```
363
363
 
@@ -365,18 +365,18 @@ pipeline/scripts/log-metric.sh "$TASK_ID" 4 review.completed raw_count=$RAW acce
365
365
 
366
366
  ##### 3.5 Optional cross-check (single-point-of-failure mitigation)
367
367
 
368
- Opt-in via `prefs.global.triageCrossCheck.enabled` (default `false`). Sampled runs dispatch a **Sonnet** triage agent as second opinion, validated via `validate-triage.mjs` (same fallback rules). Disagreements logged as `triage.cross_check_diff`; `blockOnDisagreement` pauses for user (autopilot: proceed with Opus verdict). Doubles triage cost on sampled runs.
368
+ Opt-in via `prefs.global.triageCrossCheck.enabled` (default `false`). Sampled runs dispatch a **Sonnet** triage agent as second opinion, validated via `validate-triage.mjs` (same fallback rules). Disagreements logged as `triage.cross_check_diff`; `blockOnDisagreement` pauses for user (autopilot: proceed with the Fable verdict). Doubles triage cost on sampled runs.
369
369
 
370
370
  ##### 3.6 Consensus surfacing (anti-correlation)
371
371
 
372
- **Rationale:** Reviewer 1 (Opus) and Reviewer 3 (Sonnet) share a base model family, so unanimous agreement on a *judgment call* is not independent confirmation - same-family models drift the same way on ambiguous prompts. Treating "both approved" as proof produces false-consensus passes. Triage therefore records a `consensus` block (schema v3.1.0) and surfaces disagreement and unverified agreement to the user rather than burying it.
372
+ **Rationale:** Reviewer 1 (Fable) and Reviewer 3 (Sonnet) are both Anthropic Claude models, so unanimous agreement on a *judgment call* is not independent confirmation - same-family models drift the same way on ambiguous prompts. Treating "both approved" as proof produces false-consensus passes. Triage therefore records a `consensus` block (schema v3.1.0) and surfaces disagreement and unverified agreement to the user rather than burying it.
373
373
 
374
374
  After the triage verdict is computed, populate `triage.consensus`:
375
375
 
376
376
  1. `reviewerCount` = number of reviewers dispatched this iteration (`2` on Claude Code, `3` on Copilot CLI).
377
377
  2. Classify the iteration `verdict`:
378
378
  - `unanimous-block` -> all reviewers returned at least one overlapping `blocking` finding.
379
- - `split` -> reviewers disagreed on existence or severity of one or more findings (the Step 2.5 disagreement definition). List each split in `disagreements[]` with a `note` naming who held which position (e.g. "Opus blocking, Sonnet approved").
379
+ - `split` -> reviewers disagreed on existence or severity of one or more findings (the Step 2.5 disagreement definition). List each split in `disagreements[]` with a `note` naming who held which position (e.g. "Fable blocking, Sonnet approved").
380
380
  - `unanimous-pass` -> all reviewers approved AND the diff is low-risk (no security/auth/concurrency surface per Phase 1 `touchedAreas`). Clear-cut; trust it.
381
381
  - `unverified` -> all reviewers approved BUT the diff touches a judgment-heavy surface (security, auth, concurrency, money, data migration). Agreement here may be correlated; do NOT treat it as a confirmed pass. Surface it.
382
382
  3. `disagreements[]` is populated for `split` and is also used to carry `unverified` notes (e.g. "both approved a keychain change - agreement unverified, confirm manually").
@@ -430,7 +430,7 @@ for proj in $(jq -r '.projects[] | "\(.name)\t\(.worktreePath)\t\(.baseBranch)"'
430
430
  done
431
431
  ```
432
432
 
433
- Same 3 reviewers (Opus / GPT-5.4 / Sonnet) receive `COMBINED_DIFF` with a multi-repo prefix in the system prompt:
433
+ The same reviewer set (Fable-or-Opus / GPT-5.4 / Sonnet) receives `COMBINED_DIFF` with a multi-repo prefix in the system prompt:
434
434
 
435
435
  ```
436
436
  This is a multi-repo task spanning {N} repos: {repo names}.
@@ -128,7 +128,7 @@ Every phase that dispatches a billable LLM agent MUST forward the call's token t
128
128
 
129
129
  ```bash
130
130
  LOG_METRIC_FORWARD_TO_TRACKER=1 pipeline/scripts/log-metric.sh "$TASK_ID" <phase-id> <event> \
131
- model=<opus|sonnet|haiku|gpt-5.4> \
131
+ model=<fable|opus|sonnet|haiku|gpt-5.4> \
132
132
  tokens_in=$IN tokens_out=$OUT duration_ms=$DUR
133
133
  ```
134
134
 
@@ -24,9 +24,7 @@ The agent detects which CLI it's running in and uses the appropriate visual mech
24
24
  ```
25
25
  1. system prompt mentions "Claude Code" → claude-code
26
26
  2. system prompt mentions "Copilot" / "GitHub Copilot" → copilot
27
- 3. system prompt mentions "Cursor" → cursor
28
- 4. system prompt mentions "Cascade" / "Windsurf" → windsurf
29
- 5. None of the above → generic (bash stdout)
27
+ 3. None of the above → generic (bash stdout)
30
28
  ```
31
29
 
32
30
  Visual mechanism per CLI:
@@ -1,5 +1,5 @@
1
1
  ---
2
- description: "Run parallel review on a branch's diff or a Pull Request: 2 models on Claude Code (Opus + Sonnet), 3 models on Copilot CLI (GPT + Opus + Sonnet). On PR input, posts per-finding inline comments and sets approve/needs-work review state."
2
+ description: "Run parallel review on a branch's diff or a Pull Request: 2 models on Claude Code (Fable + Sonnet), 3 models on Copilot CLI (GPT + Opus + Sonnet). On PR input, posts per-finding inline comments and sets approve/needs-work review state."
3
3
  argument-hint: "[#N | repo#N | PR-URL | branch] - optional: PR by number/URL, repo+number, or local branch. Supports GitHub and Bitbucket Server URLs. If omitted, the current branch is used."
4
4
  ---
5
5
 
@@ -112,13 +112,13 @@ Save the diff to `/tmp/multi-agent-review-${TASK_ID}-diff.patch` so reviewers ca
112
112
  ### 3. Launch parallel reviewers - host-CLI dependent
113
113
 
114
114
  **Claude Code (2 in parallel):**
115
- - Agent 1: `claude-opus-4.6` → security + architecture
116
- - Agent 2: `claude-sonnet-4.6` → general quality
115
+ - Agent 1: `claude-fable-5` → security + architecture
116
+ - Agent 2: `claude-sonnet-4-6` → general quality
117
117
 
118
118
  **Copilot CLI (3 in parallel):**
119
- - Agent 1: `claude-opus-4.6` → security + architecture
119
+ - Agent 1: `claude-opus-4-8` → security + architecture (Fable 5 is not offered on Copilot CLI)
120
120
  - Agent 2: `gpt-5.4` → edge cases, alternate perspective
121
- - Agent 3: `claude-sonnet-4.6` → general quality
121
+ - Agent 3: `claude-sonnet-4-6` → general quality
122
122
 
123
123
  Each reviewer receives the diff plus the standard reviewer system prompt (see `refs/phases/phase-4-review.md` for the prompt contract). Output: structured `findings[]` per reviewer.
124
124
 
@@ -137,7 +137,7 @@ Each finding gets the `ruleID` from the catalog plus the platform policy ref:
137
137
 
138
138
  Catalog-only - does NOT invoke binaries. For a full scan, use `/multi-agent:test "store-ready"`.
139
139
 
140
- ### 5. Triage (Opus)
140
+ ### 5. Triage (Fable)
141
141
 
142
142
  Classify findings into:
143
143
  - 🔴 **Blocking** → must fix
@@ -152,10 +152,10 @@ Triage also marks each finding as `accepted` (real issue), `deferred` (real but
152
152
  🔍 Review Complete · PR #1250 · 3 files +120 -45
153
153
  | Model | Verdict | Blocking | Important | Suggestion |
154
154
  |----------|-----------|----------|-----------|------------|
155
- | Opus | approved | 0 | 1 | 3 |
155
+ | Fable | approved | 0 | 1 | 3 |
156
156
  | Sonnet | rejected | 1 | 2 | 5 |
157
157
 
158
- Consensus: ⚠ DISAGREEMENT - see Opus triage
158
+ Consensus: ⚠ DISAGREEMENT - see Fable triage
159
159
  ```
160
160
 
161
161
  This summary ALWAYS prints, regardless of input mode. The chat is the live conversation; on the PR side, the durable artifacts are inline comments + the review state (Step 7).
@@ -58,7 +58,7 @@ Run every step automatically:
58
58
  Step 0: FIGMA_SYNC SKIP (deprecated - feedback_figma_source_deprecated)
59
59
  Step 1: PLATFORM Detect macOS / Linux / Windows (Git Bash / WSL); export PLATFORM env
60
60
  Step 1.5: DETECT Compare timestamps, find stale targets
61
- Step 2: COPILOT Claude Code -> Copilot CLI (instructions + 34 sub-command skills)
61
+ Step 2: COPILOT Claude Code -> Copilot CLI (instructions + 35 sub-command skills)
62
62
  Step 3: REPO Claude Code -> pipeline repo (genericized, personal data scrub, bash -n on all sh)
63
63
  Step 3c: PLUGINS pipeline shared/external -> multi-agent-plugins marketplace (rebuild knowledge/,
64
64
  bump changed plugins' patch version, commit + push the plugins repo)
@@ -277,11 +277,11 @@ This runs on the Claude <-> Copilot axis — the two CLIs the pipeline supports
277
277
  |-------------|-------------|
278
278
  | `~/.claude/commands/multi-agent/{cmd}.md` | `~/.copilot/skills/multi-agent-{cmd}/SKILL.md` |
279
279
 
280
- **34 commands are synced** (canonical inventory - must match `cross-cli-contract.md` section 1; drift = contract violation):
280
+ **35 commands are synced** (canonical inventory - must match `cross-cli-contract.md` section 1; drift = contract violation):
281
281
 
282
282
  ```
283
283
  analysis, analysis-resolve, autopilot, build-optimize, channels, delete, dev,
284
- dev-autopilot, dev-local, dev-local-autopilot, diff-explain, garbage-collect,
284
+ dev-autopilot, dev-local, dev-local-autopilot, diff-explain, finish, garbage-collect,
285
285
  help, issue, jira, kill, language, local, local-autopilot, log, manual-test,
286
286
  prune-logs, purge, refactor, resume, review, scan, search, setup, stack, status,
287
287
  sync, test, update
@@ -1,5 +1,5 @@
1
1
  ---
2
- description: "Task orchestrator - full pipeline via Jira ID + branch or GitHub Issue URL: analysis, plan, TDD development, parallel review + Opus triage (CLI-aware: 2-model on Claude Code, 3-model on Copilot CLI), commit, log"
2
+ description: "Task orchestrator - full pipeline via Jira ID + branch or GitHub Issue URL: analysis, plan, TDD development, parallel review + Fable triage (CLI-aware: 2-model on Claude Code, 3-model on Copilot CLI), commit, log"
3
3
  allowed-tools: Agent, Bash, Read, Write, Edit, Glob, Grep, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion, WebFetch, WebSearch, NotebookEdit, Skill
4
4
  ---
5
5
 
@@ -140,14 +140,14 @@ This command uses lazy loading for token efficiency. Read the relevant sub-file
140
140
  - Multiple stacks -> load all relevant guides
141
141
 
142
142
  **Agent definitions** (used in Phase 1 and Phase 4):
143
- - `$HOME/.claude/agents/code-reviewer.md` - Phase 4 reviewer persona (`preferredModel: opus`; Phase 4 overrides Reviewer 3 to `sonnet`)
143
+ - `$HOME/.claude/agents/code-reviewer.md` - Phase 4 reviewer persona (`preferredModel: fable`; Phase 4 overrides Reviewer 3 to `sonnet`)
144
144
  - `$HOME/.claude/agents/explorer.md` - Phase 1 codebase scan persona (`preferredModel: sonnet` - scan work, cost-efficient)
145
- - `$HOME/.claude/agents/ios-architect.md` - iOS architecture review (`preferredModel: opus`)
146
- - `$HOME/.claude/agents/android-architect.md` - Android architecture review (`preferredModel: opus`)
147
- - `$HOME/.claude/agents/backend-architect.md` - Backend/API architecture review (`preferredModel: opus`)
145
+ - `$HOME/.claude/agents/ios-architect.md` - iOS architecture review (`preferredModel: fable`)
146
+ - `$HOME/.claude/agents/android-architect.md` - Android architecture review (`preferredModel: fable`)
147
+ - `$HOME/.claude/agents/backend-architect.md` - Backend/API architecture review (`preferredModel: fable`)
148
148
  - `$HOME/.claude/agents/security-auditor.md` - Security audit (`preferredModel: opus`)
149
149
 
150
- **Per-persona model routing:** Before each Agent dispatch, the orchestrator reads `preferredModel` from the persona file and exports `CLAUDE_CODE_SUBAGENT_MODEL` (Claude Code) / passes `--model` (Copilot CLI). Precedence: per-dispatch `PHASE_MODEL_OVERRIDE` > persona `preferredModel` > `opus`. Full contract: `skills/shared/core/multi-agent/SKILL.md#agent-dispatch--per-persona-model-routing-v610`.
150
+ **Per-persona model routing:** Before each Agent dispatch, the orchestrator reads `preferredModel` from the persona file and exports `CLAUDE_CODE_SUBAGENT_MODEL` (Claude Code) / passes `--model` (Copilot CLI). Precedence: per-dispatch `PHASE_MODEL_OVERRIDE` > persona `preferredModel` > `fable` (falls back per `refs/features/model-fallback.md`). Full contract: `skills/shared/core/multi-agent/SKILL.md#agent-dispatch--per-persona-model-routing-v610`.
151
151
 
152
152
  ---
153
153
 
@@ -247,7 +247,7 @@ When called with `review`:
247
247
  1. Detect current branch and project from cwd (or ask)
248
248
  2. Get diff: `git diff HEAD` (unstaged + staged)
249
249
  3. If no diff, get diff against base branch: `git diff origin/{baseBranch}...HEAD`
250
- 4. Launch Phase 4 review (parallel + Opus triage - 2-model on Claude Code, 3-model on Copilot CLI) on the diff
250
+ 4. Launch Phase 4 review (parallel + Fable triage - 2-model on Claude Code, 3-model on Copilot CLI) on the diff
251
251
  5. No worktree, no state file - lightweight one-shot review
252
252
  6. Print findings to terminal
253
253
 
@@ -2,11 +2,8 @@
2
2
  #
3
3
  # plan-todos.sh - manage the Phase 2 plan as a live Todo list.
4
4
  #
5
- # Pattern source:
6
- # - Windsurf Cascade - https://docs.windsurf.com/windsurf/cascade
7
- # "renders a Todo list inside the conversation that updates as it works"
8
- # - Cursor Plan Mode - https://cursor.com/docs/agent/planning
9
- # "creates a detailed, reviewable, editable plan before writing any code"
5
+ # The Phase 2 plan is broken into a live, reviewable Todo list that updates
6
+ # step-by-step as Phase 3 works through it.
10
7
  #
11
8
  # State lives in `agent-state.json` under `.plan.todos[]` per
12
9
  # pipeline/schemas/plan-todos.schema.json. Phase 2 (Planning) emits the
@@ -137,7 +137,7 @@ render_inline_body() {
137
137
  printf '_%s_\n\n' "$rule_id"
138
138
  fi
139
139
  printf -- '---\n🤖 _Multi-Agent Review · iteration #%s_\n' "$ITERATION"
140
- # Dedupe marker - Bugbot-style. Re-runs of /multi-agent:review skip a
140
+ # Dedupe marker - fingerprint-based. Re-runs of /multi-agent:review skip a
141
141
  # finding when an existing comment carries the same fingerprint.
142
142
  if [ -n "$fingerprint" ]; then
143
143
  printf '<!-- multi-agent-finding: %s -->\n' "$fingerprint"
@@ -229,7 +229,7 @@ post_github() {
229
229
  rc_body="See inline comments above."
230
230
  fi
231
231
 
232
- # Dedupe gate - read pref, default ON (Bugbot-style). Loads existing
232
+ # Dedupe gate - read pref, default ON. Loads existing
233
233
  # comments once if needed; per-finding check is in-process.
234
234
  local DEDUPE_ENABLED
235
235
  DEDUPE_ENABLED=$(jq -r '.global.review.dedupeInlineComments // true' \
@@ -3,12 +3,8 @@
3
3
  # review-watch.sh - poll watched GitHub repos for incoming PRs and
4
4
  # dispatch `/multi-agent:review` on each new/updated PR.
5
5
  #
6
- # Pattern source:
7
- # - Cursor Bugbot - https://cursor.com/docs/bugbot
8
- # "Bugbot automatically reviews every PR update; reads existing PR
9
- # comments to avoid duplicate feedback."
10
- # - Devin Review - https://docs.devin.ai/work-with-devin/devin-review
11
- # "Auto-Review on PR open / new commit / draft-ready / reviewer-add."
6
+ # Auto-reviews every new/updated PR the user did not author, reading existing
7
+ # PR comments to avoid duplicate feedback.
12
8
  #
13
9
  # What it does:
14
10
  # - Reads watched repos from prefs.global.reviewWatch.repos[] (or --repos).
@@ -2,11 +2,9 @@
2
2
  #
3
3
  # shadow-git.sh - per-tool-call checkpoints in a separate git repo.
4
4
  #
5
- # Pattern source:
6
- # - Cline checkpoints - https://docs.cline.bot/features/checkpoints
7
- # "Cline maintains a shadow Git repository separate from your project's
8
- # actual Git history... After each tool use, Cline commits the current
9
- # state of your files to this shadow repo."
5
+ # A shadow git repo, separate from the project's real .git, records a snapshot
6
+ # of the working tree after each tool use so a sub-phase can be rolled back
7
+ # without polluting the user's semantic commit history.
10
8
  #
11
9
  # Why a SHADOW repo (not the real .git): the real git tree holds the user's
12
10
  # semantic commits - clean history, intentional messages. Shadow snapshots
@@ -183,7 +183,7 @@
183
183
  "planEditRequests": {
184
184
  "type": "array",
185
185
  "items": { "type": "string" },
186
- "description": "v5.3.0 Phase 2 - free-text edit instructions the user typed between plan renders. Preserved verbatim for audit; Opus parses them conversationally to revise the plan."
186
+ "description": "v5.3.0 Phase 2 - free-text edit instructions the user typed between plan renders. Preserved verbatim for audit; the planning model (Fable top tier) parses them conversationally to revise the plan."
187
187
  }
188
188
  }
189
189
  }
@@ -2,7 +2,7 @@
2
2
  "$schema": "http://json-schema.org/draft-07/schema#",
3
3
  "$id": "https://example.com/pipeline/clarify-output.schema.json",
4
4
  "title": "Clarification Output",
5
- "description": "Schema for Phase 0 Step 9 task-clarifier sub-agent output. Source: pipeline/agents/task-clarifier.md. Pattern reference: Devin clarifying-question loop (https://docs.devin.ai/work-with-devin/devin-review) and Cursor Plan Mode (https://cursor.com/docs/agent/planning).",
5
+ "description": "Schema for Phase 0 Step 9 task-clarifier sub-agent output. Source: pipeline/agents/task-clarifier.md. A clarifying-question loop that runs before planning.",
6
6
  "type": "object",
7
7
  "additionalProperties": false,
8
8
  "required": ["clarityScore", "questions", "stopAndAsk"],
@@ -2,7 +2,7 @@
2
2
  "$schema": "http://json-schema.org/draft-07/schema#",
3
3
  "$id": "https://example.com/pipeline/plan-todos.schema.json",
4
4
  "title": "Plan Todo List",
5
- "description": "Structured representation of Phase 2's plan. Persists into agent-state under `.plan.todos[]` and survives across phases. Phase 3 (Dev) iterates step-by-step; Phase 4 (Review) verifies completed steps against criteria; Phase 7 (Report) renders a per-step rollup. Pattern source: Windsurf Cascade (https://docs.windsurf.com/windsurf/cascade) renders a live Todo list inside the conversation; Cursor Plan Mode (https://cursor.com/docs/agent/planning) builds an editable plan before any code.",
5
+ "description": "Structured representation of Phase 2's plan. Persists into agent-state under `.plan.todos[]` and survives across phases. Phase 3 (Dev) iterates step-by-step; Phase 4 (Review) verifies completed steps against criteria; Phase 7 (Report) renders a per-step rollup. The plan is broken into a live, structured Todo list for step-by-step tracking across phases.",
6
6
  "type": "object",
7
7
  "additionalProperties": false,
8
8
  "required": ["title", "todos"],
@@ -709,7 +709,7 @@
709
709
  "dedupeInlineComments": {
710
710
  "type": "boolean",
711
711
  "default": true,
712
- "description": "Bugbot/Devin Review behaviour: before posting an inline comment, scan existing PR comments for a stable fingerprint marker (sha-16 of path|line|issue). If a comment with the same marker exists, skip - preserves audit trail without flooding the PR on re-runs. Provider-agnostic: GitHub /pulls/{n}/comments + /issues/{n}/comments and Bitbucket Server /pull-requests/{id}/activities?fromType=COMMENT are both checked. Set to false to restore the pre-v8.6 behavior (every run posts fresh comments, original spec)."
712
+ "description": "Deduplicated PR review comments: before posting an inline comment, scan existing PR comments for a stable fingerprint marker (sha-16 of path|line|issue). If a comment with the same marker exists, skip - preserves audit trail without flooding the PR on re-runs. Provider-agnostic: GitHub /pulls/{n}/comments + /issues/{n}/comments and Bitbucket Server /pull-requests/{id}/activities?fromType=COMMENT are both checked. Set to false to restore the pre-v8.6 behavior (every run posts fresh comments, original spec)."
713
713
  }
714
714
  }
715
715
  },
@@ -727,7 +727,7 @@
727
727
  "type": "string",
728
728
  "enum": ["per-tool-call", "per-todo-step", "off"],
729
729
  "default": "per-todo-step",
730
- "description": "Snapshot frequency. per-tool-call mirrors Cline (one snapshot after each Edit/Write/Bash mutation); per-todo-step snapshots once per plan-todos.sh step boundary (cheaper, recommended). off is equivalent to enabled=false."
730
+ "description": "Snapshot frequency. per-tool-call snapshots after each Edit/Write/Bash mutation; per-todo-step snapshots once per plan-todos.sh step boundary (cheaper, recommended). off is equivalent to enabled=false."
731
731
  },
732
732
  "pruneAfterDays": {
733
733
  "type": "integer",
@@ -741,7 +741,7 @@
741
741
  "planTodos": {
742
742
  "type": "object",
743
743
  "additionalProperties": false,
744
- "description": "v8.6+ - Plan-as-live-Todo-list. Phase 2 emits agent-state.plan.todos[] conforming to pipeline/schemas/plan-todos.schema.json; Phase 3 iterates step-by-step via pipeline/lib/plan-todos.sh next/start/complete; Phase 7 renders the rollup into agent-log.md and the PR body. Pattern source: Windsurf Cascade (https://docs.windsurf.com/windsurf/cascade) Todo list inside the conversation; Cursor Plan Mode (https://cursor.com/docs/agent/planning) reviewable plan. Off by default - opt in to add status-transition writes per Phase 3 step in exchange for sub-step visibility + per-step notes.",
744
+ "description": "v8.6+ - Plan-as-live-Todo-list. Phase 2 emits agent-state.plan.todos[] conforming to pipeline/schemas/plan-todos.schema.json; Phase 3 iterates step-by-step via pipeline/lib/plan-todos.sh next/start/complete; Phase 7 renders the rollup into agent-log.md and the PR body. The plan is broken into a live, structured Todo list. Off by default - opt in to add status-transition writes per Phase 3 step in exchange for sub-step visibility + per-step notes.",
745
745
  "properties": {
746
746
  "enabled": {
747
747
  "type": "boolean",
@@ -753,7 +753,7 @@
753
753
  "clarifyAmbiguous": {
754
754
  "type": "object",
755
755
  "additionalProperties": false,
756
- "description": "v8.6+ - Phase 0 Step 9 clarifying-question loop. Before Phase 1 starts, a cheap Haiku classifier scores task ambiguity (0-10) and emits up to N questions if score < threshold. Pattern source: Devin Knowledge / Ask Devin (https://docs.devin.ai/work-with-devin/devin-review) and Cursor Plan Mode clarifying questions (https://cursor.com/docs/agent/planning). Cost: ~$0.0025 per Haiku call. Off by default - flip on for teams burned by ambiguity-driven rework or when working on cross-team issues where the spec lives in someone else's head.",
756
+ "description": "v8.6+ - Phase 0 Step 9 clarifying-question loop. Before Phase 1 starts, a cheap Haiku classifier scores task ambiguity (0-10) and emits up to N questions if score < threshold. Cost: ~$0.0025 per Haiku call. Off by default - flip on for teams burned by ambiguity-driven rework or when working on cross-team issues where the spec lives in someone else's head.",
757
757
  "properties": {
758
758
  "enabled": {
759
759
  "type": "boolean",
@@ -831,16 +831,16 @@
831
831
  },
832
832
  "pricingModel": {
833
833
  "type": "string",
834
- "enum": ["opus", "sonnet", "haiku"],
835
- "default": "opus",
836
- "description": "Which cost-table.json rate to price accumulated tokens at. Defaults to opus for a deliberately conservative (upper-bound) estimate, so the ceiling trips early rather than late."
834
+ "enum": ["fable", "opus", "sonnet", "haiku"],
835
+ "default": "fable",
836
+ "description": "Which cost-table.json rate to price accumulated tokens at. Defaults to fable (the top tier since v10.6.0) for a deliberately conservative (upper-bound) estimate, so the ceiling trips early rather than late."
837
837
  }
838
838
  }
839
839
  },
840
840
  "reviewWatch": {
841
841
  "type": "object",
842
842
  "additionalProperties": false,
843
- "description": "v8.6+ - Auto-review incoming PRs via gh CLI polling. Inspired by Cursor Bugbot (https://cursor.com/docs/bugbot) and Devin Review (https://docs.devin.ai/work-with-devin/devin-review): trigger /multi-agent:review on PRs the user did NOT author. Disabled by default. Configure repos via .repos[] then either run pipeline/lib/review-watch.sh --watch as a background process or schedule it via cron.",
843
+ "description": "v8.6+ - Auto-review incoming PRs via gh CLI polling. Auto-triggers /multi-agent:review on PRs the user did NOT author. Disabled by default. Configure repos via .repos[] then either run pipeline/lib/review-watch.sh --watch as a background process or schedule it via cron.",
844
844
  "properties": {
845
845
  "enabled": {
846
846
  "type": "boolean",
@@ -19,7 +19,7 @@
19
19
  },
20
20
  "reviewer": {
21
21
  "type": "string",
22
- "description": "Model label for this output (e.g. 'opus', 'sonnet', 'gpt'). Present once the parallel reviewer outputs are merged into the Phase 4 array so triage/consensus can attribute each finding to its source. Optional on a single reviewer's raw pre-merge output."
22
+ "description": "Model label for this output (e.g. 'fable', 'opus', 'sonnet', 'gpt'). Present once the parallel reviewer outputs are merged into the Phase 4 array so triage/consensus can attribute each finding to its source. Optional on a single reviewer's raw pre-merge output."
23
23
  }
24
24
  },
25
25
  "$defs": {
@@ -74,8 +74,8 @@
74
74
  },
75
75
  "reviewer": {
76
76
  "type": "string",
77
- "enum": ["opus", "sonnet"],
78
- "description": "Which reviewer produced the raw finding. Haiku was removed in v2.1.0."
77
+ "enum": ["fable", "opus", "sonnet", "gpt"],
78
+ "description": "Which reviewer produced the raw finding. Claude Code Reviewer 1 is fable (opus when fallback engages); Copilot CLI adds gpt. Haiku was removed in v2.1.0."
79
79
  },
80
80
  "consensus": {
81
81
  "type": "object",
@@ -64,12 +64,11 @@ Installed into `~/.claude/scripts/` and invoked by settings.json hook configurat
64
64
  - `pre-push-check.sh` - runs before `git push` (smoke-cross-cli-behavior + smoke-personal-data)
65
65
  - `output-quality-check.sh` - runs after PR body / Jira comment generation (newline / HTML entity guard)
66
66
 
67
- ## Runtime helpers (13 files)
67
+ ## Runtime helpers
68
68
  Shell scripts invoked during pipeline execution.
69
69
 
70
70
  - `phase-banner.sh` - renders phase headers
71
71
  - `phase-tracker.sh` - live tracker state + tokens accumulation + render
72
- - `stack-swap.sh` - stack detection + skill set swap
73
72
  - `keychain-save.sh` - store PAT in macOS Keychain
74
73
  - `audit-log.sh` + `audit-log-rotate.sh` - opt-in audit trail
75
74
  - `log-metric.sh` - opt-in metric capture
@@ -66,7 +66,7 @@ if (flags.help || flags.h) {
66
66
  }
67
67
 
68
68
  // --- resolve config: prefs first, CLI overrides -----------------------------
69
- const cfg = { enabled: false, maxUsd: 5.0, warnPct: 80, onExceed: "warn", pricingModel: "opus" };
69
+ const cfg = { enabled: false, maxUsd: 5.0, warnPct: 80, onExceed: "warn", pricingModel: "fable" };
70
70
 
71
71
  if (flags.prefs) {
72
72
  if (!existsSync(flags.prefs)) die(`prefs file not found: ${flags.prefs}`);
@@ -2,6 +2,13 @@
2
2
  "_readme": "Per-model unit prices in USD per million tokens. Source: Anthropic public pricing (verified 2026-04-21). Update when Anthropic publishes new tiers. Unknown models render USD as ' - ' and emit a footnote - never block PR-body generation. cacheReadPerMtok is the discounted rate for prompt-cache hits (~10% of inPerMtok); the renderer prices a phase's tokens_cached at this rate when the tracker records it, so resume/cache reuse is visible in the ledger.",
3
3
  "schemaVersion": "1.1.0",
4
4
  "prices": {
5
+ "fable": {
6
+ "inPerMtok": 10.0,
7
+ "outPerMtok": 50.0,
8
+ "cacheReadPerMtok": 1.0,
9
+ "modelId": "claude-fable-5",
10
+ "note": "Top tier (restored v10.6.0) - architects, Reviewer 1, triage. Verified against Anthropic pricing 2026-07-02."
11
+ },
5
12
  "opus": {
6
13
  "inPerMtok": 5.0,
7
14
  "outPerMtok": 25.0,
@@ -1,16 +1,16 @@
1
1
  .claude/CLAUDE.md 1
2
2
  .claude/agents 8
3
- .claude/commands 87
3
+ .claude/commands 88
4
4
  .claude/lib 23
5
5
  .claude/multi-agent-preferences.json 1
6
6
  .claude/rules 12
7
7
  .claude/schemas 23
8
- .claude/scripts 174
8
+ .claude/scripts 167
9
9
  .claude/settings.json 1
10
- .claude/skills 555
10
+ .claude/skills 560
11
11
  .copilot/agents 8
12
12
  .copilot/copilot-instructions.md 1
13
13
  .copilot/lib 23
14
14
  .copilot/schemas 23
15
- .copilot/scripts 174
16
- .copilot/skills 590
15
+ .copilot/scripts 167
16
+ .copilot/skills 596
@@ -8,7 +8,7 @@
8
8
  # 4. State directory is created
9
9
  # 5. prefs schema exposes global.reviewWatch.{enabled,repos,intervalSeconds,labelFilter}
10
10
  # 6. global.reviewWatch.enabled defaults to false (opt-in)
11
- # 7. global.review.dedupeInlineComments default is true (Bugbot parity)
11
+ # 7. global.review.dedupeInlineComments default is true (dedupe on by default)
12
12
  # 8. post-pr-review.sh exposes finding_fingerprint + comment_exists_with_fingerprint
13
13
  # 9. render_inline_body accepts a 5th fingerprint arg and embeds the marker
14
14
  # 10. Unknown command exits non-zero
@@ -81,7 +81,7 @@ fi
81
81
  # 7. dedupeInlineComments default true
82
82
  if jq -e '.properties.global.properties.review.properties.dedupeInlineComments
83
83
  | has("default") and .default == true' "$SCHEMA" >/dev/null 2>&1; then
84
- record_pass "review.dedupeInlineComments defaults to true (Bugbot parity)"
84
+ record_pass "review.dedupeInlineComments defaults to true (dedupe parity)"
85
85
  else
86
86
  record_fail "review.dedupeInlineComments should default to true"
87
87
  fi