@event4u/agent-config 2.11.0 → 2.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/.agent-src/commands/council/analysis.md +142 -0
  2. package/.agent-src/commands/council/debate.md +129 -0
  3. package/.agent-src/commands/council/default.md +8 -0
  4. package/.agent-src/commands/council/design.md +16 -12
  5. package/.agent-src/commands/council/optimize.md +16 -15
  6. package/.agent-src/commands/council/pr.md +12 -12
  7. package/.agent-src/commands/council.md +48 -2
  8. package/.agent-src/personas/advisors/contrarian.md +95 -0
  9. package/.agent-src/personas/advisors/executor.md +99 -0
  10. package/.agent-src/personas/advisors/expansionist.md +98 -0
  11. package/.agent-src/personas/advisors/first-principles.md +98 -0
  12. package/.agent-src/personas/advisors/outsider.md +102 -0
  13. package/.agent-src/rules/copilot-routing.md +19 -0
  14. package/.agent-src/rules/devcontainer-routing.md +20 -0
  15. package/.agent-src/rules/laravel-routing.md +20 -0
  16. package/.agent-src/rules/symfony-routing.md +20 -0
  17. package/.agent-src/skills/ai-council/SKILL.md +180 -2
  18. package/.agent-src/skills/canvas-design/SKILL.md +132 -0
  19. package/.agent-src/skills/canvas-design/evals/triggers.json +16 -0
  20. package/.agent-src/skills/copilot-config/SKILL.md +1 -1
  21. package/.agent-src/skills/devcontainer/SKILL.md +1 -1
  22. package/.agent-src/skills/doc-coauthoring/SKILL.md +129 -0
  23. package/.agent-src/skills/doc-coauthoring/evals/triggers.json +16 -0
  24. package/.agent-src/skills/laravel/SKILL.md +1 -1
  25. package/.agent-src/skills/project-analysis-core/SKILL.md +1 -1
  26. package/.agent-src/skills/project-analyzer/SKILL.md +1 -1
  27. package/.agent-src/skills/skill-writing/SKILL.md +101 -16
  28. package/.agent-src/skills/sql-writing/SKILL.md +1 -1
  29. package/.agent-src/skills/symfony-workflow/SKILL.md +1 -1
  30. package/.agent-src/skills/universal-project-analysis/SKILL.md +1 -1
  31. package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
  32. package/.claude-plugin/marketplace.json +5 -1
  33. package/AGENTS.md +1 -1
  34. package/CHANGELOG.md +78 -0
  35. package/CONTRIBUTING.md +5 -0
  36. package/README.md +3 -3
  37. package/config/agent-settings.template.yml +5 -84
  38. package/docs/architecture/multi-tool-projection.md +53 -0
  39. package/docs/architecture/{compression.md → source-projection.md} +21 -3
  40. package/docs/architecture.md +6 -6
  41. package/docs/catalog.md +21 -11
  42. package/docs/contracts/adr-architectural-consensus-mechanism.md +67 -0
  43. package/docs/contracts/adr-level-6-productization.md +2 -2
  44. package/docs/contracts/ai-council-config.md +186 -0
  45. package/docs/contracts/command-clusters.md +57 -1
  46. package/docs/contracts/multi-tool-projection-fidelity.md +109 -0
  47. package/docs/getting-started.md +2 -2
  48. package/package.json +1 -1
  49. package/scripts/_archive/README.md +59 -0
  50. package/scripts/ai_council/_default_prices.py +10 -1
  51. package/scripts/ai_council/advisors.py +148 -0
  52. package/scripts/ai_council/clients.py +189 -4
  53. package/scripts/ai_council/config.py +368 -0
  54. package/scripts/ai_council/consensus.py +290 -0
  55. package/scripts/ai_council/orchestrator.py +634 -16
  56. package/scripts/ai_council/prompts.py +335 -0
  57. package/scripts/check_compressed_paths.py +6 -1
  58. package/scripts/check_references.py +25 -0
  59. package/scripts/ci_time_ratio.py +168 -0
  60. package/scripts/council_cli.py +1007 -32
  61. package/scripts/measure_projection_bytes.py +159 -0
  62. package/scripts/measure_roadmap_trajectory.py +112 -0
  63. package/scripts/probe_projection_fidelity.py +202 -0
  64. package/scripts/run_skill_evals.py +185 -0
  65. package/scripts/schemas/skill.schema.json +4 -0
  66. package/scripts/score_skill_selection.py +198 -0
  67. package/scripts/skill_collision_clusters.py +162 -0
  68. package/scripts/skill_linter.py +71 -1
  69. package/scripts/{_backfill_skill_domains.py → _archive/_backfill_skill_domains.py} +0 -0
  70. package/scripts/{_bootstrap_tier_frontmatter.py → _archive/_bootstrap_tier_frontmatter.py} +0 -0
  71. package/scripts/{_p43_bodies.py → _archive/_p43_bodies.py} +0 -0
  72. package/scripts/{_p43_compress.py → _archive/_p43_compress.py} +0 -0
  73. package/scripts/{_p4_migrate.py → _archive/_p4_migrate.py} +0 -0
  74. package/scripts/{_phase2_shim_helper.py → _archive/_phase2_shim_helper.py} +0 -0
  75. package/scripts/{_pilot_council_question.py → _archive/_pilot_council_question.py} +0 -0
@@ -24,6 +24,7 @@ import yaml
24
24
 
25
25
  REPO_ROOT = Path(__file__).resolve().parents[1]
26
26
  SETTINGS_FILE = REPO_ROOT / ".agent-settings.yml"
27
+ AI_COUNCIL_FILE = REPO_ROOT / "agents" / ".ai-council.yml"
27
28
 
28
29
  sys.path.insert(0, str(REPO_ROOT))
29
30
 
@@ -31,14 +32,26 @@ from scripts.ai_council.bundler import ( # noqa: E402
31
32
  BundleTooLarge, bundle_prompt, bundle_roadmap,
32
33
  )
33
34
  from scripts.ai_council.clients import ( # noqa: E402
34
- AnthropicClient, CouncilResponse, ExternalAIClient, ManualClient,
35
- OpenAIClient, load_anthropic_key, load_openai_key,
35
+ DEFAULT_MAX_TOKENS, UNLIMITED_TOKENS_FALLBACK,
36
+ AnthropicClient, CouncilResponse, ExternalAIClient, GeminiClient,
37
+ ManualClient, OpenAIClient, PerplexityClient, XAIClient,
38
+ load_anthropic_key, load_openai_key,
39
+ )
40
+ from scripts.ai_council.advisors import ( # noqa: E402
41
+ AdvisorPlan, build_persona_labels, plan_advisor_swap,
42
+ )
43
+ from scripts.ai_council.config import ( # noqa: E402
44
+ AdvisorConfig, CouncilConfig, CouncilConfigError,
45
+ load_council_config, resolve_api_key,
36
46
  )
37
47
  from scripts.ai_council.modes import ( # noqa: E402
38
48
  InvalidModeError, resolve_mode,
39
49
  )
40
50
  from scripts.ai_council.orchestrator import ( # noqa: E402
41
- CostBudget, CouncilQuestion, consult, estimate, render,
51
+ ConsensusResult,
52
+ CostBudget, CouncilQuestion, DebateCapExceeded, DebateCheckpoint,
53
+ PeerReviewResult, consult, estimate, render,
54
+ run_consensus_scoring, run_debate, run_peer_review,
42
55
  )
43
56
  from scripts.ai_council.pricing import ( # noqa: E402
44
57
  PriceTable, estimate_cost, load_prices,
@@ -47,21 +60,92 @@ from scripts.ai_council.project_context import detect_project_context # noqa: E
47
60
 
48
61
  SCHEMA_VERSION = 1
49
62
 
63
+ #: Provider names accepted under `mode=api`. Mirrors the routing table
64
+ #: in ``_construct_api_member``; both must stay in sync.
65
+ _API_PROVIDERS = frozenset({"anthropic", "openai", "gemini", "xai", "perplexity"})
66
+
50
67
 
51
68
  class CouncilDisabledError(RuntimeError):
52
69
  """Raised when ai_council.enabled is false or no member is enabled."""
53
70
 
54
71
 
55
- def load_settings(path: Path = SETTINGS_FILE) -> dict[str, Any]:
72
+ def load_settings(
73
+ path: Path = SETTINGS_FILE,
74
+ *,
75
+ ai_council_path: Path = AI_COUNCIL_FILE,
76
+ ) -> dict[str, Any]:
56
77
  """Load merged settings via the centralized loader.
57
78
 
58
79
  road-to-portable-dev-preferences P3 migration: tolerance contract
59
80
  (missing file / malformed YAML / no PyYAML) is handled uniformly by
60
81
  ``load_agent_settings``. ``ai_council.*`` keys are not whitelisted,
61
82
  so the project file remains authoritative for council config.
83
+
84
+ Step-2 council-redesign overlay: when ``agents/.ai-council.yml``
85
+ exists it is the single source of truth — the validated config is
86
+ synthesized back into ``settings['ai_council']`` and wins over any
87
+ legacy block in ``.agent-settings.yml``. The pre-2 path stays alive
88
+ so the migration breadcrumb in ``.agent-settings.yml`` can ship
89
+ independently.
62
90
  """
63
91
  from scripts._lib.agent_settings import load_agent_settings
64
- return load_agent_settings(project_path=path)
92
+ settings = load_agent_settings(project_path=path)
93
+ if ai_council_path.exists():
94
+ cfg = load_council_config(ai_council_path)
95
+ settings["ai_council"] = _synthesize_ai_council_block(cfg)
96
+ return settings
97
+
98
+
99
+ def _synthesize_ai_council_block(cfg: CouncilConfig) -> dict[str, Any]:
100
+ """Project a validated ``CouncilConfig`` onto the legacy dict shape.
101
+
102
+ ``build_members`` and the ``_resolve_*`` helpers read the legacy
103
+ ``ai_council.*`` keys — keeping the projection identical means no
104
+ downstream caller changes. ``api_key_ref`` is carried through; raw
105
+ keys are never resolved here (resolution is lazy, per enabled
106
+ member, inside ``_construct_api_member``).
107
+ """
108
+ members: dict[str, dict[str, Any]] = {}
109
+ for name, m in cfg.members.items():
110
+ entry: dict[str, Any] = {"enabled": m.enabled, "model": m.model}
111
+ if m.api_key_ref is not None:
112
+ entry["api_key_ref"] = m.api_key_ref
113
+ if m.mode is not None:
114
+ entry["mode"] = m.mode
115
+ members[name] = entry
116
+ advisors: dict[str, dict[str, Any]] = {}
117
+ for name, a in cfg.advisors.items():
118
+ entry = {
119
+ "enabled": a.enabled,
120
+ "member": a.member,
121
+ "persona": a.persona,
122
+ }
123
+ if a.model is not None:
124
+ entry["model"] = a.model
125
+ advisors[name] = entry
126
+ return {
127
+ "enabled": cfg.enabled,
128
+ "mode": cfg.defaults.mode,
129
+ "min_rounds": cfg.defaults.min_rounds,
130
+ "deep_min_rounds": cfg.defaults.deep_min_rounds,
131
+ "max_output_tokens": cfg.defaults.max_output_tokens,
132
+ "session_retention_days": cfg.defaults.session_retention_days,
133
+ "debate_max_rounds": cfg.defaults.debate_max_rounds,
134
+ "cost_budget": {
135
+ "max_input_tokens": cfg.cost_budget.max_input_tokens,
136
+ "max_output_tokens": cfg.cost_budget.max_output_tokens,
137
+ "max_calls": cfg.cost_budget.max_calls,
138
+ "max_total_usd": cfg.cost_budget.max_total_usd,
139
+ },
140
+ "consensus_scoring": {
141
+ "enabled": cfg.consensus_scoring.enabled,
142
+ "strong_threshold": cfg.consensus_scoring.strong_threshold,
143
+ "minority_threshold": cfg.consensus_scoring.minority_threshold,
144
+ "lenses": list(cfg.consensus_scoring.lenses),
145
+ },
146
+ "members": members,
147
+ "advisors": advisors,
148
+ }
65
149
 
66
150
 
67
151
  def build_members(
@@ -137,12 +221,17 @@ def build_members(
137
221
  raise CouncilDisabledError(
138
222
  f"--siblings requires mode=api for member {name!r} (got {mode!r})."
139
223
  )
224
+ api_key_ref = cfg.get("api_key_ref")
140
225
  for sib_model in siblings[name]:
141
- members.append(_construct_api_member(name, sib_model))
226
+ members.append(
227
+ _construct_api_member(name, sib_model, api_key_ref=api_key_ref),
228
+ )
142
229
  continue
143
230
  model = overrides.get(name) or cfg.get("model")
144
- if mode == "api" and name in {"anthropic", "openai"}:
145
- members.append(_construct_api_member(name, model))
231
+ if mode == "api" and name in _API_PROVIDERS:
232
+ members.append(
233
+ _construct_api_member(name, model, api_key_ref=cfg.get("api_key_ref")),
234
+ )
146
235
  elif mode == "manual":
147
236
  members.append(ManualClient(name=name, model=model or "manual"))
148
237
  elif mode == "playwright":
@@ -151,7 +240,8 @@ def build_members(
151
240
  )
152
241
  else:
153
242
  raise CouncilDisabledError(
154
- f"member {name!r} has no transport — mode={mode}, name not in {{anthropic,openai}}."
243
+ f"member {name!r} has no transport — mode={mode}, "
244
+ f"name not in {sorted(_API_PROVIDERS)!r}."
155
245
  )
156
246
  if not members:
157
247
  raise CouncilDisabledError(
@@ -161,16 +251,124 @@ def build_members(
161
251
  return members
162
252
 
163
253
 
164
- def _construct_api_member(name: str, model: str | None) -> ExternalAIClient:
165
- """Build an api-mode client for a known provider name."""
254
+ def _build_advisor_plans(
255
+ ai_cfg: dict[str, Any],
256
+ repo_root: Path,
257
+ ) -> dict[str, AdvisorPlan]:
258
+ """Reconstruct AdvisorConfig from the projected dict, then plan swaps.
259
+
260
+ The legacy ``ai_council.advisors`` dict shape is the projection
261
+ written by ``_synthesize_ai_council_block``. Disabled advisors are
262
+ silently skipped by ``plan_advisor_swap``; one-per-provider is
263
+ enforced there. Returns empty when no advisor block is present.
264
+ """
265
+ raw = ai_cfg.get("advisors") if isinstance(ai_cfg, dict) else None
266
+ if not raw:
267
+ return {}
268
+ advisors: dict[str, AdvisorConfig] = {}
269
+ for name, entry in raw.items():
270
+ if not isinstance(entry, dict):
271
+ continue
272
+ advisors[name] = AdvisorConfig(
273
+ name=name,
274
+ enabled=bool(entry.get("enabled", False)),
275
+ member=str(entry.get("member", "")),
276
+ persona=str(entry.get("persona", "")),
277
+ model=entry.get("model"),
278
+ )
279
+ return plan_advisor_swap(advisors, repo_root)
280
+
281
+
282
+ def _advisor_model_overrides(
283
+ plans: dict[str, AdvisorPlan],
284
+ explicit: dict[str, str] | None,
285
+ ) -> dict[str, str]:
286
+ """Merge advisor model_overrides under explicit ``--model`` flags.
287
+
288
+ Explicit CLI ``--model`` overrides win over advisor-bound model
289
+ overrides — the user's flag is always authoritative.
290
+ """
291
+ merged: dict[str, str] = {}
292
+ for member, plan in plans.items():
293
+ if plan.model_override:
294
+ merged[member] = plan.model_override
295
+ if explicit:
296
+ merged.update(explicit)
297
+ return merged
298
+
299
+
300
+ def _format_advisor_summary(
301
+ plans: dict[str, AdvisorPlan],
302
+ members: list[ExternalAIClient],
303
+ ) -> str:
304
+ """Render the ``advisor: <persona> on <member> via <model>`` lines."""
305
+ if not plans:
306
+ return ""
307
+ member_models = {m.name: m.model for m in members}
308
+ rows: list[str] = []
309
+ for member, plan in plans.items():
310
+ model = member_models.get(member, plan.model_override or "?")
311
+ rows.append(
312
+ f" advisor: {plan.display_name} on {member} via {model}"
313
+ )
314
+ return "\n".join(rows)
315
+
316
+
317
+ def _construct_api_member(
318
+ name: str,
319
+ model: str | None,
320
+ *,
321
+ api_key_ref: str | None = None,
322
+ ) -> ExternalAIClient:
323
+ """Build an api-mode client for a known provider name.
324
+
325
+ ``api_key_ref`` carries the validated ``file:<path>`` / ``env:<VAR>``
326
+ reference from ``agents/.ai-council.yml`` and is resolved lazily here
327
+ so the council does not require keys for disabled providers. When
328
+ ``api_key_ref`` is ``None`` (no new config yet, or legacy code path),
329
+ fall back to the per-provider loaders so the pre-step-2
330
+ ``.agent-settings.yml`` flow keeps working during migration. Tests
331
+ monkeypatch the legacy loaders — that path stays intact.
332
+ """
166
333
  if name == "anthropic":
167
- return AnthropicClient(model=model or "claude-sonnet-4-5",
168
- api_key=load_anthropic_key())
334
+ api_key = (
335
+ resolve_api_key(api_key_ref, scope="ai_council.members.anthropic")
336
+ if api_key_ref else load_anthropic_key()
337
+ )
338
+ return AnthropicClient(model=model or "claude-sonnet-4-5", api_key=api_key)
169
339
  if name == "openai":
170
- return OpenAIClient(model=model or "gpt-4o",
171
- api_key=load_openai_key())
340
+ api_key = (
341
+ resolve_api_key(api_key_ref, scope="ai_council.members.openai")
342
+ if api_key_ref else load_openai_key()
343
+ )
344
+ return OpenAIClient(model=model or "gpt-4o", api_key=api_key)
345
+ if name == "gemini":
346
+ if not api_key_ref:
347
+ raise CouncilDisabledError(
348
+ "member 'gemini' requires api_key_ref in agents/.ai-council.yml "
349
+ "(e.g. `env:GEMINI_API_KEY`) — no legacy fallback."
350
+ )
351
+ api_key = resolve_api_key(api_key_ref, scope="ai_council.members.gemini")
352
+ return GeminiClient(model=model or "gemini-2.5-pro", api_key=api_key)
353
+ if name == "xai":
354
+ if not api_key_ref:
355
+ raise CouncilDisabledError(
356
+ "member 'xai' requires api_key_ref in agents/.ai-council.yml "
357
+ "(e.g. `env:XAI_API_KEY`) — no legacy fallback."
358
+ )
359
+ api_key = resolve_api_key(api_key_ref, scope="ai_council.members.xai")
360
+ return XAIClient(model=model or "grok-4", api_key=api_key)
361
+ if name == "perplexity":
362
+ if not api_key_ref:
363
+ raise CouncilDisabledError(
364
+ "member 'perplexity' requires api_key_ref in agents/.ai-council.yml "
365
+ "(e.g. `env:PERPLEXITY_API_KEY`) — no legacy fallback."
366
+ )
367
+ api_key = resolve_api_key(api_key_ref, scope="ai_council.members.perplexity")
368
+ return PerplexityClient(model=model or "sonar-pro", api_key=api_key)
172
369
  raise CouncilDisabledError(
173
- f"member {name!r} has no api transport (known: anthropic, openai)."
370
+ f"member {name!r} has no api transport "
371
+ f"(known: {sorted(_API_PROVIDERS)!r})."
174
372
  )
175
373
 
176
374
 
@@ -179,8 +377,16 @@ def build_question(
179
377
  input_path: Path,
180
378
  input_mode: str,
181
379
  max_tokens: int,
380
+ prompt_mode_override: str | None = None,
182
381
  ) -> tuple[CouncilQuestion, str]:
183
- """Bundle the input file. Returns (question, artefact_label)."""
382
+ """Bundle the input file. Returns (question, artefact_label).
383
+
384
+ `prompt_mode_override` swaps the per-mode neutrality addendum looked
385
+ up by `system_prompt_for(question.mode, ...)`. The bundle shape is
386
+ unchanged — the bundler still uses `input_mode` to format the
387
+ artefact. Routed by the `/council pr|design|optimize|analysis`
388
+ wrappers via the `--prompt-mode` CLI flag.
389
+ """
184
390
  if input_mode == "prompt":
185
391
  text = input_path.read_text(encoding="utf-8")
186
392
  ctx = bundle_prompt(text)
@@ -192,13 +398,19 @@ def build_question(
192
398
  raise ValueError(
193
399
  f"unsupported input mode: {input_mode!r} (use prompt | roadmap)"
194
400
  )
195
- return CouncilQuestion(mode=ctx.mode, user_prompt=ctx.text,
401
+ mode = prompt_mode_override or ctx.mode
402
+ return CouncilQuestion(mode=mode, user_prompt=ctx.text,
196
403
  max_tokens=max_tokens), artefact
197
404
 
198
405
 
199
406
  def format_estimate_table(
200
407
  members: list[ExternalAIClient],
201
408
  estimates: list[Any],
409
+ *,
410
+ consensus_delta_usd: float = 0.0,
411
+ consensus_extra_calls: int = 0,
412
+ peer_review_delta_usd: float = 0.0,
413
+ peer_review_extra_calls: int = 0,
202
414
  ) -> str:
203
415
  rows = [
204
416
  f" {m.name}/{m.model}: "
@@ -206,10 +418,204 @@ def format_estimate_table(
206
418
  for m, e in zip(members, estimates)
207
419
  ]
208
420
  total = sum(e.total_usd for e in estimates)
421
+ if consensus_extra_calls > 0:
422
+ rows.append(
423
+ f" +consensus scoring: +{consensus_extra_calls} calls "
424
+ f"(~+${consensus_delta_usd:.4f})"
425
+ )
426
+ total += consensus_delta_usd
427
+ if peer_review_extra_calls > 0:
428
+ rows.append(
429
+ f" +peer-review: +{peer_review_extra_calls} calls "
430
+ f"(~+${peer_review_delta_usd:.4f})"
431
+ )
432
+ total += peer_review_delta_usd
209
433
  rows.append(f" TOTAL: ${total:.4f}")
210
434
  return "\n".join(rows)
211
435
 
212
436
 
437
+ def _consensus_cost_delta(
438
+ ai_cfg: dict[str, Any],
439
+ prompt_mode: str,
440
+ estimates: list[Any],
441
+ n_billable: int,
442
+ ) -> tuple[int, float]:
443
+ """Return ``(extra_calls, extra_usd)`` for the consensus round.
444
+
445
+ Active when ``ai_council.consensus_scoring.enabled`` is true AND the
446
+ invocation's lens is in ``consensus_scoring.lenses``. Each member
447
+ contributes two extra calls (extraction + scoring); the worst-case
448
+ cost uses the base per-member estimate as a ceiling.
449
+ """
450
+ cs = ai_cfg.get("consensus_scoring") or {}
451
+ if not cs.get("enabled"):
452
+ return 0, 0.0
453
+ lenses = cs.get("lenses") or ["analysis"]
454
+ if prompt_mode not in lenses:
455
+ return 0, 0.0
456
+ extra_calls = 2 * n_billable
457
+ extra_usd = 2.0 * sum(e.total_usd for e in estimates)
458
+ return extra_calls, extra_usd
459
+
460
+
461
+ def _maybe_run_consensus(
462
+ ai_cfg: dict[str, Any],
463
+ question: CouncilQuestion,
464
+ members: list[ExternalAIClient],
465
+ responses: list[CouncilResponse],
466
+ budget: CostBudget,
467
+ table: PriceTable,
468
+ project: Any,
469
+ args: argparse.Namespace,
470
+ ) -> ConsensusResult | None:
471
+ """Run the consensus scoring round when enabled for this lens."""
472
+ cs = ai_cfg.get("consensus_scoring") or {}
473
+ if not cs.get("enabled"):
474
+ return None
475
+ lenses = cs.get("lenses") or ["analysis"]
476
+ if question.mode not in lenses:
477
+ return None
478
+ return run_consensus_scoring(
479
+ members, responses,
480
+ budget=budget, table=table, project=project,
481
+ original_ask=args.original_ask,
482
+ max_tokens=question.max_tokens,
483
+ strong_threshold=float(cs.get("strong_threshold", 0.7)),
484
+ minority_threshold=float(cs.get("minority_threshold", 0.4)),
485
+ )
486
+
487
+
488
+ def _serialise_consensus(consensus: ConsensusResult) -> dict[str, Any]:
489
+ """Project ConsensusResult onto a JSON-safe dict for session payloads."""
490
+ return {
491
+ "findings": [
492
+ {"id": f.id, "source": f.source, "text": f.text}
493
+ for f in consensus.findings
494
+ ],
495
+ "scores": [
496
+ {
497
+ "finding_id": s.finding_id, "scorer": s.scorer,
498
+ "score": s.score, "agree": s.agree, "reason": s.reason,
499
+ }
500
+ for s in consensus.scores
501
+ ],
502
+ "metadata": {
503
+ fid: {
504
+ "mean_score": m.mean_score,
505
+ "agreement_rate": m.agreement_rate,
506
+ "consensus_strength": m.consensus_strength,
507
+ "dissent_count": m.dissent_count,
508
+ "scorers": list(m.scorers),
509
+ }
510
+ for fid, m in consensus.metadata.items()
511
+ },
512
+ "extraction_responses": _serialise_responses(consensus.extraction_responses),
513
+ "scoring_responses": _serialise_responses(consensus.scoring_responses),
514
+ }
515
+
516
+
517
+ # ── peer-review (Phase 5 / F1, Karpathy anonymous review) ──────────
518
+
519
+
520
+ def _peer_review_active(ai_cfg: dict[str, Any], args: argparse.Namespace) -> bool:
521
+ """Return True when peer-review should fire for this invocation.
522
+
523
+ Resolution chain (highest priority first):
524
+ 1. ``--peer-review`` CLI flag — explicit opt-in.
525
+ 2. ``ai_council.peer_review.enabled: true`` in
526
+ ``agents/.ai-council.yml`` — opt-in via config.
527
+ Both default to false; peer-review is opt-in by R2 verdict.
528
+ """
529
+ if getattr(args, "peer_review", False):
530
+ return True
531
+ pr_cfg = ai_cfg.get("peer_review") or {}
532
+ return bool(pr_cfg.get("enabled"))
533
+
534
+
535
+ def _peer_review_cost_delta(
536
+ ai_cfg: dict[str, Any],
537
+ args: argparse.Namespace,
538
+ estimates: list[Any],
539
+ n_billable: int,
540
+ ) -> tuple[int, float]:
541
+ """Return ``(extra_calls, extra_usd)`` for the peer-review round.
542
+
543
+ One extra call per billable member (each reviews the others). The
544
+ worst-case cost uses the base per-member estimate as a ceiling —
545
+ same heuristic as ``_consensus_cost_delta``.
546
+ """
547
+ if not _peer_review_active(ai_cfg, args):
548
+ return 0, 0.0
549
+ if n_billable < 2:
550
+ # Need ≥ 2 distinct deliberation outputs for peer-review to
551
+ # have anything to review. The orchestrator no-ops below 2.
552
+ return 0, 0.0
553
+ extra_calls = n_billable
554
+ extra_usd = sum(e.total_usd for e in estimates)
555
+ return extra_calls, extra_usd
556
+
557
+
558
+ def _maybe_run_peer_review(
559
+ ai_cfg: dict[str, Any],
560
+ args: argparse.Namespace,
561
+ question: CouncilQuestion,
562
+ members: list[ExternalAIClient],
563
+ responses: list[CouncilResponse],
564
+ budget: CostBudget,
565
+ table: PriceTable,
566
+ project: Any,
567
+ *,
568
+ persona_labels: dict[str, str] | None = None,
569
+ ) -> PeerReviewResult | None:
570
+ """Run the peer-review pass when opted in.
571
+
572
+ No-ops if fewer than 2 successful deliberation responses exist —
573
+ the orchestrator surfaces the empty result in that case.
574
+
575
+ ``persona_labels`` (Phase 6) flows through to ``anonymize_responses``
576
+ so advisor-mode runs render as ``Response A (Contrarian)`` instead
577
+ of bare ``Response A``. Plain-member runs pass ``None``.
578
+ """
579
+ if not _peer_review_active(ai_cfg, args):
580
+ return None
581
+ result = run_peer_review(
582
+ members, responses,
583
+ budget=budget, table=table, project=project,
584
+ original_ask=args.original_ask,
585
+ max_tokens=question.max_tokens,
586
+ persona_labels=persona_labels,
587
+ )
588
+ if not result.responses:
589
+ return None
590
+ return result
591
+
592
+
593
+ def _serialise_peer_review(peer_review: PeerReviewResult) -> dict[str, Any]:
594
+ """Project PeerReviewResult onto a JSON-safe dict for session payloads."""
595
+ return {
596
+ "responses": _serialise_responses(peer_review.responses),
597
+ "label_to_source": dict(peer_review.label_to_source),
598
+ "persona_labels": dict(peer_review.persona_labels),
599
+ }
600
+
601
+
602
+ def _deserialise_peer_review(
603
+ data: dict[str, Any] | None,
604
+ ) -> PeerReviewResult | None:
605
+ """Reconstruct a PeerReviewResult from a session payload section.
606
+
607
+ Returns ``None`` for payloads predating Phase 5 or runs where the
608
+ flag was not passed.
609
+ """
610
+ if not data:
611
+ return None
612
+ return PeerReviewResult(
613
+ responses=_deserialise_responses(data.get("responses") or []),
614
+ label_to_source=dict(data.get("label_to_source") or {}),
615
+ persona_labels=dict(data.get("persona_labels") or {}),
616
+ )
617
+
618
+
213
619
  # ── subcommands ─────────────────────────────────────────────────────
214
620
 
215
621
 
@@ -236,6 +642,32 @@ def _resolve_rounds(args: argparse.Namespace, ai_cfg: dict[str, Any]) -> int:
236
642
  return min_rounds
237
643
 
238
644
 
645
+ def _resolve_max_tokens(args: argparse.Namespace, ai_cfg: dict[str, Any]) -> int:
646
+ """Resolve the per-call output budget passed to each member.
647
+
648
+ Resolution chain (highest priority first):
649
+ 1. ``--max-tokens N`` — explicit invocation override.
650
+ 2. ``ai_council.max_output_tokens`` — settings value (project file
651
+ is authoritative; this key is not user-global-mergeable).
652
+ 3. ``DEFAULT_MAX_TOKENS`` — package fallback (2048).
653
+
654
+ A value of ``0`` at any layer means "unlimited"; it is widened to
655
+ ``UNLIMITED_TOKENS_FALLBACK`` before reaching the SDK because
656
+ Anthropic rejects ``max_tokens=0``. Estimation uses the same expanded
657
+ value so the cost preview reflects the worst-case ceiling.
658
+ """
659
+ cli = getattr(args, "max_tokens", None)
660
+ if cli is not None:
661
+ value = int(cli)
662
+ elif "max_output_tokens" in ai_cfg:
663
+ value = int(ai_cfg.get("max_output_tokens") or 0)
664
+ else:
665
+ value = DEFAULT_MAX_TOKENS
666
+ if value <= 0:
667
+ return UNLIMITED_TOKENS_FALLBACK
668
+ return value
669
+
670
+
239
671
  def cmd_estimate(
240
672
  args: argparse.Namespace,
241
673
  *,
@@ -246,28 +678,112 @@ def cmd_estimate(
246
678
  """Print per-member cost preview. No API calls."""
247
679
  if settings is None:
248
680
  settings = load_settings()
681
+ ai_cfg = (settings.get("ai_council") or {}) if isinstance(settings, dict) else {}
682
+ advisor_plans = _build_advisor_plans(ai_cfg, REPO_ROOT)
683
+ explicit_overrides = _parse_model_overrides(getattr(args, "model", None))
249
684
  if members is None:
250
685
  members = build_members(
251
686
  settings,
252
687
  invocation_mode=args.mode_override,
253
- model_overrides=_parse_model_overrides(getattr(args, "model", None)),
688
+ model_overrides=_advisor_model_overrides(
689
+ advisor_plans, explicit_overrides,
690
+ ),
254
691
  siblings_overrides=_parse_siblings_overrides(getattr(args, "siblings", None)),
255
692
  )
256
693
  if table is None:
257
694
  table = load_prices()
258
695
  question, _ = build_question(
259
696
  input_path=Path(args.question), input_mode=args.input_mode,
260
- max_tokens=args.max_tokens,
697
+ max_tokens=_resolve_max_tokens(args, ai_cfg),
698
+ prompt_mode_override=getattr(args, "prompt_mode", None),
261
699
  )
262
700
  project = detect_project_context(REPO_ROOT)
263
701
  billable = [m for m in members if getattr(m, "billable", True)]
264
702
  estimates = estimate(question, billable, table,
265
- project=project, original_ask=args.original_ask)
703
+ project=project, original_ask=args.original_ask,
704
+ advisor_plans=advisor_plans)
705
+ if getattr(args, "debate", False):
706
+ return _emit_debate_estimate(
707
+ args, ai_cfg, members, billable, estimates, advisor_plans,
708
+ )
709
+ extra_calls, extra_usd = _consensus_cost_delta(
710
+ ai_cfg, question.mode, estimates, len(billable),
711
+ )
712
+ pr_extra_calls, pr_extra_usd = _peer_review_cost_delta(
713
+ ai_cfg, args, estimates, len(billable),
714
+ )
266
715
  sys.stdout.write(
267
716
  f"council:estimate · mode={question.mode} · members={len(members)} "
268
717
  f"(billable={len(billable)})\n"
269
718
  )
270
- sys.stdout.write(format_estimate_table(billable, estimates) + "\n")
719
+ advisor_summary = _format_advisor_summary(advisor_plans, billable)
720
+ if advisor_summary:
721
+ sys.stdout.write(advisor_summary + "\n")
722
+ sys.stdout.write(
723
+ format_estimate_table(
724
+ billable, estimates,
725
+ consensus_delta_usd=extra_usd,
726
+ consensus_extra_calls=extra_calls,
727
+ peer_review_delta_usd=pr_extra_usd,
728
+ peer_review_extra_calls=pr_extra_calls,
729
+ ) + "\n"
730
+ )
731
+ return 0
732
+
733
+
734
+ def _emit_debate_estimate(
735
+ args: argparse.Namespace,
736
+ ai_cfg: dict[str, Any],
737
+ members: list[ExternalAIClient],
738
+ billable: list[ExternalAIClient],
739
+ estimates: list[Any],
740
+ advisor_plans: Any,
741
+ ) -> int:
742
+ """Render the round-by-round debate cost projection.
743
+
744
+ Upper bound only — progressive disclosure may stop the debate early.
745
+ Cost shape mirrors ``cmd_debate``: one call per billable member per
746
+ round, default ``ai_council.min_rounds`` (typically 2), capped at
747
+ ``ai_council.debate_max_rounds`` (typically 4).
748
+ """
749
+ min_rounds = int(ai_cfg.get("min_rounds", 2))
750
+ max_rounds_cap = int(ai_cfg.get("debate_max_rounds", 4))
751
+ requested = (
752
+ int(args.rounds) if getattr(args, "rounds", None) is not None
753
+ else min_rounds
754
+ )
755
+ if requested < 1:
756
+ raise argparse.ArgumentTypeError(
757
+ f"--rounds must be >= 1 (got {requested})"
758
+ )
759
+ if requested > max_rounds_cap:
760
+ raise argparse.ArgumentTypeError(
761
+ f"--rounds={requested} exceeds debate_max_rounds={max_rounds_cap}; "
762
+ f"raise the cap in agents/.ai-council.yml or lower --rounds."
763
+ )
764
+ rounds = requested
765
+ per_round_usd = sum(e.total_usd for e in estimates)
766
+ projected_total = per_round_usd * rounds
767
+ sys.stdout.write(
768
+ f"council:estimate · mode=debate · members={len(members)} "
769
+ f"(billable={len(billable)}) · rounds={rounds} "
770
+ f"(cap={max_rounds_cap})\n"
771
+ )
772
+ advisor_summary = _format_advisor_summary(advisor_plans, billable)
773
+ if advisor_summary:
774
+ sys.stdout.write(advisor_summary + "\n")
775
+ for round_idx in range(1, rounds + 1):
776
+ sys.stdout.write(f"\nRound {round_idx} of {rounds}:\n")
777
+ sys.stdout.write(format_estimate_table(billable, estimates) + "\n")
778
+ if round_idx < rounds:
779
+ sys.stdout.write(" " + "─" * 40 + "\n")
780
+ sys.stdout.write(
781
+ f"\n PROJECTED TOTAL ({rounds} rounds): ${projected_total:.4f}\n"
782
+ )
783
+ sys.stdout.write(
784
+ " Note: progressive disclosure may stop the debate early; "
785
+ "this is an upper bound.\n"
786
+ )
271
787
  return 0
272
788
 
273
789
 
@@ -297,6 +813,44 @@ def _deserialise_responses(items: list[dict[str, Any]]) -> list[CouncilResponse]
297
813
  return out
298
814
 
299
815
 
816
def _deserialise_consensus(data: dict[str, Any] | None) -> ConsensusResult | None:
    """Rebuild a ``ConsensusResult`` from the ``consensus`` section of a payload.

    ``cmd_render`` uses this to re-render saved sessions that captured a
    consensus round. Findings and scores are read back from the payload;
    the aggregate metadata and the bucket are recomputed from them rather
    than read from disk.

    Returns ``None`` when ``data`` is missing or empty (e.g. the payload
    predates Phase 4 or the round was skipped for the lens).
    """
    if not data:
        return None

    # Deferred import: only resolved when a consensus section is present.
    from scripts.ai_council.consensus import (
        ConsensusMetadata, Finding, FindingScore,
        aggregate_scores, bucket_by_threshold,
    )

    findings = []
    for raw_finding in data.get("findings") or []:
        findings.append(
            Finding(
                id=raw_finding["id"],
                source=raw_finding["source"],
                text=raw_finding["text"],
            )
        )

    scores = []
    for raw_score in data.get("scores") or []:
        scores.append(
            FindingScore(
                finding_id=raw_score["finding_id"],
                scorer=raw_score["scorer"],
                score=int(raw_score["score"]),
                agree=bool(raw_score["agree"]),
                reason=raw_score.get("reason", ""),
            )
        )

    metadata = aggregate_scores(findings, scores)
    bucket = bucket_by_threshold(findings, metadata)
    extraction = _deserialise_responses(data.get("extraction_responses") or [])
    scoring = _deserialise_responses(data.get("scoring_responses") or [])
    return ConsensusResult(
        bucket=bucket,
        findings=findings,
        scores=scores,
        metadata=metadata,
        extraction_responses=extraction,
        scoring_responses=scoring,
    )
852
+
853
+
300
854
  def cmd_run(
301
855
  args: argparse.Namespace,
302
856
  *,
@@ -307,28 +861,52 @@ def cmd_run(
307
861
  """Estimate, then run the council. Requires --confirm to spend."""
308
862
  if settings is None:
309
863
  settings = load_settings()
864
+ ai_cfg = (settings.get("ai_council") or {}) if isinstance(settings, dict) else {}
865
+ advisor_plans = _build_advisor_plans(ai_cfg, REPO_ROOT)
866
+ explicit_overrides = _parse_model_overrides(getattr(args, "model", None))
310
867
  if members is None:
311
868
  members = build_members(
312
869
  settings,
313
870
  invocation_mode=args.mode_override,
314
- model_overrides=_parse_model_overrides(getattr(args, "model", None)),
871
+ model_overrides=_advisor_model_overrides(
872
+ advisor_plans, explicit_overrides,
873
+ ),
315
874
  siblings_overrides=_parse_siblings_overrides(getattr(args, "siblings", None)),
316
875
  )
317
876
  if table is None:
318
877
  table = load_prices()
319
878
  question, artefact = build_question(
320
879
  input_path=Path(args.question), input_mode=args.input_mode,
321
- max_tokens=args.max_tokens,
880
+ max_tokens=_resolve_max_tokens(args, ai_cfg),
881
+ prompt_mode_override=getattr(args, "prompt_mode", None),
322
882
  )
323
883
  project = detect_project_context(REPO_ROOT)
324
884
  billable = [m for m in members if getattr(m, "billable", True)]
325
885
  estimates = estimate(question, billable, table,
326
- project=project, original_ask=args.original_ask)
886
+ project=project, original_ask=args.original_ask,
887
+ advisor_plans=advisor_plans)
888
+ extra_calls, extra_usd = _consensus_cost_delta(
889
+ ai_cfg, question.mode, estimates, len(billable),
890
+ )
891
+ pr_extra_calls, pr_extra_usd = _peer_review_cost_delta(
892
+ ai_cfg, args, estimates, len(billable),
893
+ )
327
894
  sys.stdout.write(
328
895
  f"council:run · mode={question.mode} · members={len(members)} "
329
896
  f"(billable={len(billable)})\n"
330
897
  )
331
- sys.stdout.write(format_estimate_table(billable, estimates) + "\n")
898
+ advisor_summary = _format_advisor_summary(advisor_plans, billable)
899
+ if advisor_summary:
900
+ sys.stdout.write(advisor_summary + "\n")
901
+ sys.stdout.write(
902
+ format_estimate_table(
903
+ billable, estimates,
904
+ consensus_delta_usd=extra_usd,
905
+ consensus_extra_calls=extra_calls,
906
+ peer_review_delta_usd=pr_extra_usd,
907
+ peer_review_extra_calls=pr_extra_calls,
908
+ ) + "\n"
909
+ )
332
910
 
333
911
  if not args.confirm:
334
912
  sys.stdout.write(
@@ -337,7 +915,6 @@ def cmd_run(
337
915
  )
338
916
  return 0
339
917
 
340
- ai_cfg = settings.get("ai_council") or {}
341
918
  cost_cfg = ai_cfg.get("cost_budget") or {}
342
919
  budget = CostBudget(
343
920
  max_input_tokens=int(cost_cfg.get("max_input_tokens", 50_000)),
@@ -350,10 +927,28 @@ def cmd_run(
350
927
  members, question, budget,
351
928
  table=table, project=project,
352
929
  original_ask=args.original_ask, rounds=rounds,
930
+ advisor_plans=advisor_plans,
931
+ )
932
+ # Pipeline order (R4 verdict): deliberation → peer-review → consensus
933
+ # → synthesis. Peer-review anonymises only deliberation outputs;
934
+ # consensus-scoring runs on the de-anonymised findings.
935
+ persona_labels = build_persona_labels(advisor_plans, billable)
936
+ peer_review = _maybe_run_peer_review(
937
+ ai_cfg, args, question, members, responses, budget, table, project,
938
+ persona_labels=persona_labels,
939
+ )
940
+ consensus = _maybe_run_consensus(
941
+ ai_cfg, question, members, responses, budget, table, project, args,
353
942
  )
354
943
  estimated_total = sum(e.total_usd for e in estimates)
355
944
  actual_total = 0.0
356
- for r in responses:
945
+ all_responses: list[CouncilResponse] = list(responses)
946
+ if peer_review is not None:
947
+ all_responses.extend(peer_review.responses)
948
+ if consensus is not None:
949
+ all_responses.extend(consensus.extraction_responses)
950
+ all_responses.extend(consensus.scoring_responses)
951
+ for r in all_responses:
357
952
  if r.error:
358
953
  continue
359
954
  ce = estimate_cost(r.provider, r.model, r.input_tokens, r.output_tokens, table)
@@ -361,6 +956,9 @@ def cmd_run(
361
956
  payload = {
362
957
  "schema_version": SCHEMA_VERSION,
363
958
  "mode": question.mode,
959
+ "prompt_mode": getattr(args, "prompt_mode", None),
960
+ "prose_synthesis": getattr(args, "prose_synthesis", None),
961
+ "peer_review_enabled": _peer_review_active(ai_cfg, args),
364
962
  "artefact": artefact,
365
963
  "original_ask": args.original_ask,
366
964
  "members": [f"{m.name}/{m.model}" for m in members],
@@ -369,6 +967,10 @@ def cmd_run(
369
967
  "cost_usd_actual": round(actual_total, 6),
370
968
  "responses": _serialise_responses(responses),
371
969
  }
970
+ if peer_review is not None:
971
+ payload["peer_review"] = _serialise_peer_review(peer_review)
972
+ if consensus is not None:
973
+ payload["consensus"] = _serialise_consensus(consensus)
372
974
  out_path = Path(args.output)
373
975
  out_path.parent.mkdir(parents=True, exist_ok=True)
374
976
  out_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
@@ -380,11 +982,308 @@ def cmd_run(
380
982
  return 1 if errors and len(errors) == len(responses) else 0
381
983
 
382
984
 
985
def _debate_round_filename(round_number: int) -> str:
    """Return the file name used to persist debate round ``round_number``."""
    return "debate-round-{}.json".format(round_number)
987
+
988
+
989
def _write_debate_round(
    out_dir: Path,
    round_number: int,
    responses: list[CouncilResponse],
    *,
    question: CouncilQuestion,
    members: list[ExternalAIClient],
    artefact: str,
    original_ask: str,
    total_planned_rounds: int,
    table: PriceTable,
    prompt_mode: str | None,
    prose_synthesis: bool | None,
) -> Path:
    """Write one debate round to ``out_dir`` as a standalone JSON file.

    The payload follows the ``cmd_run`` shape so a round file can be fed to
    ``council render <debate-round-N.json>`` unchanged. The extra keys
    ``debate_round`` and ``debate_total_rounds`` are additive.

    ``cost_usd_actual`` sums ``estimate_cost`` over the responses that carry
    no error; errored responses contribute nothing.

    Returns the path of the file that was written. ``out_dir`` is created
    (including parents) when it does not exist yet.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    # Explicit accumulation keeps the float summation order of the original.
    spent_usd = 0.0
    for reply in responses:
        if not reply.error:
            cost = estimate_cost(
                reply.provider, reply.model,
                reply.input_tokens, reply.output_tokens, table,
            )
            spent_usd += cost.total_usd

    record = {
        "schema_version": SCHEMA_VERSION,
        "mode": question.mode,
        "prompt_mode": prompt_mode,
        "prose_synthesis": prose_synthesis,
        "artefact": artefact,
        "original_ask": original_ask,
        "members": [f"{m.name}/{m.model}" for m in members],
        "debate_round": round_number,
        "debate_total_rounds": total_planned_rounds,
        # Each file holds exactly one round, whatever the planned total is.
        "rounds": 1,
        "cost_usd_actual": round(spent_usd, 6),
        "responses": _serialise_responses(responses),
    }

    target = out_dir / _debate_round_filename(round_number)
    target.write_text(json.dumps(record, indent=2) + "\n", encoding="utf-8")
    return target
1034
+
1035
+
1036
def _load_debate_seed(
    path: Path,
    expected_members: list[ExternalAIClient],
) -> list[CouncilResponse]:
    """Load the `--continue-as-debate` seed: round-1 responses from a prior session.

    The seed file must be the JSON written by ``cmd_run`` (or a prior
    debate round). Its ``members`` list is compared, in order, against the
    ``name/model`` labels of ``expected_members``; a mismatch is a hard
    error per the Phase 7 contract, not a silent fallback.

    Args:
        path: Location of the saved session JSON.
        expected_members: Members of the current invocation.

    Returns:
        The deserialised ``responses`` section of the seed (empty list when
        the section is missing).

    Raises:
        FileNotFoundError: ``path`` does not exist.
        ValueError: The file is not valid JSON (``json.JSONDecodeError``) or
            its top-level value is not a JSON object.
        CouncilDisabledError: The seed's members differ from
            ``expected_members``.
    """
    # EAFP: read directly instead of exists()-then-read, so a file removed
    # between the two steps still produces the same, explicit error.
    try:
        raw = path.read_text(encoding="utf-8")
    except FileNotFoundError:
        raise FileNotFoundError(
            f"--continue-as-debate path not found: {path}"
        ) from None

    payload = json.loads(raw)
    # A list/scalar at top level would otherwise fail later with an opaque
    # AttributeError on ``.get``.
    if not isinstance(payload, dict):
        raise ValueError(
            f"--continue-as-debate seed {path} must contain a JSON object, "
            f"got {type(payload).__name__}"
        )

    source_members = list(payload.get("members") or [])
    expected_labels = [f"{m.name}/{m.model}" for m in expected_members]
    if source_members != expected_labels:
        raise CouncilDisabledError(
            f"--continue-as-debate member mismatch: source session has "
            f"{source_members!r}, current invocation has {expected_labels!r}. "
            f"Re-run with matching members or drop --continue-as-debate."
        )
    return _deserialise_responses(payload.get("responses") or [])
1062
+
1063
+
1064
def _make_debate_continue_prompt(
    *, auto_continue: bool,
    stream: Any = None,
) -> Any:
    """Create the ``on_continue`` callback handed to `run_debate()`.

    With ``auto_continue`` set, ``None`` is returned so no between-round
    gate is installed. Otherwise the returned callable writes a checkpoint
    line to ``stream`` (``sys.stdout`` when no stream is given), reads one
    line from ``sys.stdin`` and returns ``True`` only for ``y`` / ``yes``
    (case-insensitive, surrounding whitespace ignored).
    """
    if auto_continue:
        return None

    sink = stream or sys.stdout
    affirmative = {"y", "yes"}

    def _prompt(checkpoint: DebateCheckpoint) -> bool:
        line = (
            f"\ndebate:checkpoint round={checkpoint.completed_round}/"
            f"{checkpoint.total_planned_rounds} "
            f"cost_so_far=${checkpoint.cost_so_far_usd:.4f} "
            f"next_round_estimate=${checkpoint.next_round_estimate_usd:.4f} "
            f"— continue? [y/N]: "
        )
        sink.write(line)
        sink.flush()
        try:
            reply = sys.stdin.readline().strip().lower()
        except (EOFError, KeyboardInterrupt):
            # An interrupted prompt counts as "do not continue".
            return False
        # Empty input (including end-of-file) falls through to "no".
        return reply in affirmative

    return _prompt
1094
+
1095
+
1096
def cmd_debate(
    args: argparse.Namespace,
    *,
    settings: dict[str, Any] | None = None,
    members: list[ExternalAIClient] | None = None,
    table: PriceTable | None = None,
) -> int:
    """Run a multi-round debate with progressive cost disclosure.

    Phase 7 contract: each member produces an initial position in
    Round 1, then rebuts the strongest opposing position in subsequent
    rounds. The orchestrator pauses after each round and asks the user
    to continue (``--auto-continue`` bypasses the prompt). Round files
    are persisted incrementally so an interrupted debate leaves a
    recoverable trail.

    Args:
        args: Parsed CLI namespace of the ``debate`` sub-command.
        settings: Pre-loaded settings; loaded via ``load_settings()`` when
            omitted.
        members: Pre-built council members; built from ``settings`` and the
            CLI overrides when omitted.
        table: Price table; loaded via ``load_prices()`` when omitted.

    Returns:
        ``0`` for an estimate-only run (no ``--confirm``) or a completed
        debate, ``1`` when every response of the last round errored, and
        ``3`` when ``run_debate`` raised ``DebateCapExceeded``.

    Raises:
        argparse.ArgumentTypeError: ``--rounds`` is below 1 or above
            ``ai_council.debate_max_rounds``.
    """
    if settings is None:
        settings = load_settings()
    ai_cfg = (settings.get("ai_council") or {}) if isinstance(settings, dict) else {}
    advisor_plans = _build_advisor_plans(ai_cfg, REPO_ROOT)
    explicit_overrides = _parse_model_overrides(getattr(args, "model", None))
    if members is None:
        members = build_members(
            settings,
            invocation_mode=args.mode_override,
            model_overrides=_advisor_model_overrides(
                advisor_plans, explicit_overrides,
            ),
            siblings_overrides=_parse_siblings_overrides(
                getattr(args, "siblings", None),
            ),
        )
    if table is None:
        table = load_prices()
    question, artefact = build_question(
        input_path=Path(args.question), input_mode=args.input_mode,
        max_tokens=_resolve_max_tokens(args, ai_cfg),
        prompt_mode_override="debate",
    )
    project = detect_project_context(REPO_ROOT)
    billable = [m for m in members if getattr(m, "billable", True)]

    # Resolve round count: explicit --rounds wins; otherwise default 2.
    # Hard ceiling: ai_council.debate_max_rounds (Phase 0 reserved key).
    max_rounds_cap = int(ai_cfg.get("debate_max_rounds", 4))
    requested = (
        int(args.rounds) if getattr(args, "rounds", None) is not None else 2
    )
    if requested < 1:
        raise argparse.ArgumentTypeError(
            f"--rounds must be >= 1 (got {requested})"
        )
    if requested > max_rounds_cap:
        raise argparse.ArgumentTypeError(
            f"--rounds={requested} exceeds debate_max_rounds={max_rounds_cap}; "
            f"raise the cap in agents/.ai-council.yml or lower --rounds."
        )
    rounds = requested

    # Only billable members are priced; the projection is per-round cost
    # multiplied by the planned number of rounds.
    estimates = estimate(
        question, billable, table,
        project=project, original_ask=args.original_ask,
        advisor_plans=advisor_plans,
    )
    per_round_usd = sum(e.total_usd for e in estimates)
    projected_total = per_round_usd * rounds
    sys.stdout.write(
        f"council:debate · members={len(members)} (billable={len(billable)}) "
        f"· rounds={rounds} (cap={max_rounds_cap})\n"
    )
    advisor_summary = _format_advisor_summary(advisor_plans, billable)
    if advisor_summary:
        sys.stdout.write(advisor_summary + "\n")
    sys.stdout.write(
        format_estimate_table(billable, estimates) + "\n"
    )
    sys.stdout.write(
        f" × {rounds} rounds (worst case, before progressive disclosure)\n"
        f" PROJECTED TOTAL: ${projected_total:.4f}\n"
    )

    if not args.confirm:
        sys.stdout.write(
            "\nNo --confirm flag — estimate only. Re-run with --confirm to "
            "start the debate.\n"
        )
        return 0

    cost_cfg = ai_cfg.get("cost_budget") or {}
    budget = CostBudget(
        max_input_tokens=int(cost_cfg.get("max_input_tokens", 50_000)),
        max_output_tokens=int(cost_cfg.get("max_output_tokens", 20_000)),
        max_calls=int(cost_cfg.get("max_calls", 10)),
        max_total_usd=float(cost_cfg.get("max_total_usd", 0.0) or 0.0),
    )

    out_dir = Path(args.output)
    seed: list[CouncilResponse] | None = None
    if getattr(args, "continue_as_debate", None):
        # Validate against the full member list: session and round files
        # persist their "members" key from ``members``, not from the
        # billable subset, so comparing against ``billable`` would reject
        # every seed as soon as one non-billable member is configured.
        seed = _load_debate_seed(Path(args.continue_as_debate), members)
        sys.stdout.write(
            f"council:debate · seeding round 1 from "
            f"{args.continue_as_debate} ({len(seed)} responses)\n"
        )

    written: list[Path] = []

    def _on_round_complete(round_number: int, results: list[CouncilResponse]) -> None:
        # Persist each round as soon as it finishes so a later failure
        # still leaves the completed rounds on disk.
        path = _write_debate_round(
            out_dir, round_number, results,
            question=question, members=members,
            artefact=artefact, original_ask=args.original_ask,
            total_planned_rounds=rounds, table=table,
            prompt_mode="debate",
            prose_synthesis=getattr(args, "prose_synthesis", None),
        )
        written.append(path)
        errors = [r for r in results if r.error]
        sys.stdout.write(
            f"council:debate · wrote {path} "
            f"({len(results) - len(errors)}/{len(results)} ok)\n"
        )

    on_continue = _make_debate_continue_prompt(
        auto_continue=bool(getattr(args, "auto_continue", False)),
    )

    try:
        all_rounds = run_debate(
            members, question,
            budget=budget, table=table, project=project,
            original_ask=args.original_ask,
            max_rounds=rounds,
            on_round_complete=_on_round_complete,
            on_continue=on_continue,
            advisor_plans=advisor_plans,
            seed_round_1=seed,
        )
    except DebateCapExceeded as exc:
        sys.stderr.write(
            f"❌ council:debate cap reached after round {exc.completed_round}: "
            f"{exc}\n"
            f"Partial debate persisted under {out_dir} "
            f"({len(written)} rounds).\n"
        )
        return 3

    # Actual spend counts only responses that came back without an error.
    actual_total = 0.0
    for rnd in all_rounds:
        for r in rnd:
            if r.error:
                continue
            ce = estimate_cost(
                r.provider, r.model, r.input_tokens, r.output_tokens, table,
            )
            actual_total += ce.total_usd
    sys.stdout.write(
        f"\ncouncil:debate · {len(all_rounds)} round(s) complete · "
        f"actual ${actual_total:.4f} (cap projection ${projected_total:.4f})\n"
    )
    # Exit 1 only when the final round produced nothing but errors.
    errors_last = [r for r in all_rounds[-1] if r.error] if all_rounds else []
    return 1 if errors_last and len(errors_last) == len(all_rounds[-1]) else 0
1258
+
1259
+
383
1260
  def cmd_render(args: argparse.Namespace) -> int:
384
- """Re-render a saved responses JSON to the markdown report."""
1261
+ """Re-render a saved responses JSON to the markdown report.
1262
+
1263
+ Lens resolution order: explicit ``--prompt-mode`` > ``prompt_mode``
1264
+ in the payload > ``mode`` in the payload > ``None`` (default decision
1265
+ template). R4 Q4 escape hatch ``--prose-synthesis`` overrides the
1266
+ table.
1267
+ """
385
1268
  payload = json.loads(Path(args.responses).read_text(encoding="utf-8"))
386
1269
  items = payload.get("responses") or []
387
- sys.stdout.write(render(_deserialise_responses(items)) + "\n")
1270
+ explicit = getattr(args, "prompt_mode", None)
1271
+ mode = explicit or payload.get("prompt_mode") or payload.get("mode")
1272
+ prose = getattr(args, "prose_synthesis", None)
1273
+ if prose is None:
1274
+ prose = payload.get("prose_synthesis")
1275
+ consensus = _deserialise_consensus(payload.get("consensus"))
1276
+ peer_review = _deserialise_peer_review(payload.get("peer_review"))
1277
+ sys.stdout.write(
1278
+ render(
1279
+ _deserialise_responses(items),
1280
+ mode=mode,
1281
+ prose_synthesis=prose,
1282
+ consensus=consensus,
1283
+ peer_review=peer_review,
1284
+ )
1285
+ + "\n"
1286
+ )
388
1287
  return 0
389
1288
 
390
1289
 
@@ -451,8 +1350,20 @@ def _add_common_input_args(p: argparse.ArgumentParser) -> None:
451
1350
  p.add_argument("--input-mode", choices=["prompt", "roadmap"],
452
1351
  default="prompt",
453
1352
  help="How to bundle the file (default: prompt).")
454
- p.add_argument("--max-tokens", type=int, default=1024,
455
- help="Per-member output budget (default: 1024).")
1353
+ p.add_argument("--prompt-mode",
1354
+ choices=["pr", "design", "optimize", "analysis"],
1355
+ default=None, dest="prompt_mode",
1356
+ help="Lens-override for the system-prompt addendum. "
1357
+ "The bundle shape stays as --input-mode; only "
1358
+ "the per-mode neutrality addendum is swapped "
1359
+ "(see scripts/ai_council/prompts.py _MODE_TABLE). "
1360
+ "Routed by the /council pr|design|optimize|"
1361
+ "analysis wrappers.")
1362
+ p.add_argument("--max-tokens", type=int, default=None,
1363
+ help="Per-member output budget. Default reads "
1364
+ "ai_council.max_output_tokens from .agent-settings.yml "
1365
+ "(2048 if unset). 0 = unlimited (widened to the safe "
1366
+ "provider ceiling before the SDK call).")
456
1367
  p.add_argument("--mode-override", choices=["api", "manual"], default=None,
457
1368
  help="Override every member's transport mode.")
458
1369
  p.add_argument("--model", action="append", default=None, dest="model",
@@ -474,6 +1385,14 @@ def _add_common_input_args(p: argparse.ArgumentParser) -> None:
474
1385
  "skill.")
475
1386
  p.add_argument("--original-ask", default="",
476
1387
  help="The user's framing sentence (flows into handoff).")
1388
+ p.add_argument("--peer-review", dest="peer_review", action="store_true",
1389
+ default=False,
1390
+ help="Run an anonymous peer-review pass after the main "
1391
+ "deliberation. Each member critiques the others' "
1392
+ "(anonymised) responses for blind spots before "
1393
+ "synthesis. Adds N extra API calls. Opt-in per the "
1394
+ "R2 verdict; also accepts ai_council.peer_review."
1395
+ "enabled: true in agents/.ai-council.yml.")
477
1396
 
478
1397
 
479
1398
  def build_parser() -> argparse.ArgumentParser:
@@ -485,6 +1404,15 @@ def build_parser() -> argparse.ArgumentParser:
485
1404
 
486
1405
  p_est = sub.add_parser("estimate", help="Pre-call cost preview (no spend).")
487
1406
  _add_common_input_args(p_est)
1407
+ p_est.add_argument("--debate", action="store_true", default=False,
1408
+ help="Render the round-by-round projection for a "
1409
+ "debate run (one call per member per round). "
1410
+ "Progressive disclosure may stop the debate "
1411
+ "early — this is an upper bound.")
1412
+ p_est.add_argument("--rounds", type=int, default=None,
1413
+ help="Debate round count for --debate. Defaults to "
1414
+ "ai_council.min_rounds (typically 2); capped "
1415
+ "at ai_council.debate_max_rounds (typically 4).")
488
1416
 
489
1417
  p_run = sub.add_parser("run", help="Run the council; --confirm required to spend.")
490
1418
  _add_common_input_args(p_run)
@@ -503,14 +1431,59 @@ def build_parser() -> argparse.ArgumentParser:
503
1431
  "artefacts. Set by the host agent when the consuming "
504
1432
  "rule/skill/command declares council_depth: deep. "
505
1433
  "Overridden by explicit --rounds.")
1434
+ _add_prose_synthesis_arg(p_run)
1435
+
1436
+ p_deb = sub.add_parser(
1437
+ "debate",
1438
+ help="Multi-round debate with progressive cost disclosure (Phase 7).",
1439
+ )
1440
+ _add_common_input_args(p_deb)
1441
+ p_deb.add_argument("--output", required=True,
1442
+ help="Directory to write debate-round-N.json files.")
1443
+ p_deb.add_argument("--confirm", action="store_true",
1444
+ help="Required to actually start the debate.")
1445
+ p_deb.add_argument("--rounds", type=int, default=None,
1446
+ help="Number of debate rounds (default 2). Capped by "
1447
+ "ai_council.debate_max_rounds in agents/.ai-council.yml.")
1448
+ p_deb.add_argument("--auto-continue", action="store_true",
1449
+ default=False, dest="auto_continue",
1450
+ help="Skip the between-round y/N prompt. The hard cap "
1451
+ "against cost_budget.max_total_usd still applies.")
1452
+ p_deb.add_argument("--continue-as-debate", default=None,
1453
+ dest="continue_as_debate", metavar="PATH",
1454
+ help="Seed round 1 from an existing council session "
1455
+ "JSON. Members + models must match the current "
1456
+ "invocation.")
1457
+ _add_prose_synthesis_arg(p_deb)
506
1458
 
507
1459
  p_ren = sub.add_parser("render", help="Re-render a saved responses JSON.")
508
1460
  p_ren.add_argument("responses",
509
1461
  help="Path to the JSON written by `council run`.")
1462
+ p_ren.add_argument("--prompt-mode",
1463
+ choices=["default", "pr", "design", "optimize", "analysis",
1464
+ "prompt", "roadmap", "diff", "files"],
1465
+ default=None, dest="prompt_mode",
1466
+ help="Override the synthesis-template lens. Defaults "
1467
+ "to the `mode` recorded in the responses JSON.")
1468
+ _add_prose_synthesis_arg(p_ren)
510
1469
 
511
1470
  return parser
512
1471
 
513
1472
 
1473
def _add_prose_synthesis_arg(p: argparse.ArgumentParser) -> None:
    """Register the ``--prose-synthesis`` / ``--no-prose-synthesis`` pair on ``p``.

    R4 Q4 escape hatch. Both flags share the ``prose_synthesis`` destination
    and are mutually exclusive: the attribute is ``True`` or ``False`` when
    one flag is passed and stays ``None`` when neither is given.
    """
    toggle = p.add_mutually_exclusive_group()

    force_prose_help = (
        "Force open-ended prose synthesis (bare slot) "
        "regardless of lens. R4 Q4 escape hatch."
    )
    toggle.add_argument(
        "--prose-synthesis",
        dest="prose_synthesis",
        action="store_const",
        const=True,
        default=None,
        help=force_prose_help,
    )

    force_structured_help = (
        "Force the structured default decision-lens "
        "template even on a creative lens "
        "(design / optimize). Symmetric escape hatch."
    )
    toggle.add_argument(
        "--no-prose-synthesis",
        dest="prose_synthesis",
        action="store_const",
        const=False,
        help=force_structured_help,
    )
1485
+
1486
+
514
1487
  def main(argv: list[str] | None = None) -> int:
515
1488
  args = build_parser().parse_args(argv)
516
1489
  try:
@@ -518,6 +1491,8 @@ def main(argv: list[str] | None = None) -> int:
518
1491
  return cmd_estimate(args)
519
1492
  if args.cmd == "run":
520
1493
  return cmd_run(args)
1494
+ if args.cmd == "debate":
1495
+ return cmd_debate(args)
521
1496
  if args.cmd == "render":
522
1497
  return cmd_render(args)
523
1498
  except CouncilDisabledError as exc: