@event4u/agent-config 2.12.0 → 2.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/.agent-src/commands/council/analysis.md +142 -0
  2. package/.agent-src/commands/council/debate.md +129 -0
  3. package/.agent-src/commands/council/default.md +8 -0
  4. package/.agent-src/commands/council/design.md +16 -12
  5. package/.agent-src/commands/council/optimize.md +16 -15
  6. package/.agent-src/commands/council/pr.md +12 -12
  7. package/.agent-src/commands/council.md +48 -2
  8. package/.agent-src/commands/memory/learn-low-impact.md +143 -0
  9. package/.agent-src/personas/advisors/contrarian.md +95 -0
  10. package/.agent-src/personas/advisors/executor.md +99 -0
  11. package/.agent-src/personas/advisors/expansionist.md +98 -0
  12. package/.agent-src/personas/advisors/first-principles.md +98 -0
  13. package/.agent-src/personas/advisors/outsider.md +102 -0
  14. package/.agent-src/rules/ask-when-uncertain.md +10 -6
  15. package/.agent-src/rules/copilot-routing.md +19 -0
  16. package/.agent-src/rules/devcontainer-routing.md +20 -0
  17. package/.agent-src/rules/external-reference-deep-dive.md +1 -1
  18. package/.agent-src/rules/fast-path-marker-visibility.md +38 -0
  19. package/.agent-src/rules/laravel-routing.md +20 -0
  20. package/.agent-src/rules/low-impact-corpus-privacy-floor.md +74 -0
  21. package/.agent-src/rules/symfony-routing.md +20 -0
  22. package/.agent-src/skills/ai-council/SKILL.md +388 -10
  23. package/.agent-src/skills/copilot-config/SKILL.md +1 -1
  24. package/.agent-src/skills/devcontainer/SKILL.md +1 -1
  25. package/.agent-src/skills/laravel/SKILL.md +1 -1
  26. package/.agent-src/skills/project-analysis-core/SKILL.md +1 -1
  27. package/.agent-src/skills/project-analyzer/SKILL.md +1 -1
  28. package/.agent-src/skills/symfony-workflow/SKILL.md +1 -1
  29. package/.agent-src/skills/universal-project-analysis/SKILL.md +1 -1
  30. package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
  31. package/.claude-plugin/marketplace.json +4 -1
  32. package/AGENTS.md +1 -1
  33. package/CHANGELOG.md +346 -124
  34. package/CONTRIBUTING.md +5 -0
  35. package/README.md +6 -6
  36. package/config/agent-settings.template.yml +5 -93
  37. package/config/gitignore-block.txt +6 -0
  38. package/docs/architecture/multi-tool-projection.md +53 -0
  39. package/docs/architecture/{compression.md → source-projection.md} +21 -3
  40. package/docs/architecture.md +15 -15
  41. package/docs/archive/CHANGELOG-pre-2.11.0.md +141 -0
  42. package/docs/catalog.md +25 -12
  43. package/docs/contracts/adr-architectural-consensus-mechanism.md +68 -0
  44. package/docs/contracts/adr-level-6-productization.md +7 -9
  45. package/docs/contracts/ai-council-config.md +658 -0
  46. package/docs/contracts/command-clusters.md +58 -2
  47. package/docs/contracts/command-surface-tiers.md +3 -2
  48. package/docs/contracts/cost-profile-defaults.md +5 -0
  49. package/docs/contracts/decision-engine-gates.md +5 -0
  50. package/docs/contracts/decision-trace-v1.md +2 -2
  51. package/docs/contracts/file-ownership-matrix.json +1735 -72
  52. package/docs/contracts/installed-tools-lockfile.md +2 -1
  53. package/docs/contracts/low-impact-corpus-format.md +95 -0
  54. package/docs/contracts/mcp-beta-criteria.md +6 -5
  55. package/docs/contracts/mcp-cloud-scope.md +5 -4
  56. package/docs/contracts/multi-tool-projection-fidelity.md +115 -0
  57. package/docs/contracts/release-trunk-sync.md +4 -3
  58. package/docs/contracts/tier-3-contrib-plugin.md +5 -6
  59. package/docs/getting-started.md +2 -2
  60. package/docs/guidelines/agent-infra/installed-tools-manifest.md +2 -1
  61. package/docs/installation.md +32 -0
  62. package/package.json +1 -1
  63. package/scripts/_archive/README.md +59 -0
  64. package/scripts/_cli/cmd_doctor.py +134 -0
  65. package/scripts/ai_council/_default_prices.py +10 -1
  66. package/scripts/ai_council/advisors.py +148 -0
  67. package/scripts/ai_council/airgap.py +165 -0
  68. package/scripts/ai_council/cli_hints.py +123 -0
  69. package/scripts/ai_council/clients.py +959 -5
  70. package/scripts/ai_council/compile_corpus.py +178 -0
  71. package/scripts/ai_council/confidence_gate.py +156 -0
  72. package/scripts/ai_council/config.py +1364 -0
  73. package/scripts/ai_council/consensus.py +329 -0
  74. package/scripts/ai_council/events_log.py +137 -0
  75. package/scripts/ai_council/learn_low_impact_preview.py +252 -0
  76. package/scripts/ai_council/low_impact.py +714 -0
  77. package/scripts/ai_council/low_impact_corpus.py +466 -0
  78. package/scripts/ai_council/low_impact_intake.py +163 -0
  79. package/scripts/ai_council/modes.py +6 -1
  80. package/scripts/ai_council/necessity.py +782 -0
  81. package/scripts/ai_council/orchestrator.py +872 -20
  82. package/scripts/ai_council/probation_gate.py +152 -0
  83. package/scripts/ai_council/prompts.py +335 -0
  84. package/scripts/ai_council/redact_low_impact_entry.py +155 -0
  85. package/scripts/ai_council/replay.py +155 -0
  86. package/scripts/ai_council/session.py +19 -1
  87. package/scripts/ai_council/shadow_dispatch.py +235 -0
  88. package/scripts/ai_council/solo_dispatch.py +226 -0
  89. package/scripts/audit_cloud_compatibility.py +74 -0
  90. package/scripts/audit_command_surface.py +363 -0
  91. package/scripts/check_compressed_paths.py +6 -1
  92. package/scripts/check_council_layout.py +11 -0
  93. package/scripts/ci_time_ratio.py +168 -0
  94. package/scripts/council_cli.py +2005 -30
  95. package/scripts/install.sh +12 -0
  96. package/scripts/measure_projection_bytes.py +159 -0
  97. package/scripts/measure_roadmap_trajectory.py +112 -0
  98. package/scripts/probe_projection_fidelity.py +202 -0
  99. package/scripts/score_skill_selection.py +198 -0
  100. package/scripts/skill_collision_clusters.py +162 -0
  101. package/scripts/{_backfill_skill_domains.py → _archive/_backfill_skill_domains.py} +0 -0
  102. package/scripts/{_bootstrap_tier_frontmatter.py → _archive/_bootstrap_tier_frontmatter.py} +0 -0
  103. package/scripts/{_p43_bodies.py → _archive/_p43_bodies.py} +0 -0
  104. package/scripts/{_p43_compress.py → _archive/_p43_compress.py} +0 -0
  105. package/scripts/{_p4_migrate.py → _archive/_p4_migrate.py} +0 -0
  106. package/scripts/{_phase2_shim_helper.py → _archive/_phase2_shim_helper.py} +0 -0
  107. package/scripts/{_pilot_council_question.py → _archive/_pilot_council_question.py} +0 -0
@@ -20,7 +20,7 @@ CouncilResponse, never raise) is unchanged.
20
20
  from __future__ import annotations
21
21
 
22
22
  from dataclasses import dataclass
23
- from typing import Callable
23
+ from typing import Any, Callable
24
24
 
25
25
  from scripts.ai_council.budget_guard import (
26
26
  record_spend as _record_daily_spend,
@@ -32,6 +32,18 @@ from scripts.ai_council.clients import (
32
32
  CouncilResponse,
33
33
  ExternalAIClient,
34
34
  )
35
+ from scripts.ai_council.consensus import (
36
+ ConsensusBucket,
37
+ ConsensusMetadata,
38
+ Finding,
39
+ FindingScore,
40
+ aggregate_scores,
41
+ anonymize_findings,
42
+ anonymize_responses,
43
+ bucket_by_threshold,
44
+ parse_findings_response,
45
+ parse_scores_response,
46
+ )
35
47
  from scripts.ai_council.pricing import (
36
48
  CostEstimate,
37
49
  PriceTable,
@@ -39,7 +51,16 @@ from scripts.ai_council.pricing import (
39
51
  estimate_input_tokens,
40
52
  )
41
53
  from scripts.ai_council.project_context import ProjectContext
42
- from scripts.ai_council.prompts import system_prompt_for
54
+ from scripts.ai_council.advisors import AdvisorPlan
55
+ from scripts.ai_council.prompts import (
56
+ advisor_system_prompt,
57
+ build_extraction_user_prompt,
58
+ build_peer_review_user_prompt,
59
+ build_scoring_user_prompt,
60
+ peer_review_synthesis_addendum,
61
+ synthesis_template,
62
+ system_prompt_for,
63
+ )
43
64
 
44
65
 
45
66
  @dataclass
@@ -78,6 +99,99 @@ class OverrunEvent:
78
99
  OnOverrunCallback = Callable[[OverrunEvent], bool]
79
100
 
80
101
 
102
+ @dataclass(frozen=True)
103
+ class DebateCostEstimate:
104
+ """Pre-flight debate cost summary (Phase 8).
105
+
106
+ ``low_usd`` / ``expected_usd`` / ``high_usd`` are the rolled-up
107
+ spend bounds across every billable member × ``rounds``. The
108
+ expected estimate matches the per-round ``estimate()`` total
109
+ multiplied by rounds (worst-case ``max_output_tokens``). ``low_usd``
110
+ discounts output to 25% of the ceiling — most members do not hit
111
+ their token budget. ``high_usd`` adds a 20% over-run buffer per the
112
+ roadmap's ±20% accuracy target.
113
+
114
+ ``per_member`` carries one entry per billable member with the same
115
+ bound triple, plus the member's transport label (api / cli /
116
+ manual). ``subscription_members`` lists non-billable members so the
117
+ disclosure block can call out the "covered by subscription" rows
118
+ without summing them into USD totals.
119
+ """
120
+
121
+ rounds: int
122
+ low_usd: float
123
+ expected_usd: float
124
+ high_usd: float
125
+ per_member: list[dict[str, Any]]
126
+ subscription_members: list[dict[str, str]]
127
+
128
+
129
+ def estimate_debate_cost(
130
+ question: CouncilQuestion,
131
+ members: list[ExternalAIClient],
132
+ table: PriceTable,
133
+ *,
134
+ rounds: int,
135
+ project: ProjectContext | None = None,
136
+ original_ask: str = "",
137
+ advisor_plans: dict[str, AdvisorPlan] | None = None,
138
+ ) -> DebateCostEstimate:
139
+ """Project total spend for an N-round debate across all members.
140
+
141
+ Mirrors :func:`estimate` per-member, then multiplies by ``rounds``
142
+ to account for the per-round preamble + critique pass. CLI / manual
143
+ members (``billable=False``) are excluded from USD totals and
144
+ surfaced separately in ``subscription_members`` so the disclosure
145
+ block can label them as covered by the user's flat-rate plan.
146
+ """
147
+ if rounds < 1:
148
+ raise ValueError(f"rounds must be >= 1 (got {rounds!r}).")
149
+ billable_members = [m for m in members if getattr(m, "billable", True)]
150
+ sub_members = [
151
+ {
152
+ "name": m.name,
153
+ "model": m.model,
154
+ "transport": getattr(m, "transport", "api"),
155
+ "subscription_label": getattr(m, "subscription_label", ""),
156
+ }
157
+ for m in members
158
+ if not getattr(m, "billable", True)
159
+ ]
160
+ per_round = estimate(
161
+ question, billable_members, table,
162
+ project=project, original_ask=original_ask,
163
+ advisor_plans=advisor_plans,
164
+ )
165
+ expected = sum(e.total_usd for e in per_round) * rounds
166
+ # Low bound: output tokens rarely reach `max_output_tokens` ceiling.
167
+ # Use input-only cost + 25% of the output ceiling — empirical floor
168
+ # from manual debate traces.
169
+ low = (
170
+ sum(e.input_usd + 0.25 * e.output_usd for e in per_round) * rounds
171
+ )
172
+ # High bound: +20% over-run buffer (roadmap ±20% accuracy target).
173
+ high = expected * 1.20
174
+ per_member: list[dict[str, Any]] = []
175
+ for member, est in zip(billable_members, per_round):
176
+ member_expected = est.total_usd * rounds
177
+ per_member.append({
178
+ "name": member.name,
179
+ "model": member.model,
180
+ "transport": getattr(member, "transport", "api"),
181
+ "low_usd": (est.input_usd + 0.25 * est.output_usd) * rounds,
182
+ "expected_usd": member_expected,
183
+ "high_usd": member_expected * 1.20,
184
+ })
185
+ return DebateCostEstimate(
186
+ rounds=rounds,
187
+ low_usd=low,
188
+ expected_usd=expected,
189
+ high_usd=high,
190
+ per_member=per_member,
191
+ subscription_members=sub_members,
192
+ )
193
+
194
+
81
195
  def estimate(
82
196
  question: CouncilQuestion,
83
197
  members: list[ExternalAIClient],
@@ -85,21 +199,41 @@ def estimate(
85
199
  *,
86
200
  project: ProjectContext | None = None,
87
201
  original_ask: str = "",
202
+ advisor_plans: dict[str, AdvisorPlan] | None = None,
88
203
  ) -> list[CostEstimate]:
89
204
  """Return a pre-call cost estimate per member, in input order.
90
205
 
91
206
  `project` and `original_ask` are passed through to
92
207
  `system_prompt_for()` so the estimate covers the handoff preamble
93
208
  bytes too. Both default to v1-shape (no preamble extension).
209
+
210
+ `advisor_plans` (Phase 6) — when a member's name has a plan, the
211
+ estimate uses the advisor persona system prompt (typically larger
212
+ than the bare mode addendum). The cost estimator must mirror
213
+ `_run_round` exactly so the pre-call preview never under-states
214
+ the advisor-mode bill.
94
215
  """
95
- sys_prompt = system_prompt_for(
216
+ plans = advisor_plans or {}
217
+ base_user_tokens = estimate_input_tokens(question.user_prompt)
218
+ base_sys = system_prompt_for(
96
219
  question.mode, project=project, original_ask=original_ask,
97
220
  )
98
- input_tokens = estimate_input_tokens(question.user_prompt) + estimate_input_tokens(sys_prompt)
99
- return [
100
- estimate_cost(m.name, m.model, input_tokens, question.max_tokens, table)
101
- for m in members
102
- ]
221
+ base_sys_tokens = estimate_input_tokens(base_sys)
222
+ estimates: list[CostEstimate] = []
223
+ for m in members:
224
+ plan = plans.get(m.name)
225
+ if plan is None:
226
+ sys_tokens = base_sys_tokens
227
+ else:
228
+ sys_prompt = advisor_system_prompt(
229
+ plan.persona_text, project=project, original_ask=original_ask,
230
+ )
231
+ sys_tokens = estimate_input_tokens(sys_prompt)
232
+ input_tokens = base_user_tokens + sys_tokens
233
+ estimates.append(
234
+ estimate_cost(m.name, m.model, input_tokens, question.max_tokens, table),
235
+ )
236
+ return estimates
103
237
 
104
238
 
105
239
  def consult(
@@ -113,6 +247,7 @@ def consult(
113
247
  original_ask: str = "",
114
248
  rounds: int = 1,
115
249
  on_round_complete: Callable[[int, list[CouncilResponse]], None] | None = None,
250
+ advisor_plans: dict[str, AdvisorPlan] | None = None,
116
251
  ) -> list[CouncilResponse]:
117
252
  """Sequentially fan out `question` to every enabled member.
118
253
 
@@ -133,6 +268,9 @@ def consult(
133
268
  accumulate across rounds. Returns the FINAL round's responses;
134
269
  use `on_round_complete(round_idx, responses)` to capture
135
270
  intermediate rounds.
271
+ - `advisor_plans` (Phase 6) keyed by provider name swaps the
272
+ member's system prompt for the advisor persona via
273
+ `advisor_system_prompt()`. Replace-mode: no extra calls.
136
274
  """
137
275
  if rounds < 1:
138
276
  raise ValueError(f"rounds must be >= 1 (got {rounds})")
@@ -162,6 +300,7 @@ def consult(
162
300
  members, round_question, budget, spent,
163
301
  table=table, on_overrun=on_overrun,
164
302
  project=project, original_ask=original_ask,
303
+ advisor_plans=advisor_plans,
165
304
  )
166
305
  if on_round_complete is not None:
167
306
  on_round_complete(round_idx, last_results)
@@ -183,14 +322,29 @@ def _run_round(
183
322
  on_overrun: OnOverrunCallback | None,
184
323
  project: ProjectContext | None,
185
324
  original_ask: str,
325
+ advisor_plans: dict[str, AdvisorPlan] | None = None,
186
326
  ) -> list[CouncilResponse]:
187
327
  """Run a single round; mutate `spent` with cumulative totals."""
188
- system_prompt = system_prompt_for(
328
+ plans = advisor_plans or {}
329
+ base_system_prompt = system_prompt_for(
189
330
  question.mode, project=project, original_ask=original_ask,
190
331
  )
332
+
333
+ def _system_prompt_for_member(m: ExternalAIClient) -> str:
334
+ plan = plans.get(m.name)
335
+ if plan is None:
336
+ return base_system_prompt
337
+ return advisor_system_prompt(
338
+ plan.persona_text, project=project, original_ask=original_ask,
339
+ )
340
+
191
341
  results: list[CouncilResponse] = []
192
342
  estimates = (
193
- estimate(question, members, table, project=project, original_ask=original_ask)
343
+ estimate(
344
+ question, members, table,
345
+ project=project, original_ask=original_ask,
346
+ advisor_plans=advisor_plans,
347
+ )
194
348
  if table is not None
195
349
  else None
196
350
  )
@@ -202,12 +356,16 @@ def _run_round(
202
356
  # observability, but no projection / budget breach can apply.
203
357
  if not getattr(member, "billable", True):
204
358
  try:
205
- response = member.ask(system_prompt, question.user_prompt, question.max_tokens)
359
+ response = member.ask(
360
+ _system_prompt_for_member(member),
361
+ question.user_prompt, question.max_tokens,
362
+ )
206
363
  except Exception as exc: # noqa: BLE001 - last-resort safety net
207
364
  response = CouncilResponse(
208
365
  provider=member.name, model=member.model, text="",
209
366
  error=f"{type(exc).__name__}: {exc}",
210
367
  )
368
+ _stamp_transport_metadata(response, member)
211
369
  results.append(response)
212
370
  spent["input"] += response.input_tokens
213
371
  spent["output"] += response.output_tokens
@@ -265,7 +423,10 @@ def _run_round(
265
423
 
266
424
  # ── actual call ──────────────────────────────────────────────
267
425
  try:
268
- response = member.ask(system_prompt, question.user_prompt, question.max_tokens)
426
+ response = member.ask(
427
+ _system_prompt_for_member(member),
428
+ question.user_prompt, question.max_tokens,
429
+ )
269
430
  except Exception as exc: # noqa: BLE001 - last-resort safety net
270
431
  response = CouncilResponse(
271
432
  provider=member.name, model=member.model, text="",
@@ -274,6 +435,7 @@ def _run_round(
274
435
  results.append(response)
275
436
  spent["input"] += response.input_tokens
276
437
  spent["output"] += response.output_tokens
438
+ actual_usd: float | None = None
277
439
  if estimates is not None and table is not None:
278
440
  # Bill the actual output against the budget using the
279
441
  # member's per-1M output rate. Re-use estimate_cost with
@@ -282,6 +444,7 @@ def _run_round(
282
444
  member.name, member.model,
283
445
  response.input_tokens, response.output_tokens, table,
284
446
  )
447
+ actual_usd = actual.total_usd
285
448
  spent["usd"] += actual.total_usd
286
449
  # Persist to the rolling 24h ledger when the daily cap is
287
450
  # active. Errors are swallowed inside record_spend.
@@ -289,14 +452,44 @@ def _run_round(
289
452
  _record_daily_spend(
290
453
  actual.total_usd, member.name, member.model,
291
454
  )
455
+ _stamp_transport_metadata(response, member, cost_usd=actual_usd)
292
456
 
293
457
  return results
294
458
 
295
459
 
296
460
  def _aborted(member: ExternalAIClient, reason: str) -> CouncilResponse:
297
- return CouncilResponse(
461
+ response = CouncilResponse(
298
462
  provider=member.name, model=member.model, text="", error=reason,
299
463
  )
464
+ _stamp_transport_metadata(response, member)
465
+ return response
466
+
467
+
468
+ def _stamp_transport_metadata(
469
+ response: CouncilResponse,
470
+ member: ExternalAIClient,
471
+ *,
472
+ cost_usd: float | None = None,
473
+ ) -> None:
474
+ """Annotate `response.metadata` with transport / billable / cost info.
475
+
476
+ Phase 5 / Step 1 — the session writer and orchestrator renderer key
477
+ off these fields to format the cost line as either
478
+ ``cost: subscription (claude-pro)`` (non-billable vendor CLI) or
479
+ ``cost: $0.NNNN (… in / … out)`` (billable api or community CLI).
480
+ Stamped here (and not in each client) so the writer stays decoupled
481
+ from the client class hierarchy.
482
+ """
483
+ meta = dict(response.metadata or {})
484
+ transport = getattr(member, "transport", "api")
485
+ meta.setdefault("transport", transport)
486
+ meta.setdefault("billable", bool(getattr(member, "billable", True)))
487
+ label = getattr(member, "subscription_label", "") or ""
488
+ if label and not meta.get("billable", True):
489
+ meta.setdefault("subscription_label", label)
490
+ if cost_usd is not None:
491
+ meta["cost_usd"] = float(cost_usd)
492
+ response.metadata = meta
300
493
 
301
494
 
302
495
  def _augment_for_next_round(
@@ -337,18 +530,677 @@ def _augment_for_next_round(
337
530
  )
338
531
 
339
532
 
340
- def render(responses: list[CouncilResponse]) -> str:
341
- """Render stacked sections + a Convergence/Divergence summary slot."""
533
+ @dataclass
534
+ class DebateCheckpoint:
535
+ """Snapshot passed to the continue-prompt callback between rounds.
536
+
537
+ Phase 7 progressive-disclosure contract — the orchestrator pauses
538
+ after each completed round, builds this checkpoint, and asks the
539
+ caller whether to continue. Returning False stops the debate
540
+ gracefully (caller receives every completed round).
541
+ """
542
+
543
+ completed_round: int # 1-based index of the round just finished
544
+ total_planned_rounds: int
545
+ cost_so_far_usd: float
546
+ next_round_estimate_usd: float
547
+ last_round_responses: list[CouncilResponse]
548
+
549
+
550
+ class DebateCapExceeded(RuntimeError):
551
+ """Raised when projected next-round spend would breach the budget cap.
552
+
553
+ The CLI catches this *after* writing the partial artefact, so the
554
+ user always has a recoverable trail of the rounds that completed
555
+ before the cap fired.
556
+ """
557
+
558
+ def __init__(
559
+ self, *,
560
+ completed_round: int,
561
+ cost_so_far: float,
562
+ next_estimate: float,
563
+ cap: float,
564
+ ) -> None:
565
+ self.completed_round = completed_round
566
+ self.cost_so_far = cost_so_far
567
+ self.next_estimate = next_estimate
568
+ self.cap = cap
569
+ super().__init__(
570
+ f"Debate hard-cap: round {completed_round + 1} would push spend "
571
+ f"to ${cost_so_far + next_estimate:.4f} (cap=${cap:.4f}); "
572
+ f"stopping after round {completed_round}."
573
+ )
574
+
575
+
576
+ # Continue-prompt callback. Receives a DebateCheckpoint, returns True to
577
+ # proceed with the next round, False to stop gracefully.
578
+ DebateContinuePrompt = Callable[[DebateCheckpoint], bool]
579
+
580
+
581
+ def _augment_for_debate_round(
582
+ original_prompt: str,
583
+ prior_responses: list[CouncilResponse],
584
+ next_round_number: int,
585
+ ) -> str:
586
+ """Build the round-N user prompt for a debate — rebuttal framing.
587
+
588
+ Same anonymisation rules as `_augment_for_next_round` (Iron Law of
589
+ Neutrality § multi-round): provider/model identifiers stripped,
590
+ "Reviewer A / B / C…" labels assigned in input order, errors
591
+ skipped. The instruction block is debate-specific: each reviewer
592
+ is asked to identify the strongest opposing position and write a
593
+ rebuttal, NOT to find common ground.
594
+ """
342
595
  blocks: list[str] = []
596
+ label_idx = 0
597
+ for r in prior_responses:
598
+ if r.error or not r.text.strip():
599
+ continue
600
+ label = chr(ord("A") + label_idx)
601
+ label_idx += 1
602
+ blocks.append(f"### Reviewer {label}\n\n{r.text.strip()}")
603
+ if not blocks:
604
+ return original_prompt
605
+ prior_block = "\n\n".join(blocks)
606
+ return (
607
+ f"{original_prompt}\n\n"
608
+ f"---\n\n"
609
+ f"## Prior round positions (round {next_round_number - 1})\n\n"
610
+ f"You are now in round {next_round_number} of a structured\n"
611
+ f"debate. Below are anonymised positions from independent\n"
612
+ f"reviewers in the previous round. You do NOT know which model\n"
613
+ f"produced which position.\n\n"
614
+ f"Identify the SINGLE strongest opposing position and write a\n"
615
+ f"rebuttal addressed at its strongest steel-manned form. Do NOT\n"
616
+ f"search for common ground — name the load-bearing flaw the\n"
617
+ f"opposing reviewer missed and state the evidence behind your\n"
618
+ f"counter-position.\n\n"
619
+ f"{prior_block}"
620
+ )
621
+
622
+
623
+ def run_debate(
624
+ members: list[ExternalAIClient],
625
+ question: CouncilQuestion,
626
+ *,
627
+ budget: CostBudget | None = None,
628
+ table: PriceTable | None = None,
629
+ on_overrun: OnOverrunCallback | None = None,
630
+ project: ProjectContext | None = None,
631
+ original_ask: str = "",
632
+ max_rounds: int = 2,
633
+ on_round_complete: Callable[[int, list[CouncilResponse]], None] | None = None,
634
+ on_continue: DebateContinuePrompt | None = None,
635
+ advisor_plans: dict[str, AdvisorPlan] | None = None,
636
+ seed_round_1: list[CouncilResponse] | None = None,
637
+ ) -> list[list[CouncilResponse]]:
638
+ """Run a structured multi-round debate with progressive disclosure.
639
+
640
+ Returns every completed round in order — caller persists each
641
+ round incrementally via `on_round_complete` for crash safety.
642
+
643
+ Round 1: each member produces an initial position. When
644
+ `seed_round_1` is provided, it is reused verbatim (no calls) so
645
+ `/council debate --continue-as-debate` can pivot from an existing
646
+ `/council default` session.
647
+
648
+ Round 2+: `_augment_for_debate_round` wraps the original prompt
649
+ with anonymised prior positions and asks each member for a
650
+ rebuttal addressed at the strongest opposing view.
651
+
652
+ Between rounds: `on_continue(checkpoint)` is consulted. Returning
653
+ False stops the debate; the caller receives every completed round.
654
+ `None` (the default) auto-continues — the CLI wires its
655
+ interactive y/N prompt here, `--auto-continue` passes `None`.
656
+
657
+ Hard cap: before kicking off round N+1, the orchestrator compares
658
+ `spent_usd + next_round_estimate` to `budget.max_total_usd`. A
659
+ projected breach raises `DebateCapExceeded`; the CLI catches it
660
+ after persisting the partial debate.
661
+ """
662
+ if max_rounds < 1:
663
+ raise ValueError(f"max_rounds must be >= 1 (got {max_rounds})")
664
+ if not members:
665
+ return []
666
+ budget = budget or CostBudget()
667
+ if len(members) > budget.max_calls:
668
+ raise ValueError(
669
+ f"Debate has {len(members)} members but budget caps at "
670
+ f"{budget.max_calls} calls."
671
+ )
672
+
673
+ spent: dict[str, float] = {"input": 0, "output": 0, "usd": 0.0}
674
+ all_rounds: list[list[CouncilResponse]] = []
675
+ current_user_prompt = question.user_prompt
676
+
677
+ for round_idx in range(max_rounds):
678
+ round_number = round_idx + 1
679
+ if round_idx == 0 and seed_round_1 is not None:
680
+ # Pivot from /council default — reuse the existing round 1
681
+ # verbatim. No calls billed; spend stays at $0 until round 2.
682
+ results = list(seed_round_1)
683
+ else:
684
+ round_question = (
685
+ question if round_idx == 0
686
+ else CouncilQuestion(
687
+ mode=question.mode,
688
+ user_prompt=current_user_prompt,
689
+ max_tokens=question.max_tokens,
690
+ )
691
+ )
692
+ results = _run_round(
693
+ members, round_question, budget, spent,
694
+ table=table, on_overrun=on_overrun,
695
+ project=project, original_ask=original_ask,
696
+ advisor_plans=advisor_plans,
697
+ )
698
+
699
+ all_rounds.append(results)
700
+ if on_round_complete is not None:
701
+ on_round_complete(round_number, results)
702
+
703
+ # Prep the user-prompt for the next round so the cost estimate
704
+ # below covers the augmented bytes.
705
+ if round_idx + 1 < max_rounds:
706
+ current_user_prompt = _augment_for_debate_round(
707
+ question.user_prompt, results, round_number + 1,
708
+ )
709
+ # Hard-cap + continue-prompt gating before kicking off N+1.
710
+ if table is not None:
711
+ next_question = CouncilQuestion(
712
+ mode=question.mode,
713
+ user_prompt=current_user_prompt,
714
+ max_tokens=question.max_tokens,
715
+ )
716
+ next_estimates = estimate(
717
+ next_question, members, table,
718
+ project=project, original_ask=original_ask,
719
+ advisor_plans=advisor_plans,
720
+ )
721
+ next_round_usd = sum(e.total_usd for e in next_estimates)
722
+ else:
723
+ next_round_usd = 0.0
724
+
725
+ if (
726
+ budget.max_total_usd > 0
727
+ and spent["usd"] + next_round_usd > budget.max_total_usd
728
+ ):
729
+ raise DebateCapExceeded(
730
+ completed_round=round_number,
731
+ cost_so_far=spent["usd"],
732
+ next_estimate=next_round_usd,
733
+ cap=budget.max_total_usd,
734
+ )
735
+
736
+ if on_continue is not None:
737
+ checkpoint = DebateCheckpoint(
738
+ completed_round=round_number,
739
+ total_planned_rounds=max_rounds,
740
+ cost_so_far_usd=spent["usd"],
741
+ next_round_estimate_usd=next_round_usd,
742
+ last_round_responses=results,
743
+ )
744
+ if not on_continue(checkpoint):
745
+ return all_rounds
746
+
747
+ return all_rounds
748
+
749
+
750
+ @dataclass
751
+ class PeerReviewResult:
752
+ """Bundle returned by `run_peer_review()` (Phase 5 / F1).
753
+
754
+ `responses` carries the per-reviewer critiques. `label_to_source`
755
+ is the anonymisation map captured server-side so the audit-trail
756
+ JSON can rehydrate it without leaking provider identity to the
757
+ member at prompt time.
758
+
759
+ `persona_labels` is the (optional) Phase 6 / Step 3a wiring: when
760
+ the deliberation was an advisor-mode run, the source → persona
761
+ map flows through to the renderer so peer-review output can render
762
+ as `Response A (Contrarian)`. Plain-member runs leave it empty.
763
+ """
764
+
765
+ responses: list[CouncilResponse]
766
+ label_to_source: dict[str, str]
767
+ persona_labels: dict[str, str]
768
+
769
+
770
+ def run_peer_review(
771
+ members: list[ExternalAIClient],
772
+ deliberation_responses: list[CouncilResponse],
773
+ *,
774
+ budget: CostBudget | None = None,
775
+ table: PriceTable | None = None,
776
+ on_overrun: OnOverrunCallback | None = None,
777
+ project: ProjectContext | None = None,
778
+ original_ask: str = "",
779
+ max_tokens: int = DEFAULT_MAX_TOKENS,
780
+ persona_labels: dict[str, str] | None = None,
781
+ ) -> PeerReviewResult:
782
+ """Karpathy peer-review pass (Phase 5 / F1).
783
+
784
+ After the final deliberation round, each member sees the OTHER
785
+ members' deliberation outputs under neutral `Response-A` labels
786
+ (provider identity stripped; advisor persona labels preserved per
787
+ Phase 6 Step 3a) and emits a Karpathy-style critique:
788
+ strongest / weakest blind spot / what all missed / refinement.
789
+
790
+ Members never see their own response — the orchestrator filters
791
+ self before building the anonymised prompt. Errors in one member's
792
+ pass tag that member but never abort the round.
793
+
794
+ Cost gates flow through `consult([member], ...)`, so the same
795
+ budget + daily-ledger semantics as deliberation apply.
796
+ """
797
+ if not members or not deliberation_responses:
798
+ return PeerReviewResult(
799
+ responses=[], label_to_source={}, persona_labels={},
800
+ )
801
+
802
+ member_by_name = {m.name: m for m in members}
803
+ # ── source map: deliberation responses keyed by `provider:model` ─
804
+ # Errors and empty bodies are skipped — they leak nothing useful
805
+ # and would clutter the anonymised prompt with blanks.
806
+ by_source: dict[str, CouncilResponse] = {}
807
+ for r in deliberation_responses:
808
+ if r.error or not r.text.strip():
809
+ continue
810
+ source = f"{r.provider}:{r.model}"
811
+ by_source[source] = r
812
+
813
+ if len(by_source) < 2:
814
+ # Peer-review needs ≥ 2 distinct deliberation outputs (a
815
+ # reviewer with nothing else to review is a no-op).
816
+ return PeerReviewResult(
817
+ responses=[], label_to_source={}, persona_labels={},
818
+ )
819
+
820
+ persona_labels = dict(persona_labels or {})
821
+ review_responses: list[CouncilResponse] = []
822
+ # ── final label_to_source map captured from the LAST member call
823
+ # so the renderer / JSON dump has the deterministic A/B mapping.
824
+ # Each member sees a different N-1 subset (self filtered), but the
825
+ # ordering of `by_source` stays stable, so the label assignment is
826
+ # deterministic per artefact run.
827
+ last_label_to_source: dict[str, str] = {}
828
+
829
+ for reviewer in members:
830
+ scorer = f"{reviewer.name}:{reviewer.model}"
831
+ if reviewer.name not in member_by_name:
832
+ continue
833
+ others_pairs = [
834
+ (src, resp.text) for src, resp in by_source.items() if src != scorer
835
+ ]
836
+ if len(others_pairs) == 0:
837
+ continue
838
+ anon_text, label_to_source = anonymize_responses(
839
+ others_pairs, persona_labels=persona_labels,
840
+ )
841
+ if not anon_text:
842
+ continue
843
+ last_label_to_source = label_to_source
844
+ question = CouncilQuestion(
845
+ mode="prompt",
846
+ user_prompt=build_peer_review_user_prompt(anon_text),
847
+ max_tokens=max_tokens,
848
+ )
849
+ reviewed = consult(
850
+ [reviewer], question,
851
+ budget=budget, table=table, on_overrun=on_overrun,
852
+ project=project, original_ask=original_ask,
853
+ )
854
+ review_responses.extend(reviewed)
855
+
856
+ return PeerReviewResult(
857
+ responses=review_responses,
858
+ label_to_source=last_label_to_source,
859
+ persona_labels=persona_labels,
860
+ )
861
+
862
+
863
@dataclass
class ConsensusResult:
    """Bundle returned by `run_consensus_scoring()`.

    `bucket` is renderer-ready; `findings`, `scores`, and `metadata`
    are kept for audit-trail JSON (council-sessions/*.json). The two
    response lists keep the raw `consult()` output of each pass so a
    failed extraction or scoring call stays visible after the round.
    """

    # Findings grouped by `bucket_by_threshold()`; a default-constructed
    # bucket when the round produced no findings.
    bucket: ConsensusBucket
    # Every finding parsed out of the extraction pass, across all members.
    findings: list[Finding]
    # Scores whose anonymous label could be mapped back to a real finding id.
    scores: list[FindingScore]
    # Output of `aggregate_scores()` — presumably keyed by finding id; confirm.
    metadata: dict[str, ConsensusMetadata]
    # Raw pass-1 (extraction) responses, errored ones included.
    extraction_responses: list[CouncilResponse]
    # Raw pass-2 (scoring) responses, errored ones included.
    scoring_responses: list[CouncilResponse]
877
+
878
+
879
def run_consensus_scoring(
    members: list[ExternalAIClient],
    deliberation_responses: list[CouncilResponse],
    *,
    budget: CostBudget | None = None,
    table: PriceTable | None = None,
    on_overrun: OnOverrunCallback | None = None,
    project: ProjectContext | None = None,
    original_ask: str = "",
    max_tokens: int = DEFAULT_MAX_TOKENS,
    strong_threshold: float = 0.7,
    minority_threshold: float = 0.4,
) -> ConsensusResult:
    """Run the two-pass consensus round (Phase 4 / F3).

    Pass 1 (extraction) asks each member to restate its own deliberation
    as findings, which `parse_findings_response()` turns into `Finding`
    objects tagged with a `name:model` source. Pass 2 (scoring) shows
    each member only the findings whose source is *not* itself, under
    the labels produced by `anonymize_findings()`, and maps the returned
    scores back to real finding ids.

    The same `budget`, `table`, and `on_overrun` objects are handed to
    `consult()` in both passes. A member whose call fails (or returns
    nothing) is skipped for that pass; the round itself keeps going.

    Returns an empty-bucket `ConsensusResult` when there are no members,
    no deliberation responses, or no findings could be extracted.
    """
    extraction_log: list[CouncilResponse] = []
    scoring_log: list[CouncilResponse] = []
    findings: list[Finding] = []
    scores: list[FindingScore] = []

    def _nothing_to_score() -> ConsensusResult:
        # Shared shape for both early exits; `extraction_log` is still
        # empty on the first one and carries pass-1 output on the second.
        return ConsensusResult(
            bucket=ConsensusBucket(), findings=[], scores=[], metadata={},
            extraction_responses=extraction_log, scoring_responses=[],
        )

    def _ask(member: ExternalAIClient, prompt: str) -> list[CouncilResponse]:
        # One single-member consult with the round's shared cost settings.
        return consult(
            [member],
            CouncilQuestion(
                mode="prompt",
                user_prompt=prompt,
                max_tokens=max_tokens,
            ),
            budget=budget, table=table, on_overrun=on_overrun,
            project=project, original_ask=original_ask,
        )

    if not members or not deliberation_responses:
        return _nothing_to_score()

    # ── Pass 1: extraction ──────────────────────────────────────────
    roster = {m.name: m for m in members}
    for delib in deliberation_responses:
        owner = roster.get(delib.provider)
        # Skip responses with no matching member, an error, or no text.
        if owner is None or delib.error or not delib.text.strip():
            continue
        replies = _ask(owner, build_extraction_user_prompt(delib.text))
        extraction_log.extend(replies)
        if replies and not replies[0].error:
            findings.extend(
                parse_findings_response(
                    replies[0].text,
                    source=f"{owner.name}:{owner.model}",
                ),
            )

    if not findings:
        return _nothing_to_score()

    # ── Pass 2: scoring (each member rates the OTHERS' findings) ────
    for member in members:
        scorer = f"{member.name}:{member.model}"
        foreign = [f for f in findings if f.source != scorer]
        if not foreign:
            continue
        id_by_label: dict = {}
        text_by_label: dict = {}
        for label, finding in anonymize_findings(foreign).items():
            id_by_label[label] = finding.id
            text_by_label[label] = finding.text
        replies = _ask(member, build_scoring_user_prompt(text_by_label))
        scoring_log.extend(replies)
        if not replies or replies[0].error:
            continue
        for raw in parse_scores_response(replies[0].text, scorer=scorer):
            # The scorer answered with anonymous labels; drop any label
            # that does not map back to a finding it was shown.
            real_id = id_by_label.get(raw.finding_id)
            if real_id is not None:
                scores.append(FindingScore(
                    finding_id=real_id, scorer=raw.scorer, score=raw.score,
                    agree=raw.agree, reason=raw.reason,
                ))

    metadata = aggregate_scores(findings, scores)
    return ConsensusResult(
        bucket=bucket_by_threshold(
            findings, metadata,
            strong=strong_threshold, minority=minority_threshold,
        ),
        findings=findings, scores=scores,
        metadata=metadata, extraction_responses=extraction_log,
        scoring_responses=scoring_log,
    )
984
+
985
+
986
def _render_response_meta(r: CouncilResponse) -> str:
    """Build the italic per-member meta line: cost, tokens, latency.

    Reads ``r.metadata`` (treated as empty when falsy):

    * ``billable`` false → ``cost: subscription (<label>)`` with the
      label taken from ``subscription_label`` (``flat-rate`` when
      missing) and no token detail.
    * otherwise → ``cost: $X.XXXX`` when ``cost_usd`` is numeric, then
      the token counts; ``tokens_estimated`` adds a ``~`` prefix to both
      counts so heuristic numbers are recognisable.

    The latency in milliseconds always comes last.
    """
    info = r.metadata or {}
    approx_mark = "~" if info.get("tokens_estimated", False) else ""

    if info.get("billable", True):
        segments: list[str] = []
        cost = info.get("cost_usd")
        if isinstance(cost, (int, float)):
            segments.append(f"cost: ${cost:.4f}")
        segments.append(
            f"tokens: {approx_mark}{r.input_tokens} in / "
            f"{approx_mark}{r.output_tokens} out"
        )
    else:
        plan = info.get("subscription_label") or "flat-rate"
        segments = [f"cost: subscription ({plan})"]

    segments.append(f"{r.latency_ms} ms")
    return "*" + " · ".join(segments) + "*"
1013
+
1014
+
1015
# Lenses that show the Phase 9 confidence-explanation badge by default.
# The PR lens is left out on purpose: it stays terse so its existing
# "Must-fix / Nice-to-have" structure isn't drowned in scorer prose.
# Creative lenses (design/optimize) never reach this code path because
# they skip consensus scoring entirely.
_DEFAULT_EXPLAIN_LENSES: frozenset[str] = frozenset((
    "default",
    "analysis",
    "debate",
    "prompt",
    "roadmap",
    "diff",
    "files",
))
1023
+
1024
+
1025
def _default_explain_confidence(mode: str | None) -> bool:
    """Return the default on/off state of the confidence-explanation badge.

    ``None`` (no lens given) enables the badge; any other value enables
    it only when it is listed in ``_DEFAULT_EXPLAIN_LENSES``. Kept as a
    helper so the CLI ``--explain-confidence`` /
    ``--no-explain-confidence`` flags and the lens override path share
    one truth source.
    """
    return mode is None or mode in _DEFAULT_EXPLAIN_LENSES
1035
+
1036
+
1037
def render(
    responses: list[CouncilResponse],
    *,
    mode: str | None = None,
    prose_synthesis: bool | None = None,
    consensus: ConsensusResult | None = None,
    peer_review: PeerReviewResult | None = None,
    explain_confidence: bool | None = None,
) -> str:
    """Render the council output as `---`-separated Markdown blocks.

    Block order: optional consensus sections, one section per member
    response, optional peer-review block, then the
    `## Convergence / Divergence` synthesis slot.

    `mode` selects the synthesis template via `synthesis_template()`.

    `prose_synthesis` is the R4 Q4 escape hatch:
      - `True`  → bare slot (no template), regardless of mode
      - `False` → the `"default"` template, even on creative lenses
      - `None`  → whatever `synthesis_template(mode)` returns

    `consensus` (Phase 4 / F3) is rendered first, but only when at
    least one of its strong / findings / minority buckets is non-empty.

    `peer_review` (Phase 5 / F1) adds a Peer-Review block after the
    member sections and appends `peer_review_synthesis_addendum()` to
    the synthesis template; both happen only when it carries responses.

    `explain_confidence` overrides the per-lens badge default from
    `_default_explain_confidence(mode)`; `None` keeps that default.
    """
    if explain_confidence is None:
        explain = _default_explain_confidence(mode)
    else:
        explain = explain_confidence

    sections: list[str] = []

    if consensus is not None:
        tally = consensus.bucket
        if tally.strong or tally.findings or tally.minority:
            sections.append(_render_consensus(tally, explain=explain))

    for resp in responses:
        heading = f"## {resp.provider} · {resp.model}"
        if resp.error:
            # Failed members keep their section so the gap is visible.
            detail = f"*ERROR:* `{resp.error}`"
        else:
            detail = f"{_render_response_meta(resp)}\n\n{resp.text}"
        sections.append(f"{heading}\n\n{detail}")

    has_peer_review = peer_review is not None and bool(peer_review.responses)
    if has_peer_review:
        sections.append(_render_peer_review(peer_review))

    if prose_synthesis is True:
        template = ""
    elif prose_synthesis is False:
        template = synthesis_template("default")
    else:
        template = synthesis_template(mode)

    if has_peer_review:
        addendum = peer_review_synthesis_addendum()
        # With no base template the addendum stands alone, so its
        # leading whitespace is trimmed.
        template = f"{template}\n{addendum}" if template else addendum.lstrip()

    slot = template or "*to be summarised by the host agent*"
    sections.append(f"## Convergence / Divergence\n\n{slot}")
    return "\n\n---\n\n".join(sections)
1099
+
1100
+
1101
def _reviewer_label(index: int) -> str:
    """Map a 0-based position to A, B, …, Z, AA, AB, … (spreadsheet style)."""
    label = ""
    remaining = index + 1
    while remaining > 0:
        remaining, offset = divmod(remaining - 1, 26)
        label = chr(ord("A") + offset) + label
    return label


def _render_peer_review(peer_review: PeerReviewResult) -> str:
    """Render the peer-review block under deterministic Reviewer labels.

    Every entry of ``peer_review.responses`` gets a `### Reviewer X`
    sub-section, labelled by its position in that list. Errored
    responses keep their slot (and therefore their label) so the audit
    trail still surfaces the failure, but show the error tag instead of
    review text. Successful reviews are rendered with surrounding
    whitespace stripped.

    Labels run A…Z and then continue AA, AB, … so a council larger than
    26 members never produces a non-letter label.
    """
    sections = ["## Peer-Review (Karpathy)"]
    for position, review in enumerate(peer_review.responses):
        label = _reviewer_label(position)
        if review.error:
            body = f"*ERROR:* `{review.error}`"
        else:
            body = review.text.strip()
        sections.append(f"### Reviewer {label}\n\n{body}")
    return "\n\n".join(sections)
1118
+
1119
+
1120
def _render_consensus(bucket: ConsensusBucket, *, explain: bool = True) -> str:
    """Render the Strong / Findings / Minority sections, in that order.

    Empty buckets are omitted; if all three are empty the result is an
    empty string. ``explain`` is forwarded unchanged to
    ``_render_bucket`` and toggles the Phase 9 confidence-explanation
    line — pass ``False`` (the PR lens, ``--no-explain-confidence``) to
    keep the terse Phase 4 badge only.
    """
    layout = (
        ("## Strong Consensus\n\n", bucket.strong),
        ("## Findings\n\n", bucket.findings),
        (
            "## Minority Views\n\n"
            "*Sub-threshold by consensus; kept for audit trail.*\n\n",
            bucket.minority,
        ),
    )
    return "\n\n".join(
        heading + _render_bucket(entries, explain=explain)
        for heading, entries in layout
        if entries
    )
1146
+
1147
+
1148
def _truncate_reason(reason: str, *, limit: int = 120) -> str:
    """Collapse a scorer reason to a single line of at most ``limit`` chars.

    Phase 9 — the dissent summary must fit on one line. All whitespace
    runs (including newlines) become single spaces. Text longer than
    ``limit`` is cut at ``limit - 1`` characters, trailing whitespace is
    dropped, and an ellipsis is appended. The cut is purely
    character-based; no sentence boundary is looked for.

    Empty, whitespace-only, or ``None`` reasons render as
    ``no rationale``. A ``limit`` below 1 is treated as 1, so the result
    never degrades into a negative-index slice of the original text.
    """
    flat = " ".join(reason.split()) if reason else ""
    if not flat:
        return "no rationale"
    # Guard the slice below: with limit <= 0, `limit - 1` would be a
    # negative end index and keep almost the entire string.
    limit = max(limit, 1)
    if len(flat) <= limit:
        return flat
    return flat[: limit - 1].rstrip() + "…"
1161
+
1162
+
1163
def _render_bucket(
    items: list[tuple[Finding, ConsensusMetadata]],
    *,
    explain: bool = True,
) -> str:
    """Render one bucket of (finding, metadata) pairs as a Markdown list.

    Each finding becomes one bullet whose first badge line is the terse
    Phase 4 form (``strength · mean · scorers · dissent``). When
    ``explain`` is true *and* the finding has at least one scorer, a
    second badge line follows with the concur ratio, the first dissent
    reason (shortened by ``_truncate_reason``) plus a count of further
    dissents — or ``no dissent`` — and the mean evidence quality.

    Bullets are joined with single newlines; an empty ``items`` yields
    an empty string.
    """
    # NOTE(review): a Markdown hard line break needs two trailing spaces
    # before the newline; the separators below carry one. Confirm this
    # matches the released file and was not collapsed in transit.
    rendered: list[str] = []
    for finding, meta in items:
        badge = " · ".join((
            f"strength {meta.consensus_strength:.2f}",
            f"mean {meta.mean_score:.1f}/10",
            f"{len(meta.scorers)} scorers",
            f"{meta.dissent_count} dissent",
        ))
        entry = f"- **{finding.id}** — {finding.text} \n _{badge}_"

        if explain and meta.scorers:
            # Denominator: members who took a side; fall back to the
            # scorer count when neither counter is populated.
            voters = meta.concur_count + meta.dissent_count
            if voters <= 0:
                voters = len(meta.scorers)
            clauses: list[str] = [
                f"{meta.concur_count}/{voters} members concur",
            ]
            if not meta.dissent_reasons:
                clauses.append("no dissent")
            else:
                lead = meta.dissent_reasons[0]
                clauses.append(
                    f"{lead[0]} dissented citing {_truncate_reason(lead[1])}",
                )
                further = len(meta.dissent_reasons) - 1
                if further > 0:
                    clauses.append(f"{further} other dissent(s)")
            clauses.append(f"mean evidence-quality {meta.evidence_quality}")
            entry += " \n _" + "; ".join(clauses) + "_"

        rendered.append(entry)
    return "\n".join(rendered)