@event4u/agent-config 2.12.0 → 2.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/council/analysis.md +142 -0
- package/.agent-src/commands/council/debate.md +129 -0
- package/.agent-src/commands/council/default.md +8 -0
- package/.agent-src/commands/council/design.md +16 -12
- package/.agent-src/commands/council/optimize.md +16 -15
- package/.agent-src/commands/council/pr.md +12 -12
- package/.agent-src/commands/council.md +48 -2
- package/.agent-src/commands/memory/learn-low-impact.md +143 -0
- package/.agent-src/personas/advisors/contrarian.md +95 -0
- package/.agent-src/personas/advisors/executor.md +99 -0
- package/.agent-src/personas/advisors/expansionist.md +98 -0
- package/.agent-src/personas/advisors/first-principles.md +98 -0
- package/.agent-src/personas/advisors/outsider.md +102 -0
- package/.agent-src/rules/ask-when-uncertain.md +10 -6
- package/.agent-src/rules/copilot-routing.md +19 -0
- package/.agent-src/rules/devcontainer-routing.md +20 -0
- package/.agent-src/rules/external-reference-deep-dive.md +1 -1
- package/.agent-src/rules/fast-path-marker-visibility.md +38 -0
- package/.agent-src/rules/laravel-routing.md +20 -0
- package/.agent-src/rules/low-impact-corpus-privacy-floor.md +74 -0
- package/.agent-src/rules/symfony-routing.md +20 -0
- package/.agent-src/skills/ai-council/SKILL.md +388 -10
- package/.agent-src/skills/copilot-config/SKILL.md +1 -1
- package/.agent-src/skills/devcontainer/SKILL.md +1 -1
- package/.agent-src/skills/laravel/SKILL.md +1 -1
- package/.agent-src/skills/project-analysis-core/SKILL.md +1 -1
- package/.agent-src/skills/project-analyzer/SKILL.md +1 -1
- package/.agent-src/skills/symfony-workflow/SKILL.md +1 -1
- package/.agent-src/skills/universal-project-analysis/SKILL.md +1 -1
- package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
- package/.claude-plugin/marketplace.json +4 -1
- package/AGENTS.md +1 -1
- package/CHANGELOG.md +346 -124
- package/CONTRIBUTING.md +5 -0
- package/README.md +6 -6
- package/config/agent-settings.template.yml +5 -93
- package/config/gitignore-block.txt +6 -0
- package/docs/architecture/multi-tool-projection.md +53 -0
- package/docs/architecture/{compression.md → source-projection.md} +21 -3
- package/docs/architecture.md +15 -15
- package/docs/archive/CHANGELOG-pre-2.11.0.md +141 -0
- package/docs/catalog.md +25 -12
- package/docs/contracts/adr-architectural-consensus-mechanism.md +68 -0
- package/docs/contracts/adr-level-6-productization.md +7 -9
- package/docs/contracts/ai-council-config.md +658 -0
- package/docs/contracts/command-clusters.md +58 -2
- package/docs/contracts/command-surface-tiers.md +3 -2
- package/docs/contracts/cost-profile-defaults.md +5 -0
- package/docs/contracts/decision-engine-gates.md +5 -0
- package/docs/contracts/decision-trace-v1.md +2 -2
- package/docs/contracts/file-ownership-matrix.json +1735 -72
- package/docs/contracts/installed-tools-lockfile.md +2 -1
- package/docs/contracts/low-impact-corpus-format.md +95 -0
- package/docs/contracts/mcp-beta-criteria.md +6 -5
- package/docs/contracts/mcp-cloud-scope.md +5 -4
- package/docs/contracts/multi-tool-projection-fidelity.md +115 -0
- package/docs/contracts/release-trunk-sync.md +4 -3
- package/docs/contracts/tier-3-contrib-plugin.md +5 -6
- package/docs/getting-started.md +2 -2
- package/docs/guidelines/agent-infra/installed-tools-manifest.md +2 -1
- package/docs/installation.md +32 -0
- package/package.json +1 -1
- package/scripts/_archive/README.md +59 -0
- package/scripts/_cli/cmd_doctor.py +134 -0
- package/scripts/ai_council/_default_prices.py +10 -1
- package/scripts/ai_council/advisors.py +148 -0
- package/scripts/ai_council/airgap.py +165 -0
- package/scripts/ai_council/cli_hints.py +123 -0
- package/scripts/ai_council/clients.py +959 -5
- package/scripts/ai_council/compile_corpus.py +178 -0
- package/scripts/ai_council/confidence_gate.py +156 -0
- package/scripts/ai_council/config.py +1364 -0
- package/scripts/ai_council/consensus.py +329 -0
- package/scripts/ai_council/events_log.py +137 -0
- package/scripts/ai_council/learn_low_impact_preview.py +252 -0
- package/scripts/ai_council/low_impact.py +714 -0
- package/scripts/ai_council/low_impact_corpus.py +466 -0
- package/scripts/ai_council/low_impact_intake.py +163 -0
- package/scripts/ai_council/modes.py +6 -1
- package/scripts/ai_council/necessity.py +782 -0
- package/scripts/ai_council/orchestrator.py +872 -20
- package/scripts/ai_council/probation_gate.py +152 -0
- package/scripts/ai_council/prompts.py +335 -0
- package/scripts/ai_council/redact_low_impact_entry.py +155 -0
- package/scripts/ai_council/replay.py +155 -0
- package/scripts/ai_council/session.py +19 -1
- package/scripts/ai_council/shadow_dispatch.py +235 -0
- package/scripts/ai_council/solo_dispatch.py +226 -0
- package/scripts/audit_cloud_compatibility.py +74 -0
- package/scripts/audit_command_surface.py +363 -0
- package/scripts/check_compressed_paths.py +6 -1
- package/scripts/check_council_layout.py +11 -0
- package/scripts/ci_time_ratio.py +168 -0
- package/scripts/council_cli.py +2005 -30
- package/scripts/install.sh +12 -0
- package/scripts/measure_projection_bytes.py +159 -0
- package/scripts/measure_roadmap_trajectory.py +112 -0
- package/scripts/probe_projection_fidelity.py +202 -0
- package/scripts/score_skill_selection.py +198 -0
- package/scripts/skill_collision_clusters.py +162 -0
- /package/scripts/{_backfill_skill_domains.py → _archive/_backfill_skill_domains.py} +0 -0
- /package/scripts/{_bootstrap_tier_frontmatter.py → _archive/_bootstrap_tier_frontmatter.py} +0 -0
- /package/scripts/{_p43_bodies.py → _archive/_p43_bodies.py} +0 -0
- /package/scripts/{_p43_compress.py → _archive/_p43_compress.py} +0 -0
- /package/scripts/{_p4_migrate.py → _archive/_p4_migrate.py} +0 -0
- /package/scripts/{_phase2_shim_helper.py → _archive/_phase2_shim_helper.py} +0 -0
- /package/scripts/{_pilot_council_question.py → _archive/_pilot_council_question.py} +0 -0
package/scripts/council_cli.py
CHANGED
|
@@ -24,6 +24,41 @@ import yaml
|
|
|
24
24
|
|
|
25
25
|
REPO_ROOT = Path(__file__).resolve().parents[1]
|
|
26
26
|
SETTINGS_FILE = REPO_ROOT / ".agent-settings.yml"
|
|
27
|
+
AI_COUNCIL_FILE = REPO_ROOT / "agents" / ".ai-council.yml"
|
|
28
|
+
|
|
29
|
+
# Canonical output dirs per ai-council § "Output path convention".
|
|
30
|
+
# Enforced at write-time by `_validate_council_output_path` so shell-side
|
|
31
|
+
# `>` redirects and forgetful agents can't strand artefacts at agents/ root.
|
|
32
|
+
COUNCIL_CANONICAL_DIRS: dict[str, str] = {
|
|
33
|
+
"responses": "agents/council-responses",
|
|
34
|
+
"sessions": "agents/council-sessions",
|
|
35
|
+
"questions": "agents/council-questions",
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _validate_council_output_path(
    path_str: str, *, kind: str, subcommand: str,
) -> Path:
    """Check that an ``--output`` path sits inside its canonical council dir.

    The canonical directory is looked up in ``COUNCIL_CANONICAL_DIRS`` by
    ``kind`` (``responses``, ``sessions`` or ``questions``). A relative
    ``path_str`` is anchored at ``REPO_ROOT``; both sides are resolved
    before the containment check.

    Returns ``Path(path_str)`` exactly as supplied (not resolved).

    Raises ``argparse.ArgumentTypeError`` when the resolved path is not
    located under the canonical directory, and ``KeyError`` when ``kind``
    is not a key of ``COUNCIL_CANONICAL_DIRS``.
    """
    canonical_rel = COUNCIL_CANONICAL_DIRS[kind]
    canonical_root = (REPO_ROOT / canonical_rel).resolve()
    supplied = Path(path_str)
    # Relative paths are interpreted against the repo root, not the cwd.
    anchored = supplied if supplied.is_absolute() else REPO_ROOT / supplied
    try:
        anchored.resolve().relative_to(canonical_root)
    except ValueError as exc:
        message = (
            f"council:{subcommand} --output must live under "
            f"{canonical_rel}/ (per ai-council § Output path convention); "
            f"got {path_str!r}."
        )
        raise argparse.ArgumentTypeError(message) from exc
    # NOTE(review): the unresolved path is handed back, so a relative path
    # is validated against REPO_ROOT but presumably used relative to the
    # process cwd by the caller — confirm callers run from the repo root.
    return supplied
|
|
27
62
|
|
|
28
63
|
sys.path.insert(0, str(REPO_ROOT))
|
|
29
64
|
|
|
@@ -32,37 +67,188 @@ from scripts.ai_council.bundler import ( # noqa: E402
|
|
|
32
67
|
)
|
|
33
68
|
from scripts.ai_council.clients import ( # noqa: E402
|
|
34
69
|
DEFAULT_MAX_TOKENS, UNLIMITED_TOKENS_FALLBACK,
|
|
35
|
-
AnthropicClient,
|
|
36
|
-
|
|
70
|
+
AnthropicClient, AnthropicCliClient, CliClient, CliClientError,
|
|
71
|
+
CouncilResponse, ExternalAIClient, GeminiClient, GeminiCliClient,
|
|
72
|
+
ManualClient, OpenAIClient, OpenAICliClient, PerplexityClient,
|
|
73
|
+
PerplexityCliClient, XAIClient, XAICliClient,
|
|
74
|
+
load_anthropic_key, load_cli_call_counts, load_openai_key,
|
|
75
|
+
quota_summary_line, reset_cli_call_counts,
|
|
76
|
+
)
|
|
77
|
+
from scripts.ai_council.advisors import ( # noqa: E402
|
|
78
|
+
AdvisorPlan, build_persona_labels, plan_advisor_swap,
|
|
79
|
+
)
|
|
80
|
+
from scripts.ai_council.cli_hints import format_install_hints # noqa: E402
|
|
81
|
+
from scripts.ai_council.config import ( # noqa: E402
|
|
82
|
+
AdvisorConfig, CouncilConfig, CouncilConfigError,
|
|
83
|
+
load_council_config, resolve_api_key,
|
|
84
|
+
)
|
|
85
|
+
from scripts.ai_council.solo_dispatch import ( # noqa: E402
|
|
86
|
+
AuthCache, select_solo_member,
|
|
37
87
|
)
|
|
38
88
|
from scripts.ai_council.modes import ( # noqa: E402
|
|
39
89
|
InvalidModeError, resolve_mode,
|
|
40
90
|
)
|
|
91
|
+
from scripts.ai_council.events_log import append_event # noqa: E402
|
|
92
|
+
from scripts.ai_council.necessity import ( # noqa: E402
|
|
93
|
+
ClassificationResult, SizeFitVerdict, classify_necessity,
|
|
94
|
+
classify_size_fit, downgrade_message, educate_message,
|
|
95
|
+
)
|
|
41
96
|
from scripts.ai_council.orchestrator import ( # noqa: E402
|
|
42
|
-
|
|
97
|
+
ConsensusResult,
|
|
98
|
+
CostBudget, CouncilQuestion, DebateCapExceeded, DebateCheckpoint,
|
|
99
|
+
DebateCostEstimate,
|
|
100
|
+
PeerReviewResult, consult, estimate, estimate_debate_cost, render,
|
|
101
|
+
run_consensus_scoring, run_debate, run_peer_review,
|
|
43
102
|
)
|
|
44
103
|
from scripts.ai_council.pricing import ( # noqa: E402
|
|
45
104
|
PriceTable, estimate_cost, load_prices,
|
|
46
105
|
)
|
|
47
106
|
from scripts.ai_council.project_context import detect_project_context # noqa: E402
|
|
107
|
+
from scripts.ai_council.replay import ( # noqa: E402
|
|
108
|
+
DecisionReplayInputs, render_decision_replay,
|
|
109
|
+
)
|
|
48
110
|
|
|
49
111
|
SCHEMA_VERSION = 1
|
|
50
112
|
|
|
113
|
+
#: Provider names accepted under `mode=api`. Mirrors the routing table
|
|
114
|
+
#: in ``_construct_api_member``; both must stay in sync.
|
|
115
|
+
_API_PROVIDERS = frozenset({"anthropic", "openai", "gemini", "xai", "perplexity"})
|
|
116
|
+
|
|
117
|
+
#: Provider names with a wired ``mode=cli`` subclass. Mirrors the
|
|
118
|
+
#: routing table in ``_construct_cli_member``; both must stay in sync.
|
|
119
|
+
#: Phase 2 ships ``anthropic``; Phase 3 adds ``openai`` + ``gemini``;
|
|
120
|
+
#: Phase 4 adds ``xai`` + ``perplexity`` (community CLIs, no
|
|
121
|
+
#: subscription savings — they still consume the API key and remain
|
|
122
|
+
#: ``billable=True``).
|
|
123
|
+
_CLI_PROVIDERS = frozenset({"anthropic", "openai", "gemini", "xai", "perplexity"})
|
|
124
|
+
|
|
51
125
|
|
|
52
126
|
class CouncilDisabledError(RuntimeError):
    """Signal that the council cannot run with the current configuration.

    Raised when ``ai_council.enabled`` is false or no member is enabled;
    the member-construction helpers in this module also raise it for a
    member that has no usable transport.
    """
|
|
54
128
|
|
|
55
129
|
|
|
56
|
-
def load_settings(
|
|
130
|
+
def load_settings(
    path: Path = SETTINGS_FILE,
    *,
    ai_council_path: Path = AI_COUNCIL_FILE,
) -> dict[str, Any]:
    """Return the merged agent settings with the council overlay applied.

    The base settings come from the centralized loader,
    ``load_agent_settings(project_path=path)``. Per the
    road-to-portable-dev-preferences P3 migration, the tolerance contract
    (missing file / malformed YAML / no PyYAML) is handled uniformly by
    that loader, and ``ai_council.*`` keys are not whitelisted, so the
    project file remains authoritative for council config.

    Step-2 council-redesign overlay: when ``ai_council_path``
    (``agents/.ai-council.yml`` by default) exists, it is the single
    source of truth. The validated config is projected back onto the
    legacy dict shape and replaces ``settings['ai_council']``, winning
    over any legacy block in ``.agent-settings.yml``. When the file is
    absent the base settings are returned untouched, which keeps the
    pre-step-2 path alive during migration.
    """
    # Imported lazily inside the function, as the original code does.
    from scripts._lib.agent_settings import load_agent_settings

    merged = load_agent_settings(project_path=path)
    if not ai_council_path.exists():
        return merged
    council_cfg = load_council_config(ai_council_path)
    merged["ai_council"] = _synthesize_ai_council_block(council_cfg)
    return merged
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _synthesize_ai_council_block(cfg: CouncilConfig) -> dict[str, Any]:
    """Flatten a validated ``CouncilConfig`` into the legacy dict layout.

    ``build_members`` and the ``_resolve_*`` helpers read the legacy
    ``ai_council.*`` keys, so the projection keeps that shape and key
    order to avoid any downstream caller change. ``api_key_ref`` is copied
    through verbatim; raw keys are never resolved here (resolution is
    lazy, per enabled member, inside ``_construct_api_member``).

    Sequences and mappings taken from ``cfg`` are copied into fresh
    ``list`` / ``dict`` objects rather than shared with the config.
    """

    def project_member(member: Any) -> dict[str, Any]:
        # ``enabled`` / ``model`` are always present; the rest only when set.
        projected: dict[str, Any] = {
            "enabled": member.enabled,
            "model": member.model,
        }
        for optional_key in ("api_key_ref", "mode", "binary"):
            value = getattr(member, optional_key)
            if value is not None:
                projected[optional_key] = value
        if member.model_ladder:
            projected["model_ladder"] = list(member.model_ladder)
        return projected

    def project_advisor(advisor: Any) -> dict[str, Any]:
        projected: dict[str, Any] = {
            "enabled": advisor.enabled,
            "member": advisor.member,
            "persona": advisor.persona,
        }
        if advisor.model is not None:
            projected["model"] = advisor.model
        return projected

    def project_disclosure(disclosure: Any) -> dict[str, Any]:
        return {
            "mode": disclosure.mode,
            "threshold_usd": disclosure.threshold_usd,
            "show_per_member": disclosure.show_per_member,
        }

    def project_downgrade(downgrade: Any) -> dict[str, Any]:
        return {
            "enabled": downgrade.enabled,
            "auto_apply": downgrade.auto_apply,
        }

    members = {
        member_name: project_member(member)
        for member_name, member in cfg.members.items()
    }
    advisors = {
        advisor_name: project_advisor(advisor)
        for advisor_name, advisor in cfg.advisors.items()
    }

    defaults = cfg.defaults
    budget = cfg.cost_budget
    scoring = cfg.consensus_scoring
    cli_budget = cfg.cli_call_budget
    classifier = cfg.necessity_classifier
    debate = cfg.debate
    per_lens = cfg.lens_overrides

    return {
        "enabled": cfg.enabled,
        "mode": defaults.mode,
        "min_rounds": defaults.min_rounds,
        "deep_min_rounds": defaults.deep_min_rounds,
        "max_output_tokens": defaults.max_output_tokens,
        "session_retention_days": defaults.session_retention_days,
        "debate_max_rounds": defaults.debate_max_rounds,
        "cost_budget": {
            "max_input_tokens": budget.max_input_tokens,
            "max_output_tokens": budget.max_output_tokens,
            "max_calls": budget.max_calls,
            "max_total_usd": budget.max_total_usd,
        },
        "consensus_scoring": {
            "enabled": scoring.enabled,
            "strong_threshold": scoring.strong_threshold,
            "minority_threshold": scoring.minority_threshold,
            "lenses": list(scoring.lenses),
        },
        "cli_call_budget": {
            "max_calls_per_day": dict(cli_budget.max_calls_per_day),
            "warn_at": cli_budget.warn_at,
        },
        "necessity_classifier": {
            "enabled": classifier.enabled,
            "mode": classifier.mode,
            "user_explicit_mode": classifier.user_explicit_mode,
        },
        "model_downgrade": project_downgrade(cfg.model_downgrade),
        "debate": {
            "max_cost_usd": debate.max_cost_usd,
            "cost_disclosure": project_disclosure(debate.cost_disclosure),
        },
        "lens_overrides": {
            "necessity_classifier_mode": dict(
                per_lens.necessity_classifier_mode,
            ),
            "necessity_classifier_user_explicit_mode": dict(
                per_lens.necessity_classifier_user_explicit_mode,
            ),
            "model_downgrade": {
                lens: project_downgrade(downgrade)
                for lens, downgrade in per_lens.model_downgrade.items()
            },
            "cost_disclosure": {
                lens: project_disclosure(disclosure)
                for lens, disclosure in per_lens.cost_disclosure.items()
            },
        },
        "members": members,
        "advisors": advisors,
    }
|
|
66
252
|
|
|
67
253
|
|
|
68
254
|
def build_members(
|
|
@@ -71,6 +257,7 @@ def build_members(
|
|
|
71
257
|
invocation_mode: str | None = None,
|
|
72
258
|
model_overrides: dict[str, str] | None = None,
|
|
73
259
|
siblings_overrides: dict[str, list[str]] | None = None,
|
|
260
|
+
skipped: list[dict[str, Any]] | None = None,
|
|
74
261
|
) -> list[ExternalAIClient]:
|
|
75
262
|
"""Construct enabled council members from settings.
|
|
76
263
|
|
|
@@ -88,6 +275,16 @@ def build_members(
|
|
|
88
275
|
becomes its own billable member with independent cost tracking.
|
|
89
276
|
Mutually exclusive with `model_overrides` for the same provider;
|
|
90
277
|
requires `mode=api`; provider must be enabled in settings.
|
|
278
|
+
|
|
279
|
+
`skipped` is an optional caller-owned list. When provided, each
|
|
280
|
+
cli-mode member that fails to construct (binary missing) is appended
|
|
281
|
+
as `{"member": <name>, "reason": "binary_missing", "detail": <msg>}`
|
|
282
|
+
instead of crashing the loop. The skip is also surfaced on stderr
|
|
283
|
+
as `[council] SKIP <name>: <detail>` so the run log carries it
|
|
284
|
+
even when the caller passes ``None``. Phase 5 Step 2 contract:
|
|
285
|
+
a missing CLI binary degrades that member only — never silently
|
|
286
|
+
drops, never crashes the whole council unless every configured
|
|
287
|
+
member ends up skipped.
|
|
91
288
|
"""
|
|
92
289
|
ai = (settings.get("ai_council") or {}) if isinstance(settings, dict) else {}
|
|
93
290
|
if not ai.get("enabled"):
|
|
@@ -97,6 +294,9 @@ def build_members(
|
|
|
97
294
|
)
|
|
98
295
|
members_cfg = ai.get("members") or {}
|
|
99
296
|
global_mode = ai.get("mode")
|
|
297
|
+
cli_budget_cfg = (ai.get("cli_call_budget") or {}) if isinstance(ai, dict) else {}
|
|
298
|
+
cli_caps = (cli_budget_cfg.get("max_calls_per_day") or {}) if isinstance(cli_budget_cfg, dict) else {}
|
|
299
|
+
cli_warn_at = float(cli_budget_cfg.get("warn_at", 0.8)) if isinstance(cli_budget_cfg, dict) else 0.8
|
|
100
300
|
overrides = model_overrides or {}
|
|
101
301
|
siblings = siblings_overrides or {}
|
|
102
302
|
unknown = set(overrides) - set(members_cfg)
|
|
@@ -138,12 +338,48 @@ def build_members(
|
|
|
138
338
|
raise CouncilDisabledError(
|
|
139
339
|
f"--siblings requires mode=api for member {name!r} (got {mode!r})."
|
|
140
340
|
)
|
|
341
|
+
api_key_ref = cfg.get("api_key_ref")
|
|
141
342
|
for sib_model in siblings[name]:
|
|
142
|
-
members.append(
|
|
343
|
+
members.append(
|
|
344
|
+
_construct_api_member(name, sib_model, api_key_ref=api_key_ref),
|
|
345
|
+
)
|
|
143
346
|
continue
|
|
144
347
|
model = overrides.get(name) or cfg.get("model")
|
|
145
|
-
if mode == "api" and name in
|
|
146
|
-
members.append(
|
|
348
|
+
if mode == "api" and name in _API_PROVIDERS:
|
|
349
|
+
members.append(
|
|
350
|
+
_construct_api_member(name, model, api_key_ref=cfg.get("api_key_ref")),
|
|
351
|
+
)
|
|
352
|
+
elif mode == "cli" and name in _CLI_PROVIDERS:
|
|
353
|
+
try:
|
|
354
|
+
members.append(
|
|
355
|
+
_construct_cli_member(
|
|
356
|
+
name,
|
|
357
|
+
model,
|
|
358
|
+
binary=cfg.get("binary"),
|
|
359
|
+
max_calls_per_day=cli_caps.get(name),
|
|
360
|
+
warn_at=cli_warn_at,
|
|
361
|
+
),
|
|
362
|
+
)
|
|
363
|
+
except CliClientError as exc:
|
|
364
|
+
_, _, display = _CLI_FACTORY[name]
|
|
365
|
+
detail = (
|
|
366
|
+
f"{exc} Install the {display} CLI or flip "
|
|
367
|
+
f"ai_council.members.{name}.mode back to 'api'."
|
|
368
|
+
)
|
|
369
|
+
entry = {
|
|
370
|
+
"member": name,
|
|
371
|
+
"reason": "binary_missing",
|
|
372
|
+
"detail": detail,
|
|
373
|
+
}
|
|
374
|
+
if skipped is not None:
|
|
375
|
+
skipped.append(entry)
|
|
376
|
+
print(f"[council] SKIP {name}: {detail}", file=sys.stderr)
|
|
377
|
+
continue
|
|
378
|
+
elif mode == "cli":
|
|
379
|
+
raise CouncilDisabledError(
|
|
380
|
+
f"member {name!r} resolves to mode=cli but no CLI client is "
|
|
381
|
+
f"wired (known: {sorted(_CLI_PROVIDERS)!r})."
|
|
382
|
+
)
|
|
147
383
|
elif mode == "manual":
|
|
148
384
|
members.append(ManualClient(name=name, model=model or "manual"))
|
|
149
385
|
elif mode == "playwright":
|
|
@@ -152,9 +388,17 @@ def build_members(
|
|
|
152
388
|
)
|
|
153
389
|
else:
|
|
154
390
|
raise CouncilDisabledError(
|
|
155
|
-
f"member {name!r} has no transport — mode={mode},
|
|
391
|
+
f"member {name!r} has no transport — mode={mode}, "
|
|
392
|
+
f"name not in {sorted(_API_PROVIDERS)!r}."
|
|
156
393
|
)
|
|
157
394
|
if not members:
|
|
395
|
+
if skipped:
|
|
396
|
+
names = ", ".join(s["member"] for s in skipped)
|
|
397
|
+
raise CouncilDisabledError(
|
|
398
|
+
f"no council member could be constructed — every enabled "
|
|
399
|
+
f"member was skipped ({names}). See [council] SKIP entries "
|
|
400
|
+
f"on stderr for the per-member reason."
|
|
401
|
+
)
|
|
158
402
|
raise CouncilDisabledError(
|
|
159
403
|
"no council member has `enabled: true` — enable at least one in "
|
|
160
404
|
".agent-settings.yml under ai_council.members.*."
|
|
@@ -162,16 +406,179 @@ def build_members(
|
|
|
162
406
|
return members
|
|
163
407
|
|
|
164
408
|
|
|
165
|
-
def
|
|
166
|
-
|
|
409
|
+
def _build_advisor_plans(
    ai_cfg: dict[str, Any],
    repo_root: Path,
) -> dict[str, AdvisorPlan]:
    """Rebuild ``AdvisorConfig`` objects from the dict and plan the swaps.

    ``ai_cfg["advisors"]`` is expected in the legacy dict shape produced
    by ``_synthesize_ai_council_block``. Entries that are not dicts are
    ignored; missing fields default to ``enabled=False``, ``member=""``,
    ``persona=""`` and ``model=None``. The resulting configs are handed to
    ``plan_advisor_swap`` together with ``repo_root``; skipping disabled
    advisors and the one-per-provider rule are that function's job.

    Returns an empty dict without calling ``plan_advisor_swap`` when
    ``ai_cfg`` is not a dict or carries no (or an empty) advisor block.
    """
    if not isinstance(ai_cfg, dict):
        return {}
    advisor_block = ai_cfg.get("advisors")
    if not advisor_block:
        return {}
    configs: dict[str, AdvisorConfig] = {
        advisor_name: AdvisorConfig(
            name=advisor_name,
            enabled=bool(spec.get("enabled", False)),
            member=str(spec.get("member", "")),
            persona=str(spec.get("persona", "")),
            model=spec.get("model"),
        )
        for advisor_name, spec in advisor_block.items()
        if isinstance(spec, dict)
    }
    return plan_advisor_swap(configs, repo_root)
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def _advisor_model_overrides(
    plans: dict[str, AdvisorPlan],
    explicit: dict[str, str] | None,
) -> dict[str, str]:
    """Combine advisor-bound model overrides with explicit ``--model`` ones.

    Every plan with a truthy ``model_override`` contributes an entry keyed
    by its member name. Entries from ``explicit`` are layered on top, so
    the user's CLI flag always beats an advisor-bound model for the same
    member. A new dict is returned; neither argument is mutated.
    """
    combined: dict[str, str] = {
        member_name: advisor_plan.model_override
        for member_name, advisor_plan in plans.items()
        if advisor_plan.model_override
    }
    # Applied last so an explicit ``--model`` replaces an advisor-bound one.
    combined.update(explicit or {})
    return combined
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def _format_advisor_summary(
    plans: dict[str, AdvisorPlan],
    members: list[ExternalAIClient],
) -> str:
    """Build one ``advisor: <persona> on <member> via <model>`` line per plan.

    The model shown is the one of the constructed member with the same
    name; when no such member exists the plan's ``model_override`` is
    used, and ``?`` when that is empty too. Lines are joined with
    newlines in ``plans`` iteration order. Returns ``""`` for no plans.
    """
    if not plans:
        return ""

    # Later members with a duplicate name overwrite earlier ones.
    model_by_member: dict[str, Any] = {}
    for client in members:
        model_by_member[client.name] = client.model

    def shown_model(member_name: str, advisor_plan: Any) -> Any:
        fallback = advisor_plan.model_override or "?"
        return model_by_member.get(member_name, fallback)

    return "\n".join(
        f" advisor: {advisor_plan.display_name} on {member_name} "
        f"via {shown_model(member_name, advisor_plan)}"
        for member_name, advisor_plan in plans.items()
    )
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def _construct_api_member(
    name: str,
    model: str | None,
    *,
    api_key_ref: str | None = None,
) -> ExternalAIClient:
    """Instantiate the api-mode client for a known provider ``name``.

    ``api_key_ref`` carries the validated ``file:<path>`` / ``env:<VAR>``
    reference from ``agents/.ai-council.yml``. It is resolved lazily here
    so the council does not require keys for disabled providers.

    * ``anthropic`` / ``openai``: a falsy ``api_key_ref`` falls back to
      the legacy per-provider loader, keeping the pre-step-2
      ``.agent-settings.yml`` flow working during migration. Tests
      monkeypatch those loaders, so they are looked up at call time.
    * ``gemini`` / ``xai`` / ``perplexity``: there is no legacy loader; a
      falsy ``api_key_ref`` raises ``CouncilDisabledError``.

    A falsy ``model`` selects the provider's built-in default model.

    Raises ``CouncilDisabledError`` for an unknown provider name or a
    missing mandatory ``api_key_ref``.
    """
    scope = f"ai_council.members.{name}"

    if name == "anthropic":
        if api_key_ref:
            key = resolve_api_key(api_key_ref, scope=scope)
        else:
            key = load_anthropic_key()
        return AnthropicClient(model=model or "claude-sonnet-4-5", api_key=key)

    if name == "openai":
        if api_key_ref:
            key = resolve_api_key(api_key_ref, scope=scope)
        else:
            key = load_openai_key()
        return OpenAIClient(model=model or "gpt-4o", api_key=key)

    # Providers without a legacy loader: (client class, default model,
    # env var shown in the hint). Built per call so module-level names are
    # resolved at call time, like in the branch-per-provider form.
    reference_only = {
        "gemini": (GeminiClient, "gemini-2.5-pro", "GEMINI_API_KEY"),
        "xai": (XAIClient, "grok-4", "XAI_API_KEY"),
        "perplexity": (PerplexityClient, "sonar-pro", "PERPLEXITY_API_KEY"),
    }
    if name in reference_only:
        client_cls, fallback_model, env_var = reference_only[name]
        if not api_key_ref:
            raise CouncilDisabledError(
                f"member {name!r} requires api_key_ref in agents/.ai-council.yml "
                f"(e.g. `env:{env_var}`) — no legacy fallback."
            )
        key = resolve_api_key(api_key_ref, scope=scope)
        return client_cls(model=model or fallback_model, api_key=key)

    raise CouncilDisabledError(
        f"member {name!r} has no api transport "
        f"(known: {sorted(_API_PROVIDERS)!r})."
    )
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
#: Provider → (class-attribute-name, default_model, human_display) for
|
|
531
|
+
#: cli-mode routing. The class ref is looked up via ``globals()`` on this
|
|
532
|
+
#: module at call time so ``monkeypatch.setattr(council_cli, "AnthropicCliClient", X)``
|
|
533
|
+
#: keeps working from tests. The display string is used by
|
|
534
|
+
#: ``build_members`` to render the "Install the <X> CLI" hint in
|
|
535
|
+
#: skip-with-reason logs without re-importing every subclass at the
|
|
536
|
+
#: call site.
|
|
537
|
+
_CLI_FACTORY: dict[str, tuple[str, str, str]] = {
|
|
538
|
+
"anthropic": ("AnthropicCliClient", "claude-sonnet-4-5", "Claude"),
|
|
539
|
+
"openai": ("OpenAICliClient", "gpt-5", "Codex"),
|
|
540
|
+
"gemini": ("GeminiCliClient", "gemini-2.5-pro", "Gemini"),
|
|
541
|
+
"xai": ("XAICliClient", "grok-4", "Grok (community)"),
|
|
542
|
+
"perplexity": ("PerplexityCliClient", "sonar-pro", "Perplexity (community)"),
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def _construct_cli_member(
    name: str,
    model: str | None,
    *,
    binary: str | None = None,
    max_calls_per_day: int | None = None,
    warn_at: float = 0.8,
) -> ExternalAIClient:
    """Instantiate the cli-mode client for a known provider ``name``.

    The client class and default model come from ``_CLI_FACTORY``; the
    class is fetched from this module's globals at call time so tests can
    monkeypatch it. A falsy ``model`` selects the factory default.

    ``binary`` overrides the provider's default executable (e.g.
    ``/opt/claude``); ``None`` leaves discovery to the client.
    ``max_calls_per_day`` is the daily quota passed to the client;
    ``None`` disables the local counter (only stderr-based quota
    detection remains). ``warn_at`` (step-8 P1) is the fractional
    threshold that flips the pre-run quota summary to its warning shape;
    the 0.8 default mirrors ``CliCallBudgetConfig``.

    A ``CliClientError`` raised by the client constructor is deliberately
    not caught here: ``build_members`` turns it into a per-member skip
    entry instead of failing the whole council.

    Raises ``CouncilDisabledError`` when ``name`` has no factory entry.
    """
    factory_entry = _CLI_FACTORY.get(name)
    if factory_entry is None:
        raise CouncilDisabledError(
            f"member {name!r} has no cli transport "
            f"(known: {sorted(_CLI_PROVIDERS)!r})."
        )
    class_attr, fallback_model, _display = factory_entry
    client_cls = globals()[class_attr]
    return client_cls(
        model=model or fallback_model,
        binary=binary,
        max_calls_per_day=max_calls_per_day,
        warn_at=warn_at,
    )
|
|
176
583
|
|
|
177
584
|
|
|
@@ -180,8 +587,16 @@ def build_question(
|
|
|
180
587
|
input_path: Path,
|
|
181
588
|
input_mode: str,
|
|
182
589
|
max_tokens: int,
|
|
590
|
+
prompt_mode_override: str | None = None,
|
|
183
591
|
) -> tuple[CouncilQuestion, str]:
|
|
184
|
-
"""Bundle the input file. Returns (question, artefact_label).
|
|
592
|
+
"""Bundle the input file. Returns (question, artefact_label).
|
|
593
|
+
|
|
594
|
+
`prompt_mode_override` swaps the per-mode neutrality addendum looked
|
|
595
|
+
up by `system_prompt_for(question.mode, ...)`. The bundle shape is
|
|
596
|
+
unchanged — the bundler still uses `input_mode` to format the
|
|
597
|
+
artefact. Routed by the `/council pr|design|optimize|analysis`
|
|
598
|
+
wrappers via the `--prompt-mode` CLI flag.
|
|
599
|
+
"""
|
|
185
600
|
if input_mode == "prompt":
|
|
186
601
|
text = input_path.read_text(encoding="utf-8")
|
|
187
602
|
ctx = bundle_prompt(text)
|
|
@@ -193,13 +608,19 @@ def build_question(
|
|
|
193
608
|
raise ValueError(
|
|
194
609
|
f"unsupported input mode: {input_mode!r} (use prompt | roadmap)"
|
|
195
610
|
)
|
|
196
|
-
|
|
611
|
+
mode = prompt_mode_override or ctx.mode
|
|
612
|
+
return CouncilQuestion(mode=mode, user_prompt=ctx.text,
|
|
197
613
|
max_tokens=max_tokens), artefact
|
|
198
614
|
|
|
199
615
|
|
|
200
616
|
def format_estimate_table(
|
|
201
617
|
members: list[ExternalAIClient],
|
|
202
618
|
estimates: list[Any],
|
|
619
|
+
*,
|
|
620
|
+
consensus_delta_usd: float = 0.0,
|
|
621
|
+
consensus_extra_calls: int = 0,
|
|
622
|
+
peer_review_delta_usd: float = 0.0,
|
|
623
|
+
peer_review_extra_calls: int = 0,
|
|
203
624
|
) -> str:
|
|
204
625
|
rows = [
|
|
205
626
|
f" {m.name}/{m.model}: "
|
|
@@ -207,10 +628,265 @@ def format_estimate_table(
|
|
|
207
628
|
for m, e in zip(members, estimates)
|
|
208
629
|
]
|
|
209
630
|
total = sum(e.total_usd for e in estimates)
|
|
631
|
+
if consensus_extra_calls > 0:
|
|
632
|
+
rows.append(
|
|
633
|
+
f" +consensus scoring: +{consensus_extra_calls} calls "
|
|
634
|
+
f"(~+${consensus_delta_usd:.4f})"
|
|
635
|
+
)
|
|
636
|
+
total += consensus_delta_usd
|
|
637
|
+
if peer_review_extra_calls > 0:
|
|
638
|
+
rows.append(
|
|
639
|
+
f" +peer-review: +{peer_review_extra_calls} calls "
|
|
640
|
+
f"(~+${peer_review_delta_usd:.4f})"
|
|
641
|
+
)
|
|
642
|
+
total += peer_review_delta_usd
|
|
210
643
|
rows.append(f" TOTAL: ${total:.4f}")
|
|
211
644
|
return "\n".join(rows)
|
|
212
645
|
|
|
213
646
|
|
|
647
|
+
def _consensus_cost_delta(
    ai_cfg: dict[str, Any],
    prompt_mode: str,
    estimates: list[Any],
    n_billable: int,
) -> tuple[int, float]:
    """Project the extra calls and spend added by the consensus round.

    The round only counts when ``consensus_scoring.enabled`` is truthy and
    ``prompt_mode`` is listed in ``consensus_scoring.lenses`` (which falls
    back to ``["analysis"]`` when missing or empty).

    Args:
        ai_cfg: The ``ai_council`` settings mapping.
        prompt_mode: Lens of the current invocation.
        estimates: Base estimates; each item must expose ``total_usd``.
        n_billable: Number of billable members.

    Returns:
        ``(extra_calls, extra_usd)`` — two calls per billable member
        (extraction + scoring) and twice the summed base estimate, or
        ``(0, 0.0)`` when the round is inactive.
    """
    scoring_cfg = ai_cfg.get("consensus_scoring") or {}
    round_active = bool(scoring_cfg.get("enabled")) and prompt_mode in (
        scoring_cfg.get("lenses") or ["analysis"]
    )
    if not round_active:
        return 0, 0.0
    # Each of the two phases is priced at the full base estimate (ceiling).
    base_usd = sum(est.total_usd for est in estimates)
    return 2 * n_billable, 2.0 * base_usd
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def _maybe_run_consensus(
    ai_cfg: dict[str, Any],
    question: CouncilQuestion,
    members: list[ExternalAIClient],
    responses: list[CouncilResponse],
    budget: CostBudget,
    table: PriceTable,
    project: Any,
    args: argparse.Namespace,
) -> ConsensusResult | None:
    """Run consensus scoring if the config enables it for this lens.

    Returns ``None`` without calling anything when
    ``consensus_scoring.enabled`` is falsy or ``question.mode`` is not in
    ``consensus_scoring.lenses`` (default ``["analysis"]``). Otherwise the
    result of ``run_consensus_scoring`` is returned unchanged.

    The thresholds default to ``0.7`` (strong) and ``0.4`` (minority) and
    are coerced with ``float``.
    """
    scoring_cfg = ai_cfg.get("consensus_scoring") or {}
    if not scoring_cfg.get("enabled"):
        return None
    if question.mode not in (scoring_cfg.get("lenses") or ["analysis"]):
        return None
    # Built in the same order the call evaluates its keyword arguments.
    scoring_kwargs = {
        "budget": budget,
        "table": table,
        "project": project,
        "original_ask": args.original_ask,
        "max_tokens": question.max_tokens,
        "strong_threshold": float(scoring_cfg.get("strong_threshold", 0.7)),
        "minority_threshold": float(scoring_cfg.get("minority_threshold", 0.4)),
    }
    return run_consensus_scoring(members, responses, **scoring_kwargs)
|
|
696
|
+
|
|
697
|
+
|
|
698
|
+
def _serialise_consensus(consensus: ConsensusResult) -> dict[str, Any]:
    """Flatten a ``ConsensusResult`` into plain dicts and lists.

    The payload carries five keys: ``findings``, ``scores``, ``metadata``
    (keyed by finding id), ``extraction_responses`` and
    ``scoring_responses``. The two response lists are delegated to
    ``_serialise_responses``.
    """
    findings = [
        {"id": finding.id, "source": finding.source, "text": finding.text}
        for finding in consensus.findings
    ]
    scores = [
        {
            "finding_id": entry.finding_id,
            "scorer": entry.scorer,
            "score": entry.score,
            "agree": entry.agree,
            "reason": entry.reason,
        }
        for entry in consensus.scores
    ]
    metadata: dict[Any, dict[str, Any]] = {}
    for finding_id, meta in consensus.metadata.items():
        metadata[finding_id] = {
            "mean_score": meta.mean_score,
            "agreement_rate": meta.agreement_rate,
            "consensus_strength": meta.consensus_strength,
            "dissent_count": meta.dissent_count,
            # Copied into fresh lists so the payload holds no shared
            # references to the result's own sequences.
            "scorers": list(meta.scorers),
            "concur_count": meta.concur_count,
            "dissent_reasons": [list(pair) for pair in meta.dissent_reasons],
            "evidence_quality": meta.evidence_quality,
        }
    return {
        "findings": findings,
        "scores": scores,
        "metadata": metadata,
        "extraction_responses": _serialise_responses(
            consensus.extraction_responses,
        ),
        "scoring_responses": _serialise_responses(consensus.scoring_responses),
    }
|
|
728
|
+
|
|
729
|
+
|
|
730
|
+
def _decision_replay_settings(
    ai_cfg: dict[str, Any], lens: str,
) -> tuple[bool, bool]:
    """Resolve ``(enabled, include_member_arguments)`` for ``lens``.

    Both flags start from the global ``decision_replay`` block (each
    defaulting to ``True``). A dict at ``lenses[lens]["decision_replay"]``
    then overrides whichever of the two keys it defines; any other value
    there is ignored. Results are coerced with ``bool``.

    NOTE(review): the sibling resolvers in this file read per-lens
    overrides from ``lens_overrides``; this one reads ``lenses`` — confirm
    the config dict passed in actually carries a ``lenses`` mapping.
    """
    defaults = ai_cfg.get("decision_replay") or {}
    resolved = {
        "enabled": defaults.get("enabled", True),
        "include_member_arguments": defaults.get(
            "include_member_arguments", True,
        ),
    }
    per_lens = (
        (ai_cfg.get("lenses") or {}).get(lens) or {}
    ).get("decision_replay")
    if isinstance(per_lens, dict):
        for key in resolved:
            if key in per_lens:
                resolved[key] = per_lens[key]
    return bool(resolved["enabled"]), bool(resolved["include_member_arguments"])
|
|
752
|
+
|
|
753
|
+
|
|
754
|
+
def _maybe_write_decision_replay(
    *,
    ai_cfg: dict[str, Any],
    lens: str,
    out_path: Path,
    consensus: ConsensusResult | None,
    deliberation: list[CouncilResponse],
    original_ask: str,
) -> Path | None:
    """Render and write ``decision-replay.md`` next to ``out_path``.

    Nothing is written — and ``None`` is returned — when the replay is
    disabled for ``lens`` (per ``_decision_replay_settings``) or when
    ``consensus`` is ``None``. Otherwise the rendered text is written as
    UTF-8 into ``out_path``'s parent directory (created if missing) and the
    written file's path is returned.
    """
    replay_on, with_arguments = _decision_replay_settings(ai_cfg, lens)
    if consensus is None or not replay_on:
        return None
    # Findings, scores and metadata are shallow-copied before rendering.
    inputs = DecisionReplayInputs(
        findings=list(consensus.findings),
        scores=list(consensus.scores),
        metadata=dict(consensus.metadata),
        deliberation=deliberation,
        original_ask=original_ask,
        include_member_arguments=with_arguments,
    )
    rendered = render_decision_replay(inputs)
    destination = out_path.parent / "decision-replay.md"
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(rendered, encoding="utf-8")
    return destination
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
# ── peer-review (Phase 5 / F1, Karpathy anonymous review) ──────────
|
|
789
|
+
|
|
790
|
+
|
|
791
|
+
def _peer_review_active(ai_cfg: dict[str, Any], args: argparse.Namespace) -> bool:
    """Tell whether the peer-review pass is switched on for this run.

    Checked in priority order:
      1. ``args.peer_review`` — a truthy value (a missing attribute counts
         as false) turns the pass on regardless of config.
      2. ``peer_review.enabled`` inside ``ai_cfg``.
    With neither set the pass stays off.
    """
    cli_opt_in = getattr(args, "peer_review", False)
    if cli_opt_in:
        return True
    config_block = ai_cfg.get("peer_review") or {}
    return bool(config_block.get("enabled"))
|
|
804
|
+
|
|
805
|
+
|
|
806
|
+
def _peer_review_cost_delta(
    ai_cfg: dict[str, Any],
    args: argparse.Namespace,
    estimates: list[Any],
    n_billable: int,
) -> tuple[int, float]:
    """Project the extra calls and spend added by the peer-review round.

    Returns ``(0, 0.0)`` when peer-review is not active for this
    invocation or when fewer than two members are billable. Otherwise
    returns one extra call per billable member and the summed
    ``total_usd`` of ``estimates`` as the extra spend.
    """
    # Below two billable members the delta is reported as zero.
    if not _peer_review_active(ai_cfg, args) or n_billable < 2:
        return 0, 0.0
    return n_billable, sum(est.total_usd for est in estimates)
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
def _maybe_run_peer_review(
    ai_cfg: dict[str, Any],
    args: argparse.Namespace,
    question: CouncilQuestion,
    members: list[ExternalAIClient],
    responses: list[CouncilResponse],
    budget: CostBudget,
    table: PriceTable,
    project: Any,
    *,
    persona_labels: dict[str, str] | None = None,
) -> PeerReviewResult | None:
    """Run the peer-review pass when the invocation opted in.

    Returns ``None`` when peer-review is inactive, or when
    ``run_peer_review`` comes back with an empty ``responses`` list.
    Otherwise the ``PeerReviewResult`` is returned as-is.

    ``persona_labels`` is forwarded to ``run_peer_review`` unchanged;
    callers without persona labels pass ``None``.
    """
    if not _peer_review_active(ai_cfg, args):
        return None
    outcome = run_peer_review(
        members,
        responses,
        budget=budget,
        table=table,
        project=project,
        original_ask=args.original_ask,
        max_tokens=question.max_tokens,
        persona_labels=persona_labels,
    )
    # An empty review is reported to the caller the same way as "not run".
    return outcome if outcome.responses else None
|
|
862
|
+
|
|
863
|
+
|
|
864
|
+
def _serialise_peer_review(peer_review: PeerReviewResult) -> dict[str, Any]:
    """Flatten a ``PeerReviewResult`` into a plain dict.

    ``responses`` goes through ``_serialise_responses``; the two label
    mappings are shallow-copied into fresh dicts.
    """
    payload: dict[str, Any] = {}
    payload["responses"] = _serialise_responses(peer_review.responses)
    payload["label_to_source"] = dict(peer_review.label_to_source)
    payload["persona_labels"] = dict(peer_review.persona_labels)
    return payload
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
def _deserialise_peer_review(
    data: dict[str, Any] | None,
) -> PeerReviewResult | None:
    """Rebuild a ``PeerReviewResult`` from its serialised dict form.

    A missing or empty payload yields ``None``. Absent or null keys inside
    a non-empty payload fall back to an empty list / empty dict.
    """
    if not data:
        return None
    fields = {
        "responses": _deserialise_responses(data.get("responses") or []),
        "label_to_source": dict(data.get("label_to_source") or {}),
        "persona_labels": dict(data.get("persona_labels") or {}),
    }
    return PeerReviewResult(**fields)
|
|
888
|
+
|
|
889
|
+
|
|
214
890
|
# ── subcommands ─────────────────────────────────────────────────────
|
|
215
891
|
|
|
216
892
|
|
|
@@ -273,29 +949,121 @@ def cmd_estimate(
|
|
|
273
949
|
"""Print per-member cost preview. No API calls."""
|
|
274
950
|
if settings is None:
|
|
275
951
|
settings = load_settings()
|
|
952
|
+
ai_cfg = (settings.get("ai_council") or {}) if isinstance(settings, dict) else {}
|
|
953
|
+
advisor_plans = _build_advisor_plans(ai_cfg, REPO_ROOT)
|
|
954
|
+
explicit_overrides = _parse_model_overrides(getattr(args, "model", None))
|
|
955
|
+
skipped: list[dict[str, Any]] = []
|
|
276
956
|
if members is None:
|
|
277
957
|
members = build_members(
|
|
278
958
|
settings,
|
|
279
959
|
invocation_mode=args.mode_override,
|
|
280
|
-
model_overrides=
|
|
960
|
+
model_overrides=_advisor_model_overrides(
|
|
961
|
+
advisor_plans, explicit_overrides,
|
|
962
|
+
),
|
|
281
963
|
siblings_overrides=_parse_siblings_overrides(getattr(args, "siblings", None)),
|
|
964
|
+
skipped=skipped,
|
|
282
965
|
)
|
|
283
966
|
if table is None:
|
|
284
967
|
table = load_prices()
|
|
285
|
-
ai_cfg = (settings.get("ai_council") or {}) if isinstance(settings, dict) else {}
|
|
286
968
|
question, _ = build_question(
|
|
287
969
|
input_path=Path(args.question), input_mode=args.input_mode,
|
|
288
970
|
max_tokens=_resolve_max_tokens(args, ai_cfg),
|
|
971
|
+
prompt_mode_override=getattr(args, "prompt_mode", None),
|
|
289
972
|
)
|
|
290
973
|
project = detect_project_context(REPO_ROOT)
|
|
291
974
|
billable = [m for m in members if getattr(m, "billable", True)]
|
|
292
975
|
estimates = estimate(question, billable, table,
|
|
293
|
-
project=project, original_ask=args.original_ask
|
|
976
|
+
project=project, original_ask=args.original_ask,
|
|
977
|
+
advisor_plans=advisor_plans)
|
|
978
|
+
if getattr(args, "debate", False):
|
|
979
|
+
return _emit_debate_estimate(
|
|
980
|
+
args, ai_cfg, members, billable, estimates, advisor_plans,
|
|
981
|
+
skipped=skipped,
|
|
982
|
+
)
|
|
983
|
+
extra_calls, extra_usd = _consensus_cost_delta(
|
|
984
|
+
ai_cfg, question.mode, estimates, len(billable),
|
|
985
|
+
)
|
|
986
|
+
pr_extra_calls, pr_extra_usd = _peer_review_cost_delta(
|
|
987
|
+
ai_cfg, args, estimates, len(billable),
|
|
988
|
+
)
|
|
294
989
|
sys.stdout.write(
|
|
295
990
|
f"council:estimate · mode={question.mode} · members={len(members)} "
|
|
296
991
|
f"(billable={len(billable)})\n"
|
|
297
992
|
)
|
|
298
|
-
|
|
993
|
+
advisor_summary = _format_advisor_summary(advisor_plans, billable)
|
|
994
|
+
if advisor_summary:
|
|
995
|
+
sys.stdout.write(advisor_summary + "\n")
|
|
996
|
+
if skipped:
|
|
997
|
+
sys.stdout.write(format_install_hints(skipped) + "\n")
|
|
998
|
+
sys.stdout.write(
|
|
999
|
+
format_estimate_table(
|
|
1000
|
+
billable, estimates,
|
|
1001
|
+
consensus_delta_usd=extra_usd,
|
|
1002
|
+
consensus_extra_calls=extra_calls,
|
|
1003
|
+
peer_review_delta_usd=pr_extra_usd,
|
|
1004
|
+
peer_review_extra_calls=pr_extra_calls,
|
|
1005
|
+
) + "\n"
|
|
1006
|
+
)
|
|
1007
|
+
return 0
|
|
1008
|
+
|
|
1009
|
+
|
|
1010
|
+
def _emit_debate_estimate(
    args: argparse.Namespace,
    ai_cfg: dict[str, Any],
    members: list[ExternalAIClient],
    billable: list[ExternalAIClient],
    estimates: list[Any],
    advisor_plans: Any,
    *,
    skipped: list[dict[str, Any]] | None = None,
) -> int:
    """Print a round-by-round cost projection for a debate run.

    The round count is ``args.rounds`` when set, otherwise
    ``ai_cfg["min_rounds"]`` (default 2). It must lie between 1 and
    ``ai_cfg["debate_max_rounds"]`` (default 4) inclusive.

    Every round prints the same per-member table; the projected total is
    the summed ``total_usd`` of ``estimates`` multiplied by the round
    count.

    Returns:
        Always ``0``.

    Raises:
        argparse.ArgumentTypeError: the resolved round count is below 1 or
            above the configured cap.
    """
    def _out(text: str) -> None:
        sys.stdout.write(text)

    floor_rounds = int(ai_cfg.get("min_rounds", 2))
    cap = int(ai_cfg.get("debate_max_rounds", 4))
    cli_rounds = getattr(args, "rounds", None)
    rounds = floor_rounds if cli_rounds is None else int(cli_rounds)
    if rounds < 1:
        raise argparse.ArgumentTypeError(
            f"--rounds must be >= 1 (got {rounds})"
        )
    if rounds > cap:
        raise argparse.ArgumentTypeError(
            f"--rounds={rounds} exceeds debate_max_rounds={cap}; "
            f"raise the cap in agents/.ai-council.yml or lower --rounds."
        )
    # Computed before any output so a malformed estimate fails cleanly.
    projected_total = sum(e.total_usd for e in estimates) * rounds

    _out(
        f"council:estimate · mode=debate · members={len(members)} "
        f"(billable={len(billable)}) · rounds={rounds} "
        f"(cap={cap})\n"
    )
    advisor_summary = _format_advisor_summary(advisor_plans, billable)
    if advisor_summary:
        _out(advisor_summary + "\n")
    if skipped:
        _out(format_install_hints(skipped) + "\n")

    round_no = 1
    while round_no <= rounds:
        _out(f"\nRound {round_no} of {rounds}:\n")
        _out(format_estimate_table(billable, estimates) + "\n")
        if round_no < rounds:
            # Separator between rounds, omitted after the last one.
            _out(" " + "─" * 40 + "\n")
        round_no += 1

    _out(f"\n PROJECTED TOTAL ({rounds} rounds): ${projected_total:.4f}\n")
    _out(
        " Note: progressive disclosure may stop the debate early; "
        "this is an upper bound.\n"
    )
    return 0
|
|
300
1068
|
|
|
301
1069
|
|
|
@@ -325,6 +1093,412 @@ def _deserialise_responses(items: list[dict[str, Any]]) -> list[CouncilResponse]
|
|
|
325
1093
|
return out
|
|
326
1094
|
|
|
327
1095
|
|
|
1096
|
+
def _deserialise_consensus(data: dict[str, Any] | None) -> ConsensusResult | None:
    """Rebuild a ``ConsensusResult`` from its serialised dict form.

    A missing or empty payload yields ``None``. Findings and scores are
    rebuilt from the payload; the aggregate metadata and the bucket are
    *recomputed* from them rather than read back from the stored
    ``metadata`` section.

    NOTE(review): ``bucket_by_threshold`` is called without thresholds
    here, while the live round passes the configured ``strong_threshold``
    / ``minority_threshold`` to ``run_consensus_scoring`` — confirm that
    re-bucketing with the helper's defaults is intended.

    NOTE(review): scores are coerced with ``int``; confirm stored scores
    are never fractional.
    """
    if not data:
        return None
    # Imported lazily, only once there is a payload to rebuild.
    from scripts.ai_council.consensus import (  # noqa: F401
        ConsensusMetadata, Finding, FindingScore,
        aggregate_scores, bucket_by_threshold,
    )

    findings = []
    for raw in data.get("findings") or []:
        findings.append(
            Finding(id=raw["id"], source=raw["source"], text=raw["text"])
        )

    scores = []
    for raw in data.get("scores") or []:
        scores.append(
            FindingScore(
                finding_id=raw["finding_id"],
                scorer=raw["scorer"],
                score=int(raw["score"]),
                agree=bool(raw["agree"]),
                reason=raw.get("reason", ""),
            )
        )

    metadata = aggregate_scores(findings, scores)
    return ConsensusResult(
        bucket=bucket_by_threshold(findings, metadata),
        findings=findings,
        scores=scores,
        metadata=metadata,
        extraction_responses=_deserialise_responses(
            data.get("extraction_responses") or [],
        ),
        scoring_responses=_deserialise_responses(
            data.get("scoring_responses") or [],
        ),
    )
|
|
1132
|
+
|
|
1133
|
+
|
|
1134
|
+
def _resolve_necessity_mode(
    ai_cfg: dict[str, Any],
    lens: str,
    invocation: str = "agent",
) -> tuple[bool, str]:
    """Return ``(enabled, effective_mode)`` for the necessity classifier.

    Two-tier resolution, selected by ``invocation``:

    - ``"user_explicit"`` reads ``necessity_classifier.user_explicit_mode``
      (default ``warn-only``), overridable per lens through
      ``lens_overrides.necessity_classifier_user_explicit_mode[lens]``.
    - anything else reads ``necessity_classifier.mode`` (default
      ``educate``), overridable per lens through
      ``lens_overrides.necessity_classifier_mode[lens]``.

    A null global mode or null per-lens override counts as "not set" and
    falls through to the next tier instead of becoming the string
    ``"None"``. An override table that is not a dict is ignored, matching
    the ``isinstance`` guards in the sibling resolvers.

    NOTE(review): an unquoted YAML ``off`` loads as boolean ``False`` under
    YAML 1.1 parsers and would surface here as ``"False"`` — confirm the
    config loader normalises it before it reaches this function.

    Args:
        ai_cfg: The ``ai_council`` settings mapping.
        lens: Lens whose override should be applied.
        invocation: ``"agent"`` (default) or ``"user_explicit"``.

    Returns:
        ``enabled`` from ``necessity_classifier.enabled`` (default
        ``True``) and the resolved mode as a string.
    """
    nc_block = ai_cfg.get("necessity_classifier") or {}
    enabled = bool(nc_block.get("enabled", True))

    if invocation == "user_explicit":
        mode_key, default_mode = "user_explicit_mode", "warn-only"
        override_key = "necessity_classifier_user_explicit_mode"
    else:
        mode_key, default_mode = "mode", "educate"
        override_key = "necessity_classifier_mode"

    global_mode = nc_block.get(mode_key)
    if global_mode is None:
        # Key absent or explicitly null: use the built-in default.
        global_mode = default_mode

    lens_overrides = ai_cfg.get("lens_overrides") or {}
    overrides = (
        lens_overrides.get(override_key)
        if isinstance(lens_overrides, dict) else None
    )
    lens_mode = overrides.get(lens) if isinstance(overrides, dict) else None
    return enabled, str(global_mode if lens_mode is None else lens_mode)
|
|
1167
|
+
|
|
1168
|
+
|
|
1169
|
+
def _provider_caps_snapshot(ai_cfg: dict[str, Any]) -> dict[str, dict[str, str]]:
    """Build ``{provider: {"mode": ..., "model": ...}}`` for enabled members.

    Only the ``mode`` and ``model`` values of each member entry are copied
    (stringified, ``""`` when absent); nothing else from the member config
    is included. Entries that are not dicts, or whose ``enabled`` flag is
    falsy, are left out. A ``members`` value that is not a dict yields an
    empty snapshot.
    """
    roster = ai_cfg.get("members") or {}
    if not isinstance(roster, dict):
        return {}
    return {
        str(provider): {
            "mode": str(entry.get("mode", "")),
            "model": str(entry.get("model", "")),
        }
        for provider, entry in roster.items()
        # A missing ``enabled`` key counts as enabled.
        if isinstance(entry, dict) and entry.get("enabled", True)
    }
|
|
1188
|
+
|
|
1189
|
+
|
|
1190
|
+
def _necessity_gate(
    *, prompt: str, lens: str, invocation: str, proceed_anyway: bool,
    ai_cfg: dict[str, Any], stdout=None, original_ask: str = "",
) -> tuple[bool, int, ClassificationResult | None]:
    """Decide whether the council should run at all for this prompt.

    Returns ``(proceed, exit_code, result)``:

    - classifier disabled or mode ``off`` → ``(True, 0, None)``; nothing
      is printed or logged.
    - verdict other than ``unnecessary`` → proceed (a notice is printed
      only for ``borderline``).
    - verdict ``unnecessary``:
        * mode ``warn-only`` → notice, proceed.
        * mode ``block`` → notice, stop with exit code 0;
          ``proceed_anyway`` is ignored.
        * any other mode (the educate path): ``agent`` invocations stop
          with exit code 0; other invocations proceed when
          ``proceed_anyway`` is set and otherwise print
          ``educate_message`` and stop with exit code 2.

    Every branch past the disabled check sends exactly one record to
    ``append_event``. Its ``original_ask`` field is ``original_ask`` when
    non-empty, else ``prompt``. (The original docstring says the events
    log stores only a short hash of that value — presumably inside
    ``append_event``; not verifiable from this function.)

    ``stdout`` replaces ``sys.stdout`` as the notice sink when given.
    """
    sink = sys.stdout if stdout is None else stdout
    enabled, mode = _resolve_necessity_mode(ai_cfg, lens, invocation=invocation)
    if mode == "off" or not enabled:
        return True, 0, None

    result = classify_necessity(prompt, lens=lens, invocation=invocation)
    caps = _provider_caps_snapshot(ai_cfg)
    ask_for_log = original_ask or prompt

    # Each branch yields (notice, action, proceed, exit_code); the shared
    # tail below prints, logs and returns.
    if result.verdict != "unnecessary":
        notice = None
        if result.verdict == "borderline":
            notice = (
                f"council:necessity · borderline ({result.category}) · "
                f"{result.rationale}\n"
            )
        outcome = (notice, "proceed", True, 0)
    elif mode == "warn-only":
        # Annotates but never skips, for either invocation tier.
        outcome = (
            f"council:necessity · warn-only ({result.category}) · "
            f"{result.rationale}\n",
            "proceed", True, 0,
        )
    elif mode == "block":
        outcome = (
            f"council:necessity · skipped ({result.category}) · "
            f"{result.rationale}\n"
            f"council:necessity · mode=block — `--proceed-anyway` has "
            f"no effect on the block path.\n",
            "skip_necessity", False, 0,
        )
    elif invocation == "agent":
        outcome = (
            f"council:necessity · skipped (agent, {result.category}) · "
            f"{result.rationale}\n",
            "skip_necessity", False, 0,
        )
    elif proceed_anyway:
        outcome = (
            f"council:necessity · override (user_explicit + "
            f"--proceed-anyway, {result.category}) · "
            f"{result.rationale}\n",
            "proceed", True, 0,
        )
    else:
        outcome = (
            educate_message(result, lens) + "\n",
            "skip_necessity", False, 2,
        )

    notice, action, proceed, exit_code = outcome
    if notice is not None:
        sink.write(notice)
    append_event({
        "lens": lens, "invocation": invocation,
        "action": action, "verdict": result.verdict,
        "category": result.category,
        "mode": mode, "provider_caps": caps,
        "original_ask": ask_for_log,
    })
    return proceed, exit_code, result
|
|
1273
|
+
|
|
1274
|
+
|
|
1275
|
+
def _resolve_model_downgrade(
    ai_cfg: dict[str, Any], lens: str,
) -> tuple[bool, bool]:
    """Return ``(enabled, auto_apply)`` for the size-fit downgrade gate.

    Starts from the global ``model_downgrade`` block (``enabled`` defaults
    to ``True``, ``auto_apply`` to ``False``). A dict found at
    ``lens_overrides.model_downgrade[lens]`` then overrides whichever of
    the two keys it defines; non-dict override values are ignored.
    """
    base = ai_cfg.get("model_downgrade") or {}
    enabled = bool(base.get("enabled", True))
    auto_apply = bool(base.get("auto_apply", False))

    table = (ai_cfg.get("lens_overrides") or {}).get("model_downgrade") or {}
    per_lens = table.get(lens) if isinstance(table, dict) else None
    if not isinstance(per_lens, dict):
        return enabled, auto_apply
    # Keys missing from the override keep the global value.
    return (
        bool(per_lens.get("enabled", enabled)),
        bool(per_lens.get("auto_apply", auto_apply)),
    )
|
|
1296
|
+
|
|
1297
|
+
|
|
1298
|
+
def _size_fit_gate(
    *, prompt: str, lens: str, members: list[ExternalAIClient],
    ai_cfg: dict[str, Any], stdout=None,
) -> list[tuple[str, SizeFitVerdict, bool]]:
    """Run the size-fit classifier over members that define a model ladder.

    For each member whose config entry has a non-empty ``model_ladder``,
    ``classify_size_fit`` is called with the member's current model. When
    the verdict is "not fit" and names a ``suggested_model``:

    - with ``auto_apply`` on, ``member.model`` is overwritten in place and
      an auto-downgrade line is printed;
    - otherwise only ``downgrade_message`` is printed and the model stays.

    Members without a ladder are skipped silently. When the gate is
    disabled for ``lens`` an empty list is returned and nothing is
    classified.

    Returns:
        One ``(member_name, verdict, applied)`` tuple per classified
        member. This function never refuses the dispatch; it only
        suggests or applies a model change.
    """
    sink = sys.stdout if stdout is None else stdout
    enabled, auto_apply = _resolve_model_downgrade(ai_cfg, lens)
    decisions: list[tuple[str, SizeFitVerdict, bool]] = []
    if not enabled:
        return decisions

    members_cfg = ai_cfg.get("members") or {}
    for member in members:
        ladder = (members_cfg.get(member.name) or {}).get("model_ladder") or ()
        if not ladder:
            continue
        verdict = classify_size_fit(
            prompt, current_model=member.model, ladder=ladder, lens=lens,
        )
        wants_downgrade = not verdict.fit and verdict.suggested_model
        applied = bool(wants_downgrade and auto_apply)
        if applied:
            # Print first so the line still shows the pre-downgrade model.
            sink.write(
                f"council:size-fit · {member.name} · auto-downgrade "
                f"`{member.model}` → `{verdict.suggested_model}` · "
                f"{verdict.reason}\n"
            )
            member.model = verdict.suggested_model
        elif wants_downgrade:
            sink.write(
                f"council:size-fit · {member.name} · "
                f"{downgrade_message(verdict, member.model)}\n"
            )
        # Recorded for every classified member, downgrade or not.
        decisions.append((member.name, verdict, applied))
    return decisions
|
|
1346
|
+
|
|
1347
|
+
|
|
1348
|
+
def _resolve_cost_disclosure(
    ai_cfg: dict[str, Any], lens: str,
) -> tuple[str, float, bool]:
    """Return ``(mode, threshold_usd, show_per_member)`` for ``lens``.

    Baseline:
      - ``lens == "debate"`` reads ``debate.cost_disclosure`` with defaults
        ``mode="always"``, ``threshold_usd=1.00``, ``show_per_member=True``.
      - every other lens starts at ``("off", 1.00, True)``.

    A dict at ``lens_overrides.cost_disclosure[lens]`` then overrides
    whichever of the three keys it defines; non-dict override values are
    ignored. Values are coerced with ``str`` / ``float`` / ``bool``.
    """
    # Resolved even for non-debate lenses, as the original does.
    debate_disc = (ai_cfg.get("debate") or {}).get("cost_disclosure") or {}
    if lens == "debate":
        baseline = (
            str(debate_disc.get("mode", "always")),
            float(debate_disc.get("threshold_usd", 1.00)),
            bool(debate_disc.get("show_per_member", True)),
        )
    else:
        baseline = ("off", 1.00, True)

    table = (ai_cfg.get("lens_overrides") or {}).get("cost_disclosure") or {}
    per_lens = table.get(lens) if isinstance(table, dict) else None
    if not isinstance(per_lens, dict):
        return baseline
    mode, threshold, show_per_member = baseline
    return (
        str(per_lens.get("mode", mode)),
        float(per_lens.get("threshold_usd", threshold)),
        bool(per_lens.get("show_per_member", show_per_member)),
    )
|
|
1377
|
+
|
|
1378
|
+
|
|
1379
|
+
def _format_cost_disclosure(
    est: DebateCostEstimate, *, lens: str, show_per_member: bool,
) -> str:
    """Render the cost-disclosure block as newline-terminated text.

    Layout:
      1. a headline with the low/high/expected USD figures, the number of
         entries in ``est.per_member`` and ``est.rounds``;
      2. when ``show_per_member`` is set and ``est.per_member`` is
         non-empty, one line per member with its low/high range;
      3. when ``est.subscription_members`` is non-empty, one line per
         subscription member, labelled with ``subscription_label`` or,
         failing that, ``transport``.

    Member entries are read as mappings with ``name`` / ``model`` (and
    ``low_usd`` / ``high_usd`` for billable ones).
    """
    headline = (
        f"council:{lens} \u00b7 cost-disclosure \u00b7 estimated "
        f"${est.low_usd:.4f} \u2013 ${est.high_usd:.4f} "
        f"(expected ${est.expected_usd:.4f}) across "
        f"{len(est.per_member)} billable members \u00d7 {est.rounds} rounds"
    )
    rendered = [headline]

    if show_per_member and est.per_member:
        rendered.append(" per member:")
        rendered.extend(
            f" \u00b7 {pm['name']:<14} {pm['model']:<22} "
            f"${pm['low_usd']:.4f} \u2013 ${pm['high_usd']:.4f}"
            for pm in est.per_member
        )

    if est.subscription_members:
        rendered.append(" subscription (no USD spend):")
        for sm in est.subscription_members:
            # An empty or missing label falls back to the transport name.
            label = sm.get("subscription_label") or sm.get("transport", "")
            rendered.append(
                f" \u00b7 {sm['name']:<14} {sm['model']:<22} ({label})"
            )

    return "\n".join(rendered) + "\n"
|
|
1409
|
+
|
|
1410
|
+
|
|
1411
|
+
def _debate_refusal_cap(
|
|
1412
|
+
ai_cfg: dict[str, Any],
|
|
1413
|
+
) -> float:
|
|
1414
|
+
"""Resolve the hard refusal cap (``debate.max_cost_usd``).
|
|
1415
|
+
|
|
1416
|
+
Returns 0.0 when disabled. The cap is unconditional \u2014 no
|
|
1417
|
+
``--proceed-anyway`` override (the user must lower rounds, drop
|
|
1418
|
+
members, or raise the cap explicitly).
|
|
1419
|
+
"""
|
|
1420
|
+
debate_block = ai_cfg.get("debate") or {}
|
|
1421
|
+
return float(debate_block.get("max_cost_usd", 5.00) or 0.0)
|
|
1422
|
+
|
|
1423
|
+
|
|
1424
|
+
def _emit_shadow_slo_banner() -> None:
|
|
1425
|
+
"""Pre-flight SLO banner for solo-dispatch invocations (step-9 P10).
|
|
1426
|
+
|
|
1427
|
+
Reads ``agents/council-shadow-log.jsonl`` and prints the 7-day rolling
|
|
1428
|
+
disagreement rate. ``OK``, ``WARN``, ``BREACH`` are all surfaced so the
|
|
1429
|
+
user can see when single-member quality is drifting. Never auto-flips
|
|
1430
|
+
back to full council \u2014 visibility-first, action-second (D10).
|
|
1431
|
+
"""
|
|
1432
|
+
try:
|
|
1433
|
+
from scripts.ai_council import shadow_dispatch as _sd
|
|
1434
|
+
rate, n = _sd.compute_disagreement_rate(_sd.SHADOW_LOG_PATH)
|
|
1435
|
+
if n == 0:
|
|
1436
|
+
return
|
|
1437
|
+
sys.stdout.write(_sd.slo_banner(rate, n) + "\n")
|
|
1438
|
+
except Exception: # noqa: BLE001 \u2014 banner must never break dispatch.
|
|
1439
|
+
return
|
|
1440
|
+
|
|
1441
|
+
|
|
1442
|
+
def _apply_solo_dispatch(
    members: list[ExternalAIClient],
) -> tuple[list[ExternalAIClient], str | None]:
    """Narrow ``members`` down to the single solo-dispatch pick.

    The council config is loaded from ``AI_COUNCIL_FILE`` and
    :func:`select_solo_member` is asked to choose from the routing chain.
    The probe handed to it is deliberately shallow: a chain entry counts as
    usable exactly when a client with that name is present in ``members``.

    Returns ``(members_to_use, banner)``:

    * config missing or invalid -> the unfiltered list and ``None``;
    * empty chain, no usable chain entry, or the pick not found in
      ``members`` -> the unfiltered list plus a WARN banner (the caller
      escalates to the full council rather than failing);
    * otherwise -> a list holding only the picked member(s) plus an info
      banner naming the pick.
    """
    try:
        council_cfg = load_council_config(AI_COUNCIL_FILE)
    except (CouncilConfigError, FileNotFoundError):
        return members, None

    routing = council_cfg.routing
    if not routing.solo_member_fallback_chain:
        empty_chain_warning = (
            "council:solo \u00b7 WARN \u00b7 --single requested but "
            "routing.solo_member_fallback_chain is empty \u2014 "
            "escalating to full council."
        )
        return members, empty_chain_warning

    present = {getattr(member, "name", "") for member in members}

    def _is_runtime_present(name, _transport):
        # Shallow probe: the member was built, so treat it as available.
        return name in present

    chosen = select_solo_member(
        routing,
        council_cfg.members,
        auth_cache=AuthCache(),
        probe=_is_runtime_present,
    )
    if chosen is None:
        unavailable_warning = (
            "council:solo \u00b7 WARN \u00b7 solo dispatch unavailable "
            "(no chain member runtime-present) \u2014 escalating to "
            "full council."
        )
        return members, unavailable_warning

    solo = [member for member in members if getattr(member, "name", "") == chosen]
    if solo:
        info = (
            f"council:solo \u00b7 dispatching to {chosen} only "
            f"(routing.solo_member_fallback_chain)."
        )
        return solo, info

    # Defensive: the probe only accepts names found in ``present``, so an
    # empty result is not expected. Escalate rather than ship an empty
    # council.
    vanished_warning = (
        "council:solo \u00b7 WARN \u00b7 selected member vanished "
        "between probe and filter \u2014 escalating to full council."
    )
    return members, vanished_warning
|
|
1500
|
+
|
|
1501
|
+
|
|
328
1502
|
def cmd_run(
|
|
329
1503
|
args: argparse.Namespace,
|
|
330
1504
|
*,
|
|
@@ -335,29 +1509,114 @@ def cmd_run(
|
|
|
335
1509
|
"""Estimate, then run the council. Requires --confirm to spend."""
|
|
336
1510
|
if settings is None:
|
|
337
1511
|
settings = load_settings()
|
|
1512
|
+
ai_cfg = (settings.get("ai_council") or {}) if isinstance(settings, dict) else {}
|
|
1513
|
+
advisor_plans = _build_advisor_plans(ai_cfg, REPO_ROOT)
|
|
1514
|
+
explicit_overrides = _parse_model_overrides(getattr(args, "model", None))
|
|
1515
|
+
skipped: list[dict[str, Any]] = []
|
|
338
1516
|
if members is None:
|
|
339
1517
|
members = build_members(
|
|
340
1518
|
settings,
|
|
341
1519
|
invocation_mode=args.mode_override,
|
|
342
|
-
model_overrides=
|
|
1520
|
+
model_overrides=_advisor_model_overrides(
|
|
1521
|
+
advisor_plans, explicit_overrides,
|
|
1522
|
+
),
|
|
343
1523
|
siblings_overrides=_parse_siblings_overrides(getattr(args, "siblings", None)),
|
|
1524
|
+
skipped=skipped,
|
|
344
1525
|
)
|
|
1526
|
+
if getattr(args, "single", False):
|
|
1527
|
+
members, solo_banner = _apply_solo_dispatch(members)
|
|
1528
|
+
if solo_banner:
|
|
1529
|
+
sys.stdout.write(solo_banner + "\n")
|
|
1530
|
+
_emit_shadow_slo_banner()
|
|
345
1531
|
if table is None:
|
|
346
1532
|
table = load_prices()
|
|
347
|
-
ai_cfg = (settings.get("ai_council") or {}) if isinstance(settings, dict) else {}
|
|
348
1533
|
question, artefact = build_question(
|
|
349
1534
|
input_path=Path(args.question), input_mode=args.input_mode,
|
|
350
1535
|
max_tokens=_resolve_max_tokens(args, ai_cfg),
|
|
1536
|
+
prompt_mode_override=getattr(args, "prompt_mode", None),
|
|
1537
|
+
)
|
|
1538
|
+
proceed, gate_exit, _necessity_result = _necessity_gate(
|
|
1539
|
+
prompt=question.user_prompt,
|
|
1540
|
+
lens=question.mode,
|
|
1541
|
+
invocation=getattr(args, "invocation", "agent"),
|
|
1542
|
+
proceed_anyway=getattr(args, "proceed_anyway", False),
|
|
1543
|
+
ai_cfg=ai_cfg,
|
|
1544
|
+
original_ask=getattr(args, "original_ask", "") or "",
|
|
1545
|
+
)
|
|
1546
|
+
if not proceed:
|
|
1547
|
+
return gate_exit
|
|
1548
|
+
_size_fit_gate(
|
|
1549
|
+
prompt=question.user_prompt,
|
|
1550
|
+
lens=question.mode,
|
|
1551
|
+
members=members,
|
|
1552
|
+
ai_cfg=ai_cfg,
|
|
351
1553
|
)
|
|
352
1554
|
project = detect_project_context(REPO_ROOT)
|
|
353
1555
|
billable = [m for m in members if getattr(m, "billable", True)]
|
|
354
1556
|
estimates = estimate(question, billable, table,
|
|
355
|
-
project=project, original_ask=args.original_ask
|
|
1557
|
+
project=project, original_ask=args.original_ask,
|
|
1558
|
+
advisor_plans=advisor_plans)
|
|
1559
|
+
extra_calls, extra_usd = _consensus_cost_delta(
|
|
1560
|
+
ai_cfg, question.mode, estimates, len(billable),
|
|
1561
|
+
)
|
|
1562
|
+
pr_extra_calls, pr_extra_usd = _peer_review_cost_delta(
|
|
1563
|
+
ai_cfg, args, estimates, len(billable),
|
|
1564
|
+
)
|
|
356
1565
|
sys.stdout.write(
|
|
357
1566
|
f"council:run · mode={question.mode} · members={len(members)} "
|
|
358
1567
|
f"(billable={len(billable)})\n"
|
|
359
1568
|
)
|
|
360
|
-
|
|
1569
|
+
advisor_summary = _format_advisor_summary(advisor_plans, billable)
|
|
1570
|
+
if advisor_summary:
|
|
1571
|
+
sys.stdout.write(advisor_summary + "\n")
|
|
1572
|
+
if skipped:
|
|
1573
|
+
sys.stdout.write(format_install_hints(skipped) + "\n")
|
|
1574
|
+
sys.stdout.write(
|
|
1575
|
+
format_estimate_table(
|
|
1576
|
+
billable, estimates,
|
|
1577
|
+
consensus_delta_usd=extra_usd,
|
|
1578
|
+
consensus_extra_calls=extra_calls,
|
|
1579
|
+
peer_review_delta_usd=pr_extra_usd,
|
|
1580
|
+
peer_review_extra_calls=pr_extra_calls,
|
|
1581
|
+
) + "\n"
|
|
1582
|
+
)
|
|
1583
|
+
|
|
1584
|
+
# Step-8 P1 — pre-run quota summary. After estimate / before
|
|
1585
|
+
# dispatch so the user sees the budget shape before --confirm.
|
|
1586
|
+
# Uncapped providers are omitted by ``quota_summary_line``; when
|
|
1587
|
+
# no CLI member has a configured cap the summary is empty and we
|
|
1588
|
+
# write nothing.
|
|
1589
|
+
cli_members = [m for m in members if isinstance(m, CliClient)]
|
|
1590
|
+
summary, warn_providers = quota_summary_line(cli_members)
|
|
1591
|
+
if summary:
|
|
1592
|
+
sys.stdout.write(summary + "\n")
|
|
1593
|
+
for prov in warn_providers:
|
|
1594
|
+
sys.stdout.write(f"council:quota · WARN · {prov} near limit\n")
|
|
1595
|
+
|
|
1596
|
+
# Phase 8 step 5 — opt-in cost disclosure for non-debate lenses.
|
|
1597
|
+
# Default mode is "off" for analysis / default (cheap enough that
|
|
1598
|
+
# the disclosure is friction); users opt in by setting
|
|
1599
|
+
# `lenses.<name>.cost_disclosure.mode` in agents/.ai-council.yml.
|
|
1600
|
+
disc_mode, disc_threshold, disc_show = _resolve_cost_disclosure(
|
|
1601
|
+
ai_cfg, question.mode,
|
|
1602
|
+
)
|
|
1603
|
+
if disc_mode != "off":
|
|
1604
|
+
run_estimate = estimate_debate_cost(
|
|
1605
|
+
question, members, table,
|
|
1606
|
+
rounds=1, project=project,
|
|
1607
|
+
original_ask=args.original_ask,
|
|
1608
|
+
advisor_plans=advisor_plans,
|
|
1609
|
+
)
|
|
1610
|
+
if disc_mode == "always" or (
|
|
1611
|
+
disc_mode == "above_threshold"
|
|
1612
|
+
and run_estimate.expected_usd > disc_threshold
|
|
1613
|
+
):
|
|
1614
|
+
sys.stdout.write(
|
|
1615
|
+
_format_cost_disclosure(
|
|
1616
|
+
run_estimate, lens=question.mode,
|
|
1617
|
+
show_per_member=disc_show,
|
|
1618
|
+
)
|
|
1619
|
+
)
|
|
361
1620
|
|
|
362
1621
|
if not args.confirm:
|
|
363
1622
|
sys.stdout.write(
|
|
@@ -378,10 +1637,28 @@ def cmd_run(
|
|
|
378
1637
|
members, question, budget,
|
|
379
1638
|
table=table, project=project,
|
|
380
1639
|
original_ask=args.original_ask, rounds=rounds,
|
|
1640
|
+
advisor_plans=advisor_plans,
|
|
1641
|
+
)
|
|
1642
|
+
# Pipeline order (R4 verdict): deliberation → peer-review → consensus
|
|
1643
|
+
# → synthesis. Peer-review anonymises only deliberation outputs;
|
|
1644
|
+
# consensus-scoring runs on the de-anonymised findings.
|
|
1645
|
+
persona_labels = build_persona_labels(advisor_plans, billable)
|
|
1646
|
+
peer_review = _maybe_run_peer_review(
|
|
1647
|
+
ai_cfg, args, question, members, responses, budget, table, project,
|
|
1648
|
+
persona_labels=persona_labels,
|
|
1649
|
+
)
|
|
1650
|
+
consensus = _maybe_run_consensus(
|
|
1651
|
+
ai_cfg, question, members, responses, budget, table, project, args,
|
|
381
1652
|
)
|
|
382
1653
|
estimated_total = sum(e.total_usd for e in estimates)
|
|
383
1654
|
actual_total = 0.0
|
|
384
|
-
|
|
1655
|
+
all_responses: list[CouncilResponse] = list(responses)
|
|
1656
|
+
if peer_review is not None:
|
|
1657
|
+
all_responses.extend(peer_review.responses)
|
|
1658
|
+
if consensus is not None:
|
|
1659
|
+
all_responses.extend(consensus.extraction_responses)
|
|
1660
|
+
all_responses.extend(consensus.scoring_responses)
|
|
1661
|
+
for r in all_responses:
|
|
385
1662
|
if r.error:
|
|
386
1663
|
continue
|
|
387
1664
|
ce = estimate_cost(r.provider, r.model, r.input_tokens, r.output_tokens, table)
|
|
@@ -389,6 +1666,9 @@ def cmd_run(
|
|
|
389
1666
|
payload = {
|
|
390
1667
|
"schema_version": SCHEMA_VERSION,
|
|
391
1668
|
"mode": question.mode,
|
|
1669
|
+
"prompt_mode": getattr(args, "prompt_mode", None),
|
|
1670
|
+
"prose_synthesis": getattr(args, "prose_synthesis", None),
|
|
1671
|
+
"peer_review_enabled": _peer_review_active(ai_cfg, args),
|
|
392
1672
|
"artefact": artefact,
|
|
393
1673
|
"original_ask": args.original_ask,
|
|
394
1674
|
"members": [f"{m.name}/{m.model}" for m in members],
|
|
@@ -397,22 +1677,479 @@ def cmd_run(
|
|
|
397
1677
|
"cost_usd_actual": round(actual_total, 6),
|
|
398
1678
|
"responses": _serialise_responses(responses),
|
|
399
1679
|
}
|
|
400
|
-
|
|
1680
|
+
if peer_review is not None:
|
|
1681
|
+
payload["peer_review"] = _serialise_peer_review(peer_review)
|
|
1682
|
+
if consensus is not None:
|
|
1683
|
+
payload["consensus"] = _serialise_consensus(consensus)
|
|
1684
|
+
out_path = _validate_council_output_path(
|
|
1685
|
+
args.output, kind="responses", subcommand="run",
|
|
1686
|
+
)
|
|
401
1687
|
out_path.parent.mkdir(parents=True, exist_ok=True)
|
|
402
1688
|
out_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
|
|
403
1689
|
sys.stdout.write(
|
|
404
1690
|
f"\ncouncil:run · wrote {out_path} "
|
|
405
1691
|
f"(estimated ${estimated_total:.4f} / actual ${actual_total:.4f})\n"
|
|
406
1692
|
)
|
|
1693
|
+
replay_path = _maybe_write_decision_replay(
|
|
1694
|
+
ai_cfg=ai_cfg, lens=question.mode, out_path=out_path,
|
|
1695
|
+
consensus=consensus, deliberation=responses,
|
|
1696
|
+
original_ask=args.original_ask,
|
|
1697
|
+
)
|
|
1698
|
+
if replay_path is not None:
|
|
1699
|
+
sys.stdout.write(f"council:run · wrote {replay_path}\n")
|
|
407
1700
|
errors = [r for r in responses if r.error]
|
|
408
1701
|
return 1 if errors and len(errors) == len(responses) else 0
|
|
409
1702
|
|
|
410
1703
|
|
|
1704
|
+
def _debate_round_filename(round_number: int) -> str:
|
|
1705
|
+
return f"debate-round-{round_number}.json"
|
|
1706
|
+
|
|
1707
|
+
|
|
1708
|
+
def _write_debate_round(
    out_dir: Path,
    round_number: int,
    responses: list[CouncilResponse],
    *,
    question: CouncilQuestion,
    members: list[ExternalAIClient],
    artefact: str,
    original_ask: str,
    total_planned_rounds: int,
    table: PriceTable,
    prompt_mode: str | None,
    prose_synthesis: bool | None,
) -> Path:
    """Write one debate round to ``out_dir`` as a standalone JSON file.

    ``out_dir`` is created if needed. The payload carries the schema
    version, lens, prompt settings, artefact, original ask, the
    ``name/model`` label of every member, the serialised responses and the
    actual cost of the round (summed via ``estimate_cost`` over responses
    without an error, rounded to 6 decimals). ``debate_round`` and
    ``debate_total_rounds`` identify the round; ``rounds`` is always ``1``
    because each file holds a single round.

    Returns the path of the file that was written.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    # Only successful responses contribute to the actual spend.
    spent_usd = 0.0
    for reply in responses:
        if not reply.error:
            cost = estimate_cost(
                reply.provider, reply.model,
                reply.input_tokens, reply.output_tokens, table,
            )
            spent_usd += cost.total_usd

    payload = {
        "schema_version": SCHEMA_VERSION,
        "mode": question.mode,
        "prompt_mode": prompt_mode,
        "prose_synthesis": prose_synthesis,
        "artefact": artefact,
        "original_ask": original_ask,
        "members": [f"{member.name}/{member.model}" for member in members],
        "debate_round": round_number,
        "debate_total_rounds": total_planned_rounds,
        "rounds": 1,
        "cost_usd_actual": round(spent_usd, 6),
        "responses": _serialise_responses(responses),
    }

    destination = out_dir / _debate_round_filename(round_number)
    destination.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
    return destination
|
|
1753
|
+
|
|
1754
|
+
|
|
1755
|
+
def _load_debate_seed(
|
|
1756
|
+
path: Path,
|
|
1757
|
+
expected_members: list[ExternalAIClient],
|
|
1758
|
+
) -> list[CouncilResponse]:
|
|
1759
|
+
"""Load `--continue-as-debate` seed: round-1 responses from a prior session.
|
|
1760
|
+
|
|
1761
|
+
The seed file must be the JSON written by ``cmd_run`` (or a prior
|
|
1762
|
+
debate round). Members + models must match the current invocation —
|
|
1763
|
+
a mismatch is a hard error per the Phase 7 contract, not a silent
|
|
1764
|
+
fallback. The host agent surfaces the mismatch and asks the user
|
|
1765
|
+
to either re-run with matching members or drop ``--continue-as-debate``.
|
|
1766
|
+
"""
|
|
1767
|
+
if not path.exists():
|
|
1768
|
+
raise FileNotFoundError(
|
|
1769
|
+
f"--continue-as-debate path not found: {path}"
|
|
1770
|
+
)
|
|
1771
|
+
payload = json.loads(path.read_text(encoding="utf-8"))
|
|
1772
|
+
source_members = list(payload.get("members") or [])
|
|
1773
|
+
expected_labels = [f"{m.name}/{m.model}" for m in expected_members]
|
|
1774
|
+
if source_members != expected_labels:
|
|
1775
|
+
raise CouncilDisabledError(
|
|
1776
|
+
f"--continue-as-debate member mismatch: source session has "
|
|
1777
|
+
f"{source_members!r}, current invocation has {expected_labels!r}. "
|
|
1778
|
+
f"Re-run with matching members or drop --continue-as-debate."
|
|
1779
|
+
)
|
|
1780
|
+
return _deserialise_responses(payload.get("responses") or [])
|
|
1781
|
+
|
|
1782
|
+
|
|
1783
|
+
def _make_debate_continue_prompt(
|
|
1784
|
+
*, auto_continue: bool,
|
|
1785
|
+
stream: Any = None,
|
|
1786
|
+
) -> Any:
|
|
1787
|
+
"""Build the on_continue callback for `run_debate()`.
|
|
1788
|
+
|
|
1789
|
+
``--auto-continue`` returns ``None`` so the orchestrator skips the
|
|
1790
|
+
gate entirely (still subject to the hard-cap check). Interactive
|
|
1791
|
+
mode prints the checkpoint line and reads y/N from stdin.
|
|
1792
|
+
"""
|
|
1793
|
+
if auto_continue:
|
|
1794
|
+
return None
|
|
1795
|
+
out = stream or sys.stdout
|
|
1796
|
+
|
|
1797
|
+
def _prompt(checkpoint: DebateCheckpoint) -> bool:
|
|
1798
|
+
out.write(
|
|
1799
|
+
f"\ndebate:checkpoint round={checkpoint.completed_round}/"
|
|
1800
|
+
f"{checkpoint.total_planned_rounds} "
|
|
1801
|
+
f"cost_so_far=${checkpoint.cost_so_far_usd:.4f} "
|
|
1802
|
+
f"next_round_estimate=${checkpoint.next_round_estimate_usd:.4f} "
|
|
1803
|
+
f"— continue? [y/N]: "
|
|
1804
|
+
)
|
|
1805
|
+
out.flush()
|
|
1806
|
+
try:
|
|
1807
|
+
answer = sys.stdin.readline().strip().lower()
|
|
1808
|
+
except (EOFError, KeyboardInterrupt):
|
|
1809
|
+
return False
|
|
1810
|
+
return answer in {"y", "yes"}
|
|
1811
|
+
|
|
1812
|
+
return _prompt
|
|
1813
|
+
|
|
1814
|
+
|
|
1815
|
+
def cmd_debate(
    args: argparse.Namespace,
    *,
    settings: dict[str, Any] | None = None,
    members: list[ExternalAIClient] | None = None,
    table: PriceTable | None = None,
) -> int:
    """Run a multi-round debate with progressive cost disclosure.

    Flow: build members and the question, pass the necessity and size-fit
    gates, print the per-round estimate and projected total, apply the
    cost-disclosure settings and the hard refusal cap, then (only with
    ``--confirm``) hand over to ``run_debate``. Each completed round is
    written to disk through ``_on_round_complete`` so an interrupted debate
    leaves a recoverable trail. Unless ``--auto-continue`` is set, the user
    is asked between rounds whether to continue.

    ``settings``, ``members`` and ``table`` are loaded / built on demand
    when not supplied.

    Returns:
        ``0`` on success or when only an estimate was printed (no
        ``--confirm``); ``1`` when every response of the last round carries
        an error; ``3`` when ``run_debate`` raised ``DebateCapExceeded``;
        ``4`` when the high-end estimate exceeds ``debate.max_cost_usd``;
        or the exit code supplied by the necessity gate when it declines.

    Raises:
        argparse.ArgumentTypeError: ``--rounds`` is below 1 or above
            ``debate_max_rounds``.
    """
    if settings is None:
        settings = load_settings()
    ai_cfg = (settings.get("ai_council") or {}) if isinstance(settings, dict) else {}
    advisor_plans = _build_advisor_plans(ai_cfg, REPO_ROOT)
    explicit_overrides = _parse_model_overrides(getattr(args, "model", None))
    skipped: list[dict[str, Any]] = []
    if members is None:
        members = build_members(
            settings,
            invocation_mode=args.mode_override,
            model_overrides=_advisor_model_overrides(
                advisor_plans, explicit_overrides,
            ),
            siblings_overrides=_parse_siblings_overrides(
                getattr(args, "siblings", None),
            ),
            skipped=skipped,
        )
    if table is None:
        table = load_prices()
    question, artefact = build_question(
        input_path=Path(args.question), input_mode=args.input_mode,
        max_tokens=_resolve_max_tokens(args, ai_cfg),
        prompt_mode_override="debate",
    )
    proceed, gate_exit, _necessity_result = _necessity_gate(
        prompt=question.user_prompt,
        lens="debate",
        invocation=getattr(args, "invocation", "agent"),
        proceed_anyway=getattr(args, "proceed_anyway", False),
        ai_cfg=ai_cfg,
        original_ask=getattr(args, "original_ask", "") or "",
    )
    if not proceed:
        return gate_exit
    _size_fit_gate(
        prompt=question.user_prompt,
        lens="debate",
        members=members,
        ai_cfg=ai_cfg,
    )
    project = detect_project_context(REPO_ROOT)
    billable = [m for m in members if getattr(m, "billable", True)]

    # Resolve round count: explicit --rounds wins; otherwise default 2.
    # Hard ceiling: ai_council.debate_max_rounds (Phase 0 reserved key).
    max_rounds_cap = int(ai_cfg.get("debate_max_rounds", 4))
    requested = (
        int(args.rounds) if getattr(args, "rounds", None) is not None else 2
    )
    if requested < 1:
        raise argparse.ArgumentTypeError(
            f"--rounds must be >= 1 (got {requested})"
        )
    if requested > max_rounds_cap:
        raise argparse.ArgumentTypeError(
            f"--rounds={requested} exceeds debate_max_rounds={max_rounds_cap}; "
            f"raise the cap in agents/.ai-council.yml or lower --rounds."
        )
    rounds = requested

    estimates = estimate(
        question, billable, table,
        project=project, original_ask=args.original_ask,
        advisor_plans=advisor_plans,
    )
    per_round_usd = sum(e.total_usd for e in estimates)
    projected_total = per_round_usd * rounds
    sys.stdout.write(
        f"council:debate · members={len(members)} (billable={len(billable)}) "
        f"· rounds={rounds} (cap={max_rounds_cap})\n"
    )
    advisor_summary = _format_advisor_summary(advisor_plans, billable)
    if advisor_summary:
        sys.stdout.write(advisor_summary + "\n")
    if skipped:
        sys.stdout.write(format_install_hints(skipped) + "\n")
    sys.stdout.write(
        format_estimate_table(billable, estimates) + "\n"
    )
    sys.stdout.write(
        f" × {rounds} rounds (worst case, before progressive disclosure)\n"
        f" PROJECTED TOTAL: ${projected_total:.4f}\n"
    )

    # Phase 8 — pre-flight cost disclosure + hard refusal cap.
    debate_estimate = estimate_debate_cost(
        question, members, table,
        rounds=rounds, project=project,
        original_ask=args.original_ask,
        advisor_plans=advisor_plans,
    )
    disc_mode, disc_threshold, disc_show = _resolve_cost_disclosure(
        ai_cfg, "debate",
    )
    should_disclose = (
        disc_mode == "always"
        or (
            disc_mode == "above_threshold"
            and debate_estimate.expected_usd > disc_threshold
        )
    )
    if should_disclose:
        sys.stdout.write(
            _format_cost_disclosure(
                debate_estimate, lens="debate", show_per_member=disc_show,
            )
        )
    # The refusal cap is checked before --confirm so that even an
    # estimate-only run reports an over-budget debate.
    cap = _debate_refusal_cap(ai_cfg)
    if cap > 0 and debate_estimate.high_usd > cap:
        sys.stderr.write(
            f"❌ council:debate refused · high-end estimate "
            f"${debate_estimate.high_usd:.4f} exceeds "
            f"debate.max_cost_usd=${cap:.2f}. Lower --rounds, drop "
            f"members, or raise the cap in agents/.ai-council.yml.\n"
        )
        return 4

    if not args.confirm:
        sys.stdout.write(
            "\nNo --confirm flag — estimate only. Re-run with --confirm to "
            "start the debate.\n"
        )
        return 0

    cost_cfg = ai_cfg.get("cost_budget") or {}
    budget = CostBudget(
        max_input_tokens=int(cost_cfg.get("max_input_tokens", 50_000)),
        max_output_tokens=int(cost_cfg.get("max_output_tokens", 20_000)),
        max_calls=int(cost_cfg.get("max_calls", 10)),
        max_total_usd=float(cost_cfg.get("max_total_usd", 0.0) or 0.0),
    )

    out_dir = _validate_council_output_path(
        args.output, kind="responses", subcommand="debate",
    )
    seed: list[CouncilResponse] | None = None
    if getattr(args, "continue_as_debate", None):
        # Validate the seed against the FULL member list: the session files
        # this flag consumes record ``members`` for every member (billable
        # or not), so comparing against ``billable`` would reject any seed
        # from a session that includes a non-billable member.
        seed = _load_debate_seed(Path(args.continue_as_debate), members)
        sys.stdout.write(
            f"council:debate · seeding round 1 from "
            f"{args.continue_as_debate} ({len(seed)} responses)\n"
        )

    written: list[Path] = []

    def _on_round_complete(round_number: int, results: list[CouncilResponse]) -> None:
        # Persist each round as soon as it finishes and report how many
        # responses came back without an error.
        path = _write_debate_round(
            out_dir, round_number, results,
            question=question, members=members,
            artefact=artefact, original_ask=args.original_ask,
            total_planned_rounds=rounds, table=table,
            prompt_mode="debate",
            prose_synthesis=getattr(args, "prose_synthesis", None),
        )
        written.append(path)
        errors = [r for r in results if r.error]
        sys.stdout.write(
            f"council:debate · wrote {path} "
            f"({len(results) - len(errors)}/{len(results)} ok)\n"
        )

    on_continue = _make_debate_continue_prompt(
        auto_continue=bool(getattr(args, "auto_continue", False)),
    )

    try:
        all_rounds = run_debate(
            members, question,
            budget=budget, table=table, project=project,
            original_ask=args.original_ask,
            max_rounds=rounds,
            on_round_complete=_on_round_complete,
            on_continue=on_continue,
            advisor_plans=advisor_plans,
            seed_round_1=seed,
        )
    except DebateCapExceeded as exc:
        sys.stderr.write(
            f"❌ council:debate cap reached after round {exc.completed_round}: "
            f"{exc}\n"
            f"Partial debate persisted under {out_dir} "
            f"({len(written)} rounds).\n"
        )
        return 3

    # Actual spend: only responses without an error are priced.
    actual_total = 0.0
    for rnd in all_rounds:
        for r in rnd:
            if r.error:
                continue
            ce = estimate_cost(
                r.provider, r.model, r.input_tokens, r.output_tokens, table,
            )
            actual_total += ce.total_usd
    sys.stdout.write(
        f"\ncouncil:debate · {len(all_rounds)} round(s) complete · "
        f"actual ${actual_total:.4f} (cap projection ${projected_total:.4f})\n"
    )
    # Exit 1 only when the final round produced nothing but errors.
    errors_last = [r for r in all_rounds[-1] if r.error] if all_rounds else []
    return 1 if errors_last and len(errors_last) == len(all_rounds[-1]) else 0
|
|
2032
|
+
|
|
2033
|
+
|
|
411
2034
|
def cmd_render(args: argparse.Namespace) -> int:
    """Render a saved responses JSON payload as the markdown report.

    The lens is taken from the first truthy value of: ``args.prompt_mode``,
    the payload's ``prompt_mode``, the payload's ``mode``. The
    prose-synthesis flag comes from ``args.prose_synthesis`` unless that is
    ``None``, in which case the payload's ``prose_synthesis`` is used.
    Consensus and peer-review blocks of the payload are deserialised and
    forwarded to ``render``.

    With ``args.output`` set, the report is written to the path approved by
    ``_validate_council_output_path(kind="sessions")``; otherwise it goes to
    stdout. Returns ``0`` in both cases.
    """
    payload = json.loads(Path(args.responses).read_text(encoding="utf-8"))
    raw_responses = payload.get("responses") or []

    lens = (
        getattr(args, "prompt_mode", None)
        or payload.get("prompt_mode")
        or payload.get("mode")
    )

    use_prose = getattr(args, "prose_synthesis", None)
    if use_prose is None:
        use_prose = payload.get("prose_synthesis")

    consensus = _deserialise_consensus(payload.get("consensus"))
    peer_review = _deserialise_peer_review(payload.get("peer_review"))
    report = render(
        _deserialise_responses(raw_responses),
        mode=lens,
        prose_synthesis=use_prose,
        consensus=consensus,
        peer_review=peer_review,
    )

    if not getattr(args, "output", None):
        sys.stdout.write(report + "\n")
        return 0

    destination = _validate_council_output_path(
        args.output, kind="sessions", subcommand="render",
    )
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(report + "\n", encoding="utf-8")
    sys.stdout.write(f"council:render · wrote {destination}\n")
    return 0
|
|
2069
|
+
|
|
2070
|
+
|
|
2071
|
+
def _cmd_replay_low_impact_stats(args: argparse.Namespace) -> int:
    """Summarise the ``low-impact-resolutions.md`` log of a session.

    The log is looked up in the directory that contains ``args.responses``.
    When it does not exist, an explicit "no entries" line is printed. When
    it exists, it is parsed with ``parse_low_impact_log`` and rendered with
    ``render_low_impact_stats``; the result goes to the validated
    ``args.output`` path if one was given, otherwise to stdout.

    Always returns ``0``: a session without a log is not an error.
    """
    from scripts.ai_council.low_impact import (  # noqa: WPS433 — local import
        parse_low_impact_log,
        render_low_impact_stats,
    )

    responses_path = Path(args.responses)
    log_path = responses_path.parent / "low-impact-resolutions.md"

    if log_path.exists():
        summary = render_low_impact_stats(
            parse_low_impact_log(log_path.read_text(encoding="utf-8")),
        )
        if getattr(args, "output", None):
            target = _validate_council_output_path(
                args.output, kind="sessions", subcommand="replay",
            )
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(summary, encoding="utf-8")
            sys.stdout.write(f"council:replay · wrote {target}\n")
        else:
            sys.stdout.write(summary)
    else:
        sys.stdout.write(
            "council:replay · no low-impact-resolutions.md alongside "
            f"{responses_path} — session had no fast-path entries.\n"
        )
    return 0
|
|
2104
|
+
|
|
2105
|
+
|
|
2106
|
+
def cmd_replay(args: argparse.Namespace) -> int:
    """Re-render the ``decision-replay.md`` audit trail (Phase 9).

    Reads the saved JSON payload at ``args.responses``, rebuilds the
    consensus bundle, and emits the replay markdown to stdout (default)
    or to ``--output``.

    When ``--low-impact-stats`` is set, the consensus replay is skipped
    and the call is delegated to ``_cmd_replay_low_impact_stats``, which
    summarises the session's ``low-impact-resolutions.md`` (Phase 11)
    instead.

    Returns:
        0 on success; 2 when the payload is not a JSON object or when
        no consensus bundle can be rebuilt from it (Phase 9
        prerequisite).
    """
    if getattr(args, "low_impact_stats", False):
        return _cmd_replay_low_impact_stats(args)

    payload = json.loads(Path(args.responses).read_text(encoding="utf-8"))
    # Valid JSON is not necessarily an object: a list or scalar has no
    # ``.get`` and would otherwise surface as an AttributeError traceback.
    if not isinstance(payload, dict):
        sys.stderr.write(
            "❌ council:replay: payload is not a JSON object — "
            "expected the JSON written by `council run`.\n"
        )
        return 2

    consensus = _deserialise_consensus(payload.get("consensus"))
    if consensus is None:
        sys.stderr.write(
            "❌ council:replay: payload has no `consensus` block — "
            "rerun with consensus_scoring enabled for this lens.\n"
        )
        return 2

    deliberation = _deserialise_responses(payload.get("responses") or [])

    # None means neither --include-member-arguments nor
    # --redact-member-arguments was given; per-member arguments are then
    # included by default.
    requested = getattr(args, "include_member_arguments", None)
    include_args = True if requested is None else bool(requested)

    body = render_decision_replay(
        DecisionReplayInputs(
            findings=list(consensus.findings),
            scores=list(consensus.scores),
            metadata=dict(consensus.metadata),
            deliberation=deliberation,
            original_ask=str(payload.get("original_ask", "")),
            include_member_arguments=include_args,
        ),
    )

    if getattr(args, "output", None):
        out_path = _validate_council_output_path(
            args.output, kind="sessions", subcommand="replay",
        )
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(body, encoding="utf-8")
        sys.stdout.write(f"council:replay · wrote {out_path}\n")
    else:
        sys.stdout.write(body)
    return 0
|
|
417
2154
|
|
|
418
2155
|
|
|
@@ -479,6 +2216,15 @@ def _add_common_input_args(p: argparse.ArgumentParser) -> None:
|
|
|
479
2216
|
p.add_argument("--input-mode", choices=["prompt", "roadmap"],
|
|
480
2217
|
default="prompt",
|
|
481
2218
|
help="How to bundle the file (default: prompt).")
|
|
2219
|
+
p.add_argument("--prompt-mode",
|
|
2220
|
+
choices=["pr", "design", "optimize", "analysis"],
|
|
2221
|
+
default=None, dest="prompt_mode",
|
|
2222
|
+
help="Lens-override for the system-prompt addendum. "
|
|
2223
|
+
"The bundle shape stays as --input-mode; only "
|
|
2224
|
+
"the per-mode neutrality addendum is swapped "
|
|
2225
|
+
"(see scripts/ai_council/prompts.py _MODE_TABLE). "
|
|
2226
|
+
"Routed by the /council pr|design|optimize|"
|
|
2227
|
+
"analysis wrappers.")
|
|
482
2228
|
p.add_argument("--max-tokens", type=int, default=None,
|
|
483
2229
|
help="Per-member output budget. Default reads "
|
|
484
2230
|
"ai_council.max_output_tokens from .agent-settings.yml "
|
|
@@ -505,6 +2251,89 @@ def _add_common_input_args(p: argparse.ArgumentParser) -> None:
|
|
|
505
2251
|
"skill.")
|
|
506
2252
|
p.add_argument("--original-ask", default="",
|
|
507
2253
|
help="The user's framing sentence (flows into handoff).")
|
|
2254
|
+
p.add_argument("--peer-review", dest="peer_review", action="store_true",
|
|
2255
|
+
default=False,
|
|
2256
|
+
help="Run an anonymous peer-review pass after the main "
|
|
2257
|
+
"deliberation. Each member critiques the others' "
|
|
2258
|
+
"(anonymised) responses for blind spots before "
|
|
2259
|
+
"synthesis. Adds N extra API calls. Opt-in per the "
|
|
2260
|
+
"R2 verdict; also accepts ai_council.peer_review."
|
|
2261
|
+
"enabled: true in agents/.ai-council.yml.")
|
|
2262
|
+
|
|
2263
|
+
|
|
2264
|
+
def cmd_shadow_report(args: argparse.Namespace) -> int:
    """Print the rolling disagreement rate + SLO status (step-9 P10).

    The shadow log is ``args.log`` when given, otherwise
    ``shadow_dispatch.SHADOW_LOG_PATH``. The rolling window is
    ``args.window_days`` days. The banner produced by
    ``shadow_dispatch.slo_banner`` is printed to stdout.

    Returns:
        Always 0.
    """
    from pathlib import Path as _Path

    from scripts.ai_council import shadow_dispatch as _sd

    if args.log:
        shadow_log = _Path(args.log)
    else:
        shadow_log = _sd.SHADOW_LOG_PATH

    window = int(args.window_days)
    rate, count = _sd.compute_disagreement_rate(shadow_log, window_days=window)
    banner = _sd.slo_banner(rate, count)
    print(banner)
    return 0
|
|
2276
|
+
|
|
2277
|
+
|
|
2278
|
+
def cmd_quota(
|
|
2279
|
+
args: argparse.Namespace,
|
|
2280
|
+
*,
|
|
2281
|
+
settings: dict[str, Any] | None = None,
|
|
2282
|
+
) -> int:
|
|
2283
|
+
"""Dump today's CLI-quota state (step-8 P1, D1).
|
|
2284
|
+
|
|
2285
|
+
Reads ``~/.event4u/agent-config/cli-calls.json`` plus the configured
|
|
2286
|
+
caps from ``.agent-settings.yml`` and prints one line per provider
|
|
2287
|
+
that has a configured ``max_calls_per_day``. ``--reset <provider>``
|
|
2288
|
+
(gated behind ``--confirm``) clears the counter for that provider.
|
|
2289
|
+
"""
|
|
2290
|
+
s = settings if settings is not None else load_settings()
|
|
2291
|
+
ai_cfg = (s.get("ai_council") or {}) if isinstance(s, dict) else {}
|
|
2292
|
+
cli_budget_cfg = (
|
|
2293
|
+
(ai_cfg.get("cli_call_budget") or {}) if isinstance(ai_cfg, dict) else {}
|
|
2294
|
+
)
|
|
2295
|
+
caps = (
|
|
2296
|
+
(cli_budget_cfg.get("max_calls_per_day") or {})
|
|
2297
|
+
if isinstance(cli_budget_cfg, dict)
|
|
2298
|
+
else {}
|
|
2299
|
+
)
|
|
2300
|
+
warn_at = (
|
|
2301
|
+
float(cli_budget_cfg.get("warn_at", 0.8))
|
|
2302
|
+
if isinstance(cli_budget_cfg, dict)
|
|
2303
|
+
else 0.8
|
|
2304
|
+
)
|
|
2305
|
+
|
|
2306
|
+
if getattr(args, "reset", None):
|
|
2307
|
+
provider = args.reset
|
|
2308
|
+
if not getattr(args, "confirm", False):
|
|
2309
|
+
sys.stderr.write(
|
|
2310
|
+
f"❌ council:quota: --reset {provider} requires --confirm.\n"
|
|
2311
|
+
)
|
|
2312
|
+
return 2
|
|
2313
|
+
reset_cli_call_counts(provider=provider)
|
|
2314
|
+
sys.stdout.write(f"council:quota · reset · {provider}\n")
|
|
2315
|
+
return 0
|
|
2316
|
+
|
|
2317
|
+
counts = load_cli_call_counts()
|
|
2318
|
+
if not caps:
|
|
2319
|
+
sys.stdout.write(
|
|
2320
|
+
"council:quota · no providers have a configured "
|
|
2321
|
+
"cli_call_budget.max_calls_per_day cap.\n"
|
|
2322
|
+
)
|
|
2323
|
+
return 0
|
|
2324
|
+
for provider in sorted(caps):
|
|
2325
|
+
limit = int(caps[provider])
|
|
2326
|
+
used = int(counts.get(provider, 0))
|
|
2327
|
+
ratio = used / limit if limit > 0 else 0.0
|
|
2328
|
+
status = "ok"
|
|
2329
|
+
if used >= limit:
|
|
2330
|
+
status = "exhausted"
|
|
2331
|
+
elif ratio >= warn_at:
|
|
2332
|
+
status = "warn"
|
|
2333
|
+
sys.stdout.write(
|
|
2334
|
+
f"council:quota · {provider} · {used}/{limit} · {status}\n"
|
|
2335
|
+
)
|
|
2336
|
+
return 0
|
|
508
2337
|
|
|
509
2338
|
|
|
510
2339
|
def build_parser() -> argparse.ArgumentParser:
|
|
@@ -516,6 +2345,15 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
516
2345
|
|
|
517
2346
|
p_est = sub.add_parser("estimate", help="Pre-call cost preview (no spend).")
|
|
518
2347
|
_add_common_input_args(p_est)
|
|
2348
|
+
p_est.add_argument("--debate", action="store_true", default=False,
|
|
2349
|
+
help="Render the round-by-round projection for a "
|
|
2350
|
+
"debate run (one call per member per round). "
|
|
2351
|
+
"Progressive disclosure may stop the debate "
|
|
2352
|
+
"early — this is an upper bound.")
|
|
2353
|
+
p_est.add_argument("--rounds", type=int, default=None,
|
|
2354
|
+
help="Debate round count for --debate. Defaults to "
|
|
2355
|
+
"ai_council.min_rounds (typically 2); capped "
|
|
2356
|
+
"at ai_council.debate_max_rounds (typically 4).")
|
|
519
2357
|
|
|
520
2358
|
p_run = sub.add_parser("run", help="Run the council; --confirm required to spend.")
|
|
521
2359
|
_add_common_input_args(p_run)
|
|
@@ -534,14 +2372,143 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
534
2372
|
"artefacts. Set by the host agent when the consuming "
|
|
535
2373
|
"rule/skill/command declares council_depth: deep. "
|
|
536
2374
|
"Overridden by explicit --rounds.")
|
|
2375
|
+
p_run.add_argument("--invocation", choices=["agent", "user_explicit"],
|
|
2376
|
+
default="agent",
|
|
2377
|
+
help="Source signal for the necessity classifier "
|
|
2378
|
+
"(Phase 6). 'agent' = autonomous (default; silent "
|
|
2379
|
+
"skip when unnecessary). 'user_explicit' = manual "
|
|
2380
|
+
"user invocation (educate path when unnecessary, "
|
|
2381
|
+
"requires --proceed-anyway to override).")
|
|
2382
|
+
p_run.add_argument("--proceed-anyway", action="store_true",
|
|
2383
|
+
dest="proceed_anyway", default=False,
|
|
2384
|
+
help="Override the necessity-classifier skip / educate "
|
|
2385
|
+
"verdict for this invocation (Phase 6). Has no "
|
|
2386
|
+
"effect when the classifier verdict is "
|
|
2387
|
+
"`necessary` or `borderline`.")
|
|
2388
|
+
p_run.add_argument("--single", action="store_true", default=False,
|
|
2389
|
+
help="Dispatch to a single member from "
|
|
2390
|
+
"routing.solo_member_fallback_chain (step-9 P9). "
|
|
2391
|
+
"Falls back to the full council when the chain is "
|
|
2392
|
+
"empty or no chain member is runtime-present. "
|
|
2393
|
+
"Overridden by env "
|
|
2394
|
+
"AGENT_CONFIG_FORCE_FULL_COUNCIL=1.")
|
|
2395
|
+
_add_prose_synthesis_arg(p_run)
|
|
2396
|
+
|
|
2397
|
+
p_deb = sub.add_parser(
|
|
2398
|
+
"debate",
|
|
2399
|
+
help="Multi-round debate with progressive cost disclosure (Phase 7).",
|
|
2400
|
+
)
|
|
2401
|
+
_add_common_input_args(p_deb)
|
|
2402
|
+
p_deb.add_argument("--output", required=True,
|
|
2403
|
+
help="Directory to write debate-round-N.json files.")
|
|
2404
|
+
p_deb.add_argument("--confirm", action="store_true",
|
|
2405
|
+
help="Required to actually start the debate.")
|
|
2406
|
+
p_deb.add_argument("--rounds", type=int, default=None,
|
|
2407
|
+
help="Number of debate rounds (default 2). Capped by "
|
|
2408
|
+
"ai_council.debate_max_rounds in agents/.ai-council.yml.")
|
|
2409
|
+
p_deb.add_argument("--auto-continue", action="store_true",
|
|
2410
|
+
default=False, dest="auto_continue",
|
|
2411
|
+
help="Skip the between-round y/N prompt. The hard cap "
|
|
2412
|
+
"against cost_budget.max_total_usd still applies.")
|
|
2413
|
+
p_deb.add_argument("--continue-as-debate", default=None,
|
|
2414
|
+
dest="continue_as_debate", metavar="PATH",
|
|
2415
|
+
help="Seed round 1 from an existing council session "
|
|
2416
|
+
"JSON. Members + models must match the current "
|
|
2417
|
+
"invocation.")
|
|
2418
|
+
p_deb.add_argument("--invocation", choices=["agent", "user_explicit"],
|
|
2419
|
+
default="agent",
|
|
2420
|
+
help="Source signal for the necessity classifier "
|
|
2421
|
+
"(Phase 6). 'agent' = autonomous (default; silent "
|
|
2422
|
+
"skip when unnecessary). 'user_explicit' = manual "
|
|
2423
|
+
"user invocation (educate path when unnecessary, "
|
|
2424
|
+
"requires --proceed-anyway to override).")
|
|
2425
|
+
p_deb.add_argument("--proceed-anyway", action="store_true",
|
|
2426
|
+
dest="proceed_anyway", default=False,
|
|
2427
|
+
help="Override the necessity-classifier skip / educate "
|
|
2428
|
+
"verdict for this invocation (Phase 6). Has no "
|
|
2429
|
+
"effect when the classifier verdict is "
|
|
2430
|
+
"`necessary` or `borderline`.")
|
|
2431
|
+
_add_prose_synthesis_arg(p_deb)
|
|
537
2432
|
|
|
538
2433
|
p_ren = sub.add_parser("render", help="Re-render a saved responses JSON.")
|
|
539
2434
|
p_ren.add_argument("responses",
|
|
540
2435
|
help="Path to the JSON written by `council run`.")
|
|
2436
|
+
p_ren.add_argument("--prompt-mode",
|
|
2437
|
+
choices=["default", "pr", "design", "optimize", "analysis",
|
|
2438
|
+
"prompt", "roadmap", "diff", "files"],
|
|
2439
|
+
default=None, dest="prompt_mode",
|
|
2440
|
+
help="Override the synthesis-template lens. Defaults "
|
|
2441
|
+
"to the `mode` recorded in the responses JSON.")
|
|
2442
|
+
p_ren.add_argument("--output", default=None,
|
|
2443
|
+
help="Write the rendered markdown to a file under "
|
|
2444
|
+
"agents/council-sessions/ (enforced). Omit for "
|
|
2445
|
+
"stdout. Prefer this over shell redirects so "
|
|
2446
|
+
"the canonical-path check fires at write-time.")
|
|
2447
|
+
_add_prose_synthesis_arg(p_ren)
|
|
2448
|
+
|
|
2449
|
+
p_rep = sub.add_parser(
|
|
2450
|
+
"replay",
|
|
2451
|
+
help="Re-render decision-replay.md from a saved responses JSON (Phase 9).",
|
|
2452
|
+
)
|
|
2453
|
+
p_rep.add_argument("responses",
|
|
2454
|
+
help="Path to the JSON written by `council run`.")
|
|
2455
|
+
p_rep.add_argument("--output", default=None,
|
|
2456
|
+
help="Optional file to write the replay markdown. "
|
|
2457
|
+
"Defaults to stdout.")
|
|
2458
|
+
rep_group = p_rep.add_mutually_exclusive_group()
|
|
2459
|
+
rep_group.add_argument("--redact-member-arguments",
|
|
2460
|
+
dest="include_member_arguments",
|
|
2461
|
+
action="store_const", const=False, default=None,
|
|
2462
|
+
help="Emit the redacted view (consensus + dissent "
|
|
2463
|
+
"counts only, no per-member arguments).")
|
|
2464
|
+
rep_group.add_argument("--include-member-arguments",
|
|
2465
|
+
dest="include_member_arguments",
|
|
2466
|
+
action="store_const", const=True,
|
|
2467
|
+
help="Include per-member arguments (default).")
|
|
2468
|
+
p_rep.add_argument("--low-impact-stats", action="store_true", default=False,
|
|
2469
|
+
help="Skip the decision replay and print a summary of "
|
|
2470
|
+
"low-impact fast-path resolutions for the session "
|
|
2471
|
+
"(parses `low-impact-resolutions.md` alongside the "
|
|
2472
|
+
"responses JSON).")
|
|
2473
|
+
|
|
2474
|
+
p_quo = sub.add_parser(
|
|
2475
|
+
"quota",
|
|
2476
|
+
help="Dump today's CLI-quota state and configured caps (step-8 P1).",
|
|
2477
|
+
)
|
|
2478
|
+
p_quo.add_argument("--reset", default=None, metavar="PROVIDER",
|
|
2479
|
+
help="Reset today's counter for one provider. "
|
|
2480
|
+
"Requires --confirm.")
|
|
2481
|
+
p_quo.add_argument("--confirm", action="store_true", default=False,
|
|
2482
|
+
help="Confirm a mutating --reset operation.")
|
|
2483
|
+
|
|
2484
|
+
p_sha = sub.add_parser(
|
|
2485
|
+
"shadow-report",
|
|
2486
|
+
help="Read agents/council-shadow-log.jsonl and print the 7-day "
|
|
2487
|
+
"rolling disagreement rate + SLO status (step-9 P10).",
|
|
2488
|
+
)
|
|
2489
|
+
p_sha.add_argument("--log", default=None,
|
|
2490
|
+
help="Path to the shadow log (default: "
|
|
2491
|
+
"agents/council-shadow-log.jsonl).")
|
|
2492
|
+
p_sha.add_argument("--window-days", type=int, default=7,
|
|
2493
|
+
help="Rolling window in days (default: 7).")
|
|
541
2494
|
|
|
542
2495
|
return parser
|
|
543
2496
|
|
|
544
2497
|
|
|
2498
|
+
def _add_prose_synthesis_arg(p: argparse.ArgumentParser) -> None:
    """Register the ``--prose-synthesis`` / ``--no-prose-synthesis`` pair.

    R4 Q4 escape hatch. Both flags store a constant into
    ``prose_synthesis`` and live in one mutually exclusive group; the
    attribute stays ``None`` when neither flag is passed.
    """
    # (flag, stored constant, help text) — one row per toggle.
    toggles = (
        (
            "--prose-synthesis",
            True,
            "Force open-ended prose synthesis (bare slot) "
            "regardless of lens. R4 Q4 escape hatch.",
        ),
        (
            "--no-prose-synthesis",
            False,
            "Force the structured default decision-lens "
            "template even on a creative lens "
            "(design / optimize). Symmetric escape hatch.",
        ),
    )
    exclusive = p.add_mutually_exclusive_group()
    for flag, stored, help_text in toggles:
        exclusive.add_argument(
            flag,
            dest="prose_synthesis",
            action="store_const",
            const=stored,
            default=None,
            help=help_text,
        )
|
|
2510
|
+
|
|
2511
|
+
|
|
545
2512
|
def main(argv: list[str] | None = None) -> int:
|
|
546
2513
|
args = build_parser().parse_args(argv)
|
|
547
2514
|
try:
|
|
@@ -549,8 +2516,16 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
549
2516
|
return cmd_estimate(args)
|
|
550
2517
|
if args.cmd == "run":
|
|
551
2518
|
return cmd_run(args)
|
|
2519
|
+
if args.cmd == "debate":
|
|
2520
|
+
return cmd_debate(args)
|
|
552
2521
|
if args.cmd == "render":
|
|
553
2522
|
return cmd_render(args)
|
|
2523
|
+
if args.cmd == "replay":
|
|
2524
|
+
return cmd_replay(args)
|
|
2525
|
+
if args.cmd == "quota":
|
|
2526
|
+
return cmd_quota(args)
|
|
2527
|
+
if args.cmd == "shadow-report":
|
|
2528
|
+
return cmd_shadow_report(args)
|
|
554
2529
|
except CouncilDisabledError as exc:
|
|
555
2530
|
sys.stderr.write(f"❌ council:{args.cmd}: {exc}\n")
|
|
556
2531
|
return 2
|