@event4u/agent-config 2.12.0 → 2.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/.agent-src/commands/council/analysis.md +142 -0
  2. package/.agent-src/commands/council/debate.md +129 -0
  3. package/.agent-src/commands/council/default.md +8 -0
  4. package/.agent-src/commands/council/design.md +16 -12
  5. package/.agent-src/commands/council/optimize.md +16 -15
  6. package/.agent-src/commands/council/pr.md +12 -12
  7. package/.agent-src/commands/council.md +48 -2
  8. package/.agent-src/commands/memory/learn-low-impact.md +143 -0
  9. package/.agent-src/personas/advisors/contrarian.md +95 -0
  10. package/.agent-src/personas/advisors/executor.md +99 -0
  11. package/.agent-src/personas/advisors/expansionist.md +98 -0
  12. package/.agent-src/personas/advisors/first-principles.md +98 -0
  13. package/.agent-src/personas/advisors/outsider.md +102 -0
  14. package/.agent-src/rules/ask-when-uncertain.md +10 -6
  15. package/.agent-src/rules/copilot-routing.md +19 -0
  16. package/.agent-src/rules/devcontainer-routing.md +20 -0
  17. package/.agent-src/rules/external-reference-deep-dive.md +1 -1
  18. package/.agent-src/rules/fast-path-marker-visibility.md +38 -0
  19. package/.agent-src/rules/laravel-routing.md +20 -0
  20. package/.agent-src/rules/low-impact-corpus-privacy-floor.md +74 -0
  21. package/.agent-src/rules/symfony-routing.md +20 -0
  22. package/.agent-src/skills/ai-council/SKILL.md +388 -10
  23. package/.agent-src/skills/copilot-config/SKILL.md +1 -1
  24. package/.agent-src/skills/devcontainer/SKILL.md +1 -1
  25. package/.agent-src/skills/laravel/SKILL.md +1 -1
  26. package/.agent-src/skills/project-analysis-core/SKILL.md +1 -1
  27. package/.agent-src/skills/project-analyzer/SKILL.md +1 -1
  28. package/.agent-src/skills/symfony-workflow/SKILL.md +1 -1
  29. package/.agent-src/skills/universal-project-analysis/SKILL.md +1 -1
  30. package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
  31. package/.claude-plugin/marketplace.json +4 -1
  32. package/AGENTS.md +1 -1
  33. package/CHANGELOG.md +346 -124
  34. package/CONTRIBUTING.md +5 -0
  35. package/README.md +6 -6
  36. package/config/agent-settings.template.yml +5 -93
  37. package/config/gitignore-block.txt +6 -0
  38. package/docs/architecture/multi-tool-projection.md +53 -0
  39. package/docs/architecture/{compression.md → source-projection.md} +21 -3
  40. package/docs/architecture.md +15 -15
  41. package/docs/archive/CHANGELOG-pre-2.11.0.md +141 -0
  42. package/docs/catalog.md +25 -12
  43. package/docs/contracts/adr-architectural-consensus-mechanism.md +68 -0
  44. package/docs/contracts/adr-level-6-productization.md +7 -9
  45. package/docs/contracts/ai-council-config.md +658 -0
  46. package/docs/contracts/command-clusters.md +58 -2
  47. package/docs/contracts/command-surface-tiers.md +3 -2
  48. package/docs/contracts/cost-profile-defaults.md +5 -0
  49. package/docs/contracts/decision-engine-gates.md +5 -0
  50. package/docs/contracts/decision-trace-v1.md +2 -2
  51. package/docs/contracts/file-ownership-matrix.json +1735 -72
  52. package/docs/contracts/installed-tools-lockfile.md +2 -1
  53. package/docs/contracts/low-impact-corpus-format.md +95 -0
  54. package/docs/contracts/mcp-beta-criteria.md +6 -5
  55. package/docs/contracts/mcp-cloud-scope.md +5 -4
  56. package/docs/contracts/multi-tool-projection-fidelity.md +115 -0
  57. package/docs/contracts/release-trunk-sync.md +4 -3
  58. package/docs/contracts/tier-3-contrib-plugin.md +5 -6
  59. package/docs/getting-started.md +2 -2
  60. package/docs/guidelines/agent-infra/installed-tools-manifest.md +2 -1
  61. package/docs/installation.md +32 -0
  62. package/package.json +1 -1
  63. package/scripts/_archive/README.md +59 -0
  64. package/scripts/_cli/cmd_doctor.py +134 -0
  65. package/scripts/ai_council/_default_prices.py +10 -1
  66. package/scripts/ai_council/advisors.py +148 -0
  67. package/scripts/ai_council/airgap.py +165 -0
  68. package/scripts/ai_council/cli_hints.py +123 -0
  69. package/scripts/ai_council/clients.py +959 -5
  70. package/scripts/ai_council/compile_corpus.py +178 -0
  71. package/scripts/ai_council/confidence_gate.py +156 -0
  72. package/scripts/ai_council/config.py +1364 -0
  73. package/scripts/ai_council/consensus.py +329 -0
  74. package/scripts/ai_council/events_log.py +137 -0
  75. package/scripts/ai_council/learn_low_impact_preview.py +252 -0
  76. package/scripts/ai_council/low_impact.py +714 -0
  77. package/scripts/ai_council/low_impact_corpus.py +466 -0
  78. package/scripts/ai_council/low_impact_intake.py +163 -0
  79. package/scripts/ai_council/modes.py +6 -1
  80. package/scripts/ai_council/necessity.py +782 -0
  81. package/scripts/ai_council/orchestrator.py +872 -20
  82. package/scripts/ai_council/probation_gate.py +152 -0
  83. package/scripts/ai_council/prompts.py +335 -0
  84. package/scripts/ai_council/redact_low_impact_entry.py +155 -0
  85. package/scripts/ai_council/replay.py +155 -0
  86. package/scripts/ai_council/session.py +19 -1
  87. package/scripts/ai_council/shadow_dispatch.py +235 -0
  88. package/scripts/ai_council/solo_dispatch.py +226 -0
  89. package/scripts/audit_cloud_compatibility.py +74 -0
  90. package/scripts/audit_command_surface.py +363 -0
  91. package/scripts/check_compressed_paths.py +6 -1
  92. package/scripts/check_council_layout.py +11 -0
  93. package/scripts/ci_time_ratio.py +168 -0
  94. package/scripts/council_cli.py +2005 -30
  95. package/scripts/install.sh +12 -0
  96. package/scripts/measure_projection_bytes.py +159 -0
  97. package/scripts/measure_roadmap_trajectory.py +112 -0
  98. package/scripts/probe_projection_fidelity.py +202 -0
  99. package/scripts/score_skill_selection.py +198 -0
  100. package/scripts/skill_collision_clusters.py +162 -0
  101. /package/scripts/{_backfill_skill_domains.py → _archive/_backfill_skill_domains.py} +0 -0
  102. /package/scripts/{_bootstrap_tier_frontmatter.py → _archive/_bootstrap_tier_frontmatter.py} +0 -0
  103. /package/scripts/{_p43_bodies.py → _archive/_p43_bodies.py} +0 -0
  104. /package/scripts/{_p43_compress.py → _archive/_p43_compress.py} +0 -0
  105. /package/scripts/{_p4_migrate.py → _archive/_p4_migrate.py} +0 -0
  106. /package/scripts/{_phase2_shim_helper.py → _archive/_phase2_shim_helper.py} +0 -0
  107. /package/scripts/{_pilot_council_question.py → _archive/_pilot_council_question.py} +0 -0
@@ -0,0 +1,155 @@
1
+ """Decision-replay artefact for council sessions (Phase 9).
2
+
3
+ Produces a per-session ``decision-replay.md`` that surfaces the audit
4
+ trail GPT review of PR #148 called out as missing: for each top
5
+ finding, the consensus_strength, agreeing-members with their key
6
+ argument, dissenting-members with their counter-argument, the
7
+ evidence-quality verdict, and a final synthesis verdict line.
8
+
9
+ The artefact is a pure projection of the consensus data plus the
10
+ per-member deliberation texts — no extra model calls. Schema is
11
+ documented in ``docs/contracts/ai-council-config.md`` under
12
+ "Decision-replay schema".
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import dataclass
18
+ from typing import Iterable, Sequence
19
+
20
+ from scripts.ai_council.clients import CouncilResponse
21
+ from scripts.ai_council.consensus import (
22
+ ConsensusMetadata,
23
+ Finding,
24
+ FindingScore,
25
+ )
26
+
27
+
28
@dataclass(frozen=True)
class DecisionReplayInputs:
    """Immutable input bundle for :func:`render_decision_replay`.

    Set ``include_member_arguments`` to ``False`` to get the redacted
    artefact: consensus and dissent COUNTS are still emitted, but the
    per-member arguments are left out so the output can be shared
    without revealing which model framed which point.
    """

    # Findings to render, one section each.
    findings: Sequence[Finding]
    # Per-scorer scores; matched to a finding through ``finding_id``.
    scores: Sequence[FindingScore]
    # Consensus metadata keyed by finding id.
    metadata: dict[str, ConsensusMetadata]
    # Last-round per-member texts.
    deliberation: Sequence[CouncilResponse]
    # Original question; rendered as a blockquote when non-blank.
    original_ask: str = ""
    # True -> full artefact, False -> counts only.
    include_member_arguments: bool = True
44
+
45
+
46
+ def _verdict(strength: float) -> str:
47
+ """Single-word verdict band for a consensus_strength."""
48
+ if strength > 0.7:
49
+ return "Strong"
50
+ if strength > 0.4:
51
+ return "Moderate"
52
+ return "Weak"
53
+
54
+
55
+ def _scorer_argument(
56
+ scorer: str,
57
+ member_texts: dict[str, str],
58
+ score: FindingScore | None,
59
+ ) -> str:
60
+ """Return the one-line key argument for ``scorer`` on a finding.
61
+
62
+ Prefers the scorer's ``reason`` field (rich, contextual) and falls
63
+ back to the truncated deliberation snippet so the audit trail never
64
+ surfaces an empty argument.
65
+ """
66
+ if score and score.reason:
67
+ flat = " ".join(score.reason.split())
68
+ if len(flat) > 200:
69
+ flat = flat[:199].rstrip() + "…"
70
+ return flat
71
+ snippet = member_texts.get(scorer, "")
72
+ flat = " ".join(snippet.split())
73
+ if not flat:
74
+ return "no argument captured"
75
+ if len(flat) > 200:
76
+ flat = flat[:199].rstrip() + "…"
77
+ return flat
78
+
79
+
80
+ def _scores_for_finding(
81
+ fid: str, scores: Iterable[FindingScore],
82
+ ) -> dict[str, FindingScore]:
83
+ return {s.scorer: s for s in scores if s.finding_id == fid}
84
+
85
+
86
def render_decision_replay(inputs: DecisionReplayInputs) -> str:
    """Render the ``decision-replay.md`` body.

    Layout, in order: an H1 heading; the original ask as a blockquote
    (whitespace-collapsed, capped at 400 characters) when a non-blank
    ask was given; one ``## <finding-id> — <truncated text>`` section
    per finding, ordered by consensus_strength descending; and a footer
    naming the artefact mode so consumers can tell at a glance whether
    member arguments were redacted. When there are no findings a
    one-line notice replaces the sections and the footer is omitted.

    Findings with no usable entry in ``inputs.metadata`` are rendered
    with a zero-strength placeholder, so they sort last and read as
    "Weak". For dissenting members the scorer's own ``reason`` is
    preferred; when it is missing, the reason recorded alongside the
    dissent is used before falling back to the deliberation text.
    """

    def _clip(text: str, limit: int) -> str:
        # Collapse whitespace runs and cap at ``limit`` characters,
        # ellipsis included.
        flat = " ".join(text.split())
        if len(flat) > limit:
            flat = flat[: limit - 1].rstrip() + "…"
        return flat

    def _meta_for(finding_id: str) -> ConsensusMetadata:
        # Single source for the "no consensus data" placeholder, built
        # only when it is actually needed.
        known = inputs.metadata.get(finding_id)
        if known is not None:
            return known
        return ConsensusMetadata(
            finding_id=finding_id, consensus_strength=0.0, dissent_count=0,
            scorers=(), mean_score=0.0,
        )

    member_texts = {
        f"{r.provider}:{r.model}": r.text or "" for r in inputs.deliberation
    }
    ranked = sorted(
        inputs.findings,
        key=lambda f: _meta_for(f.id).consensus_strength,
        reverse=True,
    )
    lines: list[str] = ["# Decision Replay\n"]
    if inputs.original_ask.strip():
        lines.append(f"> {_clip(inputs.original_ask, 400)}\n")
    if not ranked:
        lines.append("*No findings were extracted for this session.*\n")
        return "\n".join(lines).rstrip() + "\n"
    for f in ranked:
        m = _meta_for(f.id)
        verdict = _verdict(m.consensus_strength)
        lines.append(f"## {f.id} — {_clip(f.text, 120)}\n")
        lines.append(
            f"- **Consensus**: {verdict} ({m.consensus_strength:.2f})\n"
            f"- **Evidence quality**: {m.evidence_quality} "
            f"(mean {m.mean_score:.1f}/10)\n"
            f"- **Agreement**: {m.concur_count}/"
            f"{m.concur_count + m.dissent_count} members concur, "
            f"{m.dissent_count} dissent\n",
        )
        if inputs.include_member_arguments:
            score_map = _scores_for_finding(f.id, inputs.scores)
            agreeing = [
                s for s in m.scorers
                if score_map.get(s) and score_map[s].agree
            ]
            if agreeing:
                lines.append("**Agreeing members**:")
                for scorer in agreeing:
                    arg = _scorer_argument(
                        scorer, member_texts, score_map.get(scorer),
                    )
                    lines.append(f"- _{scorer}_ — {arg}")
                lines.append("")
            dissent = list(m.dissent_reasons)
            if dissent:
                lines.append("**Dissenting members**:")
                for scorer, reason in dissent:
                    score = score_map.get(scorer)
                    recorded = _clip(reason, 200) if isinstance(reason, str) else ""
                    if recorded and not (score and score.reason):
                        # No scorer-level reason: the recorded dissent
                        # reason is the counter-argument, which beats an
                        # unrelated deliberation snippet.
                        arg = recorded
                    else:
                        arg = _scorer_argument(scorer, member_texts, score)
                    lines.append(f"- _{scorer}_ — {arg}")
                lines.append("")
        lines.append(
            f"**Synthesis verdict**: {verdict} consensus — {f.source} sourced.\n"
        )
    mode_label = (
        "full" if inputs.include_member_arguments else "redacted (counts only)"
    )
    lines.append(f"---\n\n_artefact mode: {mode_label}_\n")
    return "\n".join(lines).rstrip() + "\n"
@@ -71,14 +71,32 @@ def _utc_timestamp() -> str:
71
71
 
72
72
 
73
73
  def _serialise_response(r: CouncilResponse) -> dict[str, object]:
74
- return {
74
+ """Project a `CouncilResponse` into the manifest schema.
75
+
76
+ Phase 5 / Step 1 — surface ``transport``, ``billable``,
77
+ ``subscription_label``, ``cost_usd``, and ``tokens_estimated`` so
78
+ the audit trail can distinguish flat-rate CLI calls from billable
79
+ api / community-CLI calls. When ``tokens_estimated`` is true the
80
+ token counts are kept (heuristic) but flagged so consumers can
81
+ null or disclaim them.
82
+ """
83
+ meta = r.metadata or {}
84
+ payload: dict[str, object] = {
75
85
  "provider": r.provider,
76
86
  "model": r.model,
77
87
  "input_tokens": r.input_tokens,
78
88
  "output_tokens": r.output_tokens,
79
89
  "latency_ms": r.latency_ms,
80
90
  "error": r.error,
91
+ "transport": meta.get("transport", "api"),
92
+ "billable": bool(meta.get("billable", True)),
93
+ "tokens_estimated": bool(meta.get("tokens_estimated", False)),
81
94
  }
95
+ if meta.get("subscription_label"):
96
+ payload["subscription_label"] = meta["subscription_label"]
97
+ if "cost_usd" in meta:
98
+ payload["cost_usd"] = meta["cost_usd"]
99
+ return payload
82
100
 
83
101
 
84
102
  def _load_retention_days(settings_path: Path | None = None) -> int:
@@ -0,0 +1,235 @@
1
+ """Shadow-mode dispatch for low-impact solo-member decisions (step-9 P10).
2
+
3
+ When ``low_impact.dispatch: single`` is active, a Bernoulli-sampled subset
4
+ of decisions is shadowed through the full council so disagreement between
5
+ the solo verdict and the council verdict can be measured. The shadow log
6
+ lives at ``agents/council-shadow-log.jsonl`` and is subject to the same
7
+ privacy floor as the low-impact corpus: redactor-refused entries are
8
+ dropped, not softened.
9
+
10
+ The flip from ``single`` back to ``full`` is a user decision; this module
11
+ emits data and an SLO banner, nothing else.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import hashlib
17
+ import json
18
+ import random
19
+ from dataclasses import dataclass
20
+ from datetime import datetime, timedelta, timezone
21
+ from pathlib import Path
22
+ from typing import Iterable
23
+
24
+ from scripts.ai_council.bundler import redact
25
+
26
+ SHADOW_LOG_PATH = Path("agents/council-shadow-log.jsonl")
27
+
28
+ SLO_THRESHOLD_WARN = 0.05
29
+ SLO_THRESHOLD_BREACH = 0.08
30
+
31
+
32
@dataclass(frozen=True)
class ShadowDecision:
    """One shadow-log row: a solo verdict next to the council verdict."""

    # When the row was recorded.
    timestamp: str
    # Hash identifying the query; the query text itself is not stored.
    query_hash: str
    # Answer produced by the single member.
    solo_verdict: str
    # Answer produced by the full council.
    full_verdict: str
    # Whether the two verdicts matched.
    agreed: bool
    #: Step-9 P13 — True when the confidence gate auto-escalated this
    #: decision to the full council. Separates "silent disagreement"
    #: (escalated=False, agreed=False) from "gate-caught"
    #: (escalated=True) in the SLO banner.
    escalated: bool = False
    # Gate reason code; "ok" when nothing was escalated.
    escalation_reason: str = "ok"
45
+
46
+
47
+ def should_shadow(
48
+ sample_rate: float,
49
+ *,
50
+ rng: random.Random | None = None,
51
+ ) -> bool:
52
+ rate = max(0.0, min(1.0, sample_rate))
53
+ r = rng if rng is not None else random
54
+ return r.random() < rate
55
+
56
+
57
def _hash_query(query: str) -> str:
    """Return a 16-hex-character fingerprint of the redacted query.

    The query is passed through ``redact`` before hashing, and the
    SHA-256 hex digest is truncated to its first 16 characters.
    """
    digest = hashlib.sha256()
    digest.update(redact(query).encode("utf-8"))
    return digest.hexdigest()[:16]
60
+
61
+
62
+ def _privacy_dropped(redacted: str) -> bool:
63
+ stripped = redacted.strip()
64
+ if not stripped:
65
+ return True
66
+ return stripped.startswith("[redacted")
67
+
68
+
69
def record_shadow_decision(
    log_path: Path,
    *,
    query: str,
    solo_verdict: str,
    full_verdict: str,
    escalated: bool = False,
    escalation_reason: str = "ok",
) -> ShadowDecision | None:
    """Append one JSONL row to ``log_path`` and return the recorded entry.

    Returns ``None`` without writing anything when the redacted query
    trips the privacy floor — such entries are dropped, never softened.
    The parent directory of ``log_path`` is created when missing.

    ``escalated`` / ``escalation_reason`` come from the confidence
    gate (step-9 P13). When ``escalated`` is True, ``solo_verdict`` is
    the rejected solo response and ``full_verdict`` is the council's
    verdict that actually answered the user.
    """
    if _privacy_dropped(redact(query)):
        return None

    entry = ShadowDecision(
        timestamp=datetime.now(timezone.utc).isoformat(timespec="seconds"),
        query_hash=_hash_query(query),
        solo_verdict=solo_verdict,
        full_verdict=full_verdict,
        agreed=(solo_verdict == full_verdict),
        escalated=escalated,
        escalation_reason=escalation_reason,
    )
    row = {
        "timestamp": entry.timestamp,
        "query_hash": entry.query_hash,
        "solo_verdict": entry.solo_verdict,
        "full_verdict": entry.full_verdict,
        "agreed": entry.agreed,
        "escalated": entry.escalated,
        "escalation_reason": entry.escalation_reason,
    }
    serialised = json.dumps(row) + "\n"
    log_path.parent.mkdir(parents=True, exist_ok=True)
    with log_path.open("a", encoding="utf-8") as sink:
        sink.write(serialised)
    return entry
111
+
112
+
113
+ def _iter_log(log_path: Path) -> Iterable[dict]:
114
+ if not log_path.exists():
115
+ return
116
+ with log_path.open("r", encoding="utf-8") as f:
117
+ for line in f:
118
+ line = line.strip()
119
+ if not line:
120
+ continue
121
+ try:
122
+ yield json.loads(line)
123
+ except json.JSONDecodeError:
124
+ continue
125
+
126
+
127
+ def compute_disagreement_rate(
128
+ log_path: Path,
129
+ *,
130
+ window_days: int = 7,
131
+ now: datetime | None = None,
132
+ ) -> tuple[float, int]:
133
+ """``(disagreement_rate, sample_count)`` over the rolling window.
134
+
135
+ Counts a row as "disagreed" when ``agreed=False`` regardless of
136
+ the escalation flag — a gate-caught split is still a sign that
137
+ solo mode was wrong on that decision. :func:`compute_escalation_rate`
138
+ breaks the same window down by ``escalated=True`` for the banner.
139
+ """
140
+ cutoff = (now or datetime.now(timezone.utc)) - timedelta(days=window_days)
141
+ total = 0
142
+ disagreed = 0
143
+ for row in _iter_log(log_path):
144
+ raw_ts = row.get("timestamp", "")
145
+ try:
146
+ ts = datetime.fromisoformat(raw_ts.replace("Z", "+00:00"))
147
+ except ValueError:
148
+ continue
149
+ if ts.tzinfo is None:
150
+ ts = ts.replace(tzinfo=timezone.utc)
151
+ if ts < cutoff:
152
+ continue
153
+ total += 1
154
+ if not row.get("agreed", True):
155
+ disagreed += 1
156
+ if total == 0:
157
+ return 0.0, 0
158
+ return disagreed / total, total
159
+
160
+
161
+ def compute_escalation_rate(
162
+ log_path: Path,
163
+ *,
164
+ window_days: int = 7,
165
+ now: datetime | None = None,
166
+ ) -> tuple[float, int]:
167
+ """``(escalation_rate, sample_count)`` — fraction with ``escalated=True``.
168
+
169
+ Step-9 P13 — separates gate-caught escalations from silent
170
+ disagreement so the banner can name the dominant failure mode.
171
+ """
172
+ cutoff = (now or datetime.now(timezone.utc)) - timedelta(days=window_days)
173
+ total = 0
174
+ escalated = 0
175
+ for row in _iter_log(log_path):
176
+ raw_ts = row.get("timestamp", "")
177
+ try:
178
+ ts = datetime.fromisoformat(raw_ts.replace("Z", "+00:00"))
179
+ except ValueError:
180
+ continue
181
+ if ts.tzinfo is None:
182
+ ts = ts.replace(tzinfo=timezone.utc)
183
+ if ts < cutoff:
184
+ continue
185
+ total += 1
186
+ if row.get("escalated", False):
187
+ escalated += 1
188
+ if total == 0:
189
+ return 0.0, 0
190
+ return escalated / total, total
191
+
192
+
193
def slo_status(rate: float) -> str:
    """Classify a disagreement rate as ``"OK"``, ``"WARN"`` or ``"BREACH"``.

    Below ``SLO_THRESHOLD_WARN`` is OK, below ``SLO_THRESHOLD_BREACH``
    is WARN, anything else is BREACH.
    """
    bands = (
        (SLO_THRESHOLD_WARN, "OK"),
        (SLO_THRESHOLD_BREACH, "WARN"),
    )
    for ceiling, label in bands:
        if rate < ceiling:
            return label
    return "BREACH"
199
+
200
+
201
def slo_banner(
    rate: float,
    sample_count: int,
    *,
    escalation_rate: float | None = None,
) -> str:
    """Build the one-line shadow-SLO banner.

    With zero samples a fixed "no samples yet" line is returned.
    Otherwise the banner names the status from :func:`slo_status`, the
    disagreement percentage and the sample count, plus a hint about
    reverting to ``low_impact.dispatch: full`` for WARN / BREACH.

    Step-9 P13 — when ``escalation_rate`` is given, a tail with the
    share of decisions the confidence gate auto-escalated is appended.
    """
    percent = rate * 100
    status = slo_status(rate)
    if sample_count == 0:
        return "[shadow SLO] no samples yet"

    hints = {
        "OK": "(<5%)",
        "WARN": "(5–8% — consider reverting to low_impact.dispatch: full)",
    }
    if status in hints:
        shown, hint = status, hints[status]
    else:
        shown, hint = "BREACH", "(>8% — revert to low_impact.dispatch: full)"

    pieces = [
        f"[shadow SLO] {shown} · {percent:.1f}% disagreement over "
        f"{sample_count} samples {hint}"
    ]
    if escalation_rate is not None:
        pieces.append(f" · {escalation_rate * 100:.1f}% auto-escalated")
    return "".join(pieces)
@@ -0,0 +1,226 @@
1
+ """Solo-member dispatch — step-9 P9 (U2).
2
+
3
+ Picks the first enabled, auth-valid member from
4
+ ``routing.solo_member_fallback_chain`` so low-impact decisions can
5
+ optionally route to a single member instead of the full council. The
6
+ selection is intentionally side-effect-free: callers own logging,
7
+ dispatch, and the all-invalid → full-council fallback.
8
+
9
+ Iron Law: a None selection from :func:`select_solo_member` is the
10
+ caller's signal to fall back to the full council with a WARN log —
11
+ NEVER to fail the decision. The dispatcher must never break a
12
+ user's flow because a CLI was offline.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import os
18
+ import time
19
+ from dataclasses import dataclass, field
20
+ from typing import Callable, Mapping
21
+
22
+ from scripts.ai_council.config import MemberConfig, RoutingConfig
23
+ from scripts.ai_council.confidence_gate import (
24
+ EscalationDecision,
25
+ should_escalate,
26
+ )
27
+
28
+ #: TTL for cached auth-probe results. Lazy probe per session; bumped
29
+ #: forward whenever a probe is re-run.
30
+ _AUTH_CACHE_TTL_SECONDS = 15 * 60
31
+
32
+ #: Env var that forces every solo-dispatch path back to full council
33
+ #: for the current invocation. Honored by :func:`select_solo_member`
34
+ #: and surfaced through :func:`force_full_council`.
35
+ FORCE_FULL_ENV = "AGENT_CONFIG_FORCE_FULL_COUNCIL"
36
+
37
+
38
@dataclass
class AuthCacheEntry:
    """A single cached auth-probe verdict together with its expiry."""

    # Outcome of the probe.
    valid: bool
    # Clock value at which this entry stops being usable.
    expires_at: float
44
+
45
+
46
@dataclass
class AuthCache:
    """Per-process, in-memory store of auth-probe verdicts."""

    # Cached verdicts keyed by member name.
    entries: dict[str, AuthCacheEntry] = field(default_factory=dict)

    def get(self, name: str, *, now: float) -> bool | None:
        """Return the cached verdict for ``name``.

        ``None`` means "no usable entry": nothing cached, or the entry
        expired at or before ``now``.
        """
        hit = self.entries.get(name)
        if hit is None:
            return None
        if hit.expires_at <= now:
            return None
        return hit.valid

    def set(self, name: str, *, valid: bool, now: float) -> None:
        """Store ``valid`` for ``name``, expiring one TTL after ``now``."""
        deadline = now + _AUTH_CACHE_TTL_SECONDS
        self.entries[name] = AuthCacheEntry(valid=valid, expires_at=deadline)
62
+
63
+
64
def force_full_council(env: Mapping[str, str] | None = None) -> bool:
    """Report whether the force-full-council override is switched on.

    Only the exact value ``1`` counts. Other truthy-looking values are
    deliberately rejected — the override is a hard one-bit switch, not
    a free-form bool. ``env`` replaces ``os.environ`` when supplied.
    """
    source = os.environ if env is None else env
    return source.get(FORCE_FULL_ENV, "") == "1"
72
+
73
+
74
def select_solo_member(
    routing: RoutingConfig,
    members: Mapping[str, MemberConfig],
    *,
    auth_cache: AuthCache,
    probe: Callable[[str, float], bool],
    now: float | None = None,
    env: Mapping[str, str] | None = None,
) -> str | None:
    """Pick the first fallback-chain entry that is enabled and auth-valid.

    ``routing.solo_member_fallback_chain`` is walked in order. Entries
    whose member is missing from ``members`` or disabled are skipped.
    For the rest the auth cache is consulted first; on a miss the
    member is probed with ``routing.auth_check_timeout_seconds`` and
    the outcome is cached. The name of the first valid member is
    returned, or ``None`` when no entry qualifies.

    ``probe(name, timeout_s) -> bool`` is the caller-supplied auth
    check. It MUST honor ``timeout_s`` and return False on timeout so
    the dispatcher cannot stall on a wedged CLI. A probe that raises is
    treated — and cached — as auth-invalid.

    The env-var override (``AGENT_CONFIG_FORCE_FULL_COUNCIL=1``)
    short-circuits to ``None``, as if the whole chain were unavailable.
    The caller still owns the WARN log + full-council escalation.
    ``now`` defaults to ``time.monotonic()``.
    """
    if force_full_council(env):
        return None
    moment = time.monotonic() if now is None else now
    probe_timeout = routing.auth_check_timeout_seconds
    for candidate in routing.solo_member_fallback_chain:
        config = members.get(candidate)
        if config is None or not config.enabled:
            continue
        known = auth_cache.get(candidate, now=moment)
        if known is True:
            return candidate
        if known is False:
            continue
        try:
            outcome = bool(probe(candidate, probe_timeout))
        except Exception:
            # A crashing probe counts as auth-invalid so the walk moves
            # on to the next entry. Nothing is logged here; probe
            # failures should be logged by the caller's probe.
            outcome = False
        auth_cache.set(candidate, valid=outcome, now=moment)
        if outcome:
            return candidate
    return None
124
+
125
+
126
+ @dataclass(frozen=True)
127
+ class SoloDispatchResult:
128
+ """Outcome of :func:`dispatch_with_escalation`.
129
+
130
+ ``verdict`` is the final answer text returned to the caller.
131
+ ``escalated`` is True when the solo response was rejected by the
132
+ confidence gate and the full council ran. ``solo_member`` /
133
+ ``solo_response`` are populated even on escalation so the shadow
134
+ log can record both sides without re-running the solo step.
135
+ """
136
+
137
+ verdict: str
138
+ escalated: bool
139
+ escalation_reason: str # 'low_confidence' | 'split' | 'refusal' | 'short_response' | 'ok' | 'no_solo_member'
140
+ solo_member: str | None
141
+ solo_response: str | None
142
+ solo_confidence: float | None
143
+
144
+
145
def dispatch_with_escalation(
    routing: RoutingConfig,
    members: Mapping[str, MemberConfig],
    *,
    auth_cache: AuthCache,
    probe: Callable[[str, float], bool],
    run_solo: Callable[[str], str],
    run_full: Callable[[], str],
    confidence_floor: float,
    now: float | None = None,
    env: Mapping[str, str] | None = None,
) -> SoloDispatchResult:
    """Run the solo member and escalate to the full council when needed.

    Step-9 P13 — defense-in-depth on top of shadow-mode SLO.

    Steps:

    1. :func:`select_solo_member` picks a member from the chain.
    2. No member → ``run_full`` answers (reason ``no_solo_member``).
    3. Otherwise ``run_solo`` answers and the text is judged by
       :func:`scripts.ai_council.confidence_gate.should_escalate`
       against ``confidence_floor``.
    4. Gate says escalate → ``run_full`` answers; the solo response is
       kept on the result for shadow logging.
    5. Gate says keep → the solo answer is returned as-is.

    ``run_solo(name) -> str`` and ``run_full() -> str`` are supplied by
    the caller; this module owns no LLM transport. Callers MUST raise
    on transport errors — escalation is for *content* low-confidence,
    not infrastructure failures (those bubble up to the orchestrator's
    own retry / fallback policy).
    """
    chosen = select_solo_member(
        routing,
        members,
        auth_cache=auth_cache,
        probe=probe,
        now=now,
        env=env,
    )
    if chosen is None:
        return SoloDispatchResult(
            verdict=run_full(),
            escalated=True,
            escalation_reason="no_solo_member",
            solo_member=None,
            solo_response=None,
            solo_confidence=None,
        )

    solo_text = run_solo(chosen)
    gate: EscalationDecision = should_escalate(solo_text, floor=confidence_floor)
    if gate.escalate:
        final, flagged, why = run_full(), True, gate.reason
    else:
        final, flagged, why = solo_text, False, "ok"
    return SoloDispatchResult(
        verdict=final,
        escalated=flagged,
        escalation_reason=why,
        solo_member=chosen,
        solo_response=solo_text,
        solo_confidence=gate.confidence,
    )
214
+
215
+
216
# Public alias of the private TTL constant, bound before ``__all__``
# names it.
AUTH_CACHE_TTL_SECONDS = _AUTH_CACHE_TTL_SECONDS

__all__ = [
    "AUTH_CACHE_TTL_SECONDS",
    "AuthCache",
    "AuthCacheEntry",
    "FORCE_FULL_ENV",
    "SoloDispatchResult",
    "dispatch_with_escalation",
    "force_full_council",
    "select_solo_member",
]