@event4u/agent-config 2.12.0 → 2.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/council/analysis.md +142 -0
- package/.agent-src/commands/council/debate.md +129 -0
- package/.agent-src/commands/council/default.md +8 -0
- package/.agent-src/commands/council/design.md +16 -12
- package/.agent-src/commands/council/optimize.md +16 -15
- package/.agent-src/commands/council/pr.md +12 -12
- package/.agent-src/commands/council.md +48 -2
- package/.agent-src/commands/memory/learn-low-impact.md +143 -0
- package/.agent-src/personas/advisors/contrarian.md +95 -0
- package/.agent-src/personas/advisors/executor.md +99 -0
- package/.agent-src/personas/advisors/expansionist.md +98 -0
- package/.agent-src/personas/advisors/first-principles.md +98 -0
- package/.agent-src/personas/advisors/outsider.md +102 -0
- package/.agent-src/rules/ask-when-uncertain.md +10 -6
- package/.agent-src/rules/copilot-routing.md +19 -0
- package/.agent-src/rules/devcontainer-routing.md +20 -0
- package/.agent-src/rules/external-reference-deep-dive.md +1 -1
- package/.agent-src/rules/fast-path-marker-visibility.md +38 -0
- package/.agent-src/rules/laravel-routing.md +20 -0
- package/.agent-src/rules/low-impact-corpus-privacy-floor.md +74 -0
- package/.agent-src/rules/symfony-routing.md +20 -0
- package/.agent-src/skills/ai-council/SKILL.md +388 -10
- package/.agent-src/skills/copilot-config/SKILL.md +1 -1
- package/.agent-src/skills/devcontainer/SKILL.md +1 -1
- package/.agent-src/skills/laravel/SKILL.md +1 -1
- package/.agent-src/skills/project-analysis-core/SKILL.md +1 -1
- package/.agent-src/skills/project-analyzer/SKILL.md +1 -1
- package/.agent-src/skills/symfony-workflow/SKILL.md +1 -1
- package/.agent-src/skills/universal-project-analysis/SKILL.md +1 -1
- package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
- package/.claude-plugin/marketplace.json +4 -1
- package/AGENTS.md +1 -1
- package/CHANGELOG.md +346 -124
- package/CONTRIBUTING.md +5 -0
- package/README.md +6 -6
- package/config/agent-settings.template.yml +5 -93
- package/config/gitignore-block.txt +6 -0
- package/docs/architecture/multi-tool-projection.md +53 -0
- package/docs/architecture/{compression.md → source-projection.md} +21 -3
- package/docs/architecture.md +15 -15
- package/docs/archive/CHANGELOG-pre-2.11.0.md +141 -0
- package/docs/catalog.md +25 -12
- package/docs/contracts/adr-architectural-consensus-mechanism.md +68 -0
- package/docs/contracts/adr-level-6-productization.md +7 -9
- package/docs/contracts/ai-council-config.md +658 -0
- package/docs/contracts/command-clusters.md +58 -2
- package/docs/contracts/command-surface-tiers.md +3 -2
- package/docs/contracts/cost-profile-defaults.md +5 -0
- package/docs/contracts/decision-engine-gates.md +5 -0
- package/docs/contracts/decision-trace-v1.md +2 -2
- package/docs/contracts/file-ownership-matrix.json +1735 -72
- package/docs/contracts/installed-tools-lockfile.md +2 -1
- package/docs/contracts/low-impact-corpus-format.md +95 -0
- package/docs/contracts/mcp-beta-criteria.md +6 -5
- package/docs/contracts/mcp-cloud-scope.md +5 -4
- package/docs/contracts/multi-tool-projection-fidelity.md +115 -0
- package/docs/contracts/release-trunk-sync.md +4 -3
- package/docs/contracts/tier-3-contrib-plugin.md +5 -6
- package/docs/getting-started.md +2 -2
- package/docs/guidelines/agent-infra/installed-tools-manifest.md +2 -1
- package/docs/installation.md +32 -0
- package/package.json +1 -1
- package/scripts/_archive/README.md +59 -0
- package/scripts/_cli/cmd_doctor.py +134 -0
- package/scripts/ai_council/_default_prices.py +10 -1
- package/scripts/ai_council/advisors.py +148 -0
- package/scripts/ai_council/airgap.py +165 -0
- package/scripts/ai_council/cli_hints.py +123 -0
- package/scripts/ai_council/clients.py +959 -5
- package/scripts/ai_council/compile_corpus.py +178 -0
- package/scripts/ai_council/confidence_gate.py +156 -0
- package/scripts/ai_council/config.py +1364 -0
- package/scripts/ai_council/consensus.py +329 -0
- package/scripts/ai_council/events_log.py +137 -0
- package/scripts/ai_council/learn_low_impact_preview.py +252 -0
- package/scripts/ai_council/low_impact.py +714 -0
- package/scripts/ai_council/low_impact_corpus.py +466 -0
- package/scripts/ai_council/low_impact_intake.py +163 -0
- package/scripts/ai_council/modes.py +6 -1
- package/scripts/ai_council/necessity.py +782 -0
- package/scripts/ai_council/orchestrator.py +872 -20
- package/scripts/ai_council/probation_gate.py +152 -0
- package/scripts/ai_council/prompts.py +335 -0
- package/scripts/ai_council/redact_low_impact_entry.py +155 -0
- package/scripts/ai_council/replay.py +155 -0
- package/scripts/ai_council/session.py +19 -1
- package/scripts/ai_council/shadow_dispatch.py +235 -0
- package/scripts/ai_council/solo_dispatch.py +226 -0
- package/scripts/audit_cloud_compatibility.py +74 -0
- package/scripts/audit_command_surface.py +363 -0
- package/scripts/check_compressed_paths.py +6 -1
- package/scripts/check_council_layout.py +11 -0
- package/scripts/ci_time_ratio.py +168 -0
- package/scripts/council_cli.py +2005 -30
- package/scripts/install.sh +12 -0
- package/scripts/measure_projection_bytes.py +159 -0
- package/scripts/measure_roadmap_trajectory.py +112 -0
- package/scripts/probe_projection_fidelity.py +202 -0
- package/scripts/score_skill_selection.py +198 -0
- package/scripts/skill_collision_clusters.py +162 -0
- /package/scripts/{_backfill_skill_domains.py → _archive/_backfill_skill_domains.py} +0 -0
- /package/scripts/{_bootstrap_tier_frontmatter.py → _archive/_bootstrap_tier_frontmatter.py} +0 -0
- /package/scripts/{_p43_bodies.py → _archive/_p43_bodies.py} +0 -0
- /package/scripts/{_p43_compress.py → _archive/_p43_compress.py} +0 -0
- /package/scripts/{_p4_migrate.py → _archive/_p4_migrate.py} +0 -0
- /package/scripts/{_phase2_shim_helper.py → _archive/_phase2_shim_helper.py} +0 -0
- /package/scripts/{_pilot_council_question.py → _archive/_pilot_council_question.py} +0 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
"""Consensus scoring for the analysis lens (Phase 4 / F3).
|
|
2
|
+
|
|
3
|
+
After the final deliberation round, members score each other's
|
|
4
|
+
findings. The renderer ranks findings by consensus and surfaces a
|
|
5
|
+
"Minority Views" section for sub-threshold items so they remain
|
|
6
|
+
audit-trail signal rather than silent drop.
|
|
7
|
+
|
|
8
|
+
Schema (Opus's machine-readable contract):
|
|
9
|
+
|
|
10
|
+
Finding — `{id: str, source: str, text: str}`
|
|
11
|
+
FindingScore — `{finding_id: str, scorer: str, score: 1..10,
|
|
12
|
+
agree: bool, reason: str}`
|
|
13
|
+
ConsensusMetadata — per-finding aggregate:
|
|
14
|
+
`{finding_id, consensus_strength: 0..1,
|
|
15
|
+
dissent_count, scorers, mean_score}`
|
|
16
|
+
|
|
17
|
+
Threshold bucketing (Phase 4 Step 3):
|
|
18
|
+
|
|
19
|
+
consensus_strength > strong → Strong Consensus
|
|
20
|
+
minority < strength <= strong → Findings (default body)
|
|
21
|
+
strength <= minority → Minority Views
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import json
|
|
27
|
+
import re
|
|
28
|
+
from dataclasses import dataclass, field
|
|
29
|
+
from typing import Iterable
|
|
30
|
+
|
|
31
|
+
# Fenced form: a ``` fence with an optional `json` tag whose payload is a
# bracketed array. The array is group 1; DOTALL lets it span several lines.
_JSON_BLOCK = re.compile(r"```(?:json)?\s*(\[.*?\])\s*```", re.DOTALL)
# Bare form: an unfenced array whose first element is an object. The match is
# non-greedy, so it ends at the first `}` that is directly followed by `]`.
_BARE_ARRAY = re.compile(r"(\[\s*\{.*?\}\s*\])", re.DOTALL)
|
|
33
|
+
|
|
34
|
+
# Defaults mirror the roadmap (Phase 4 Step 4). The .agent-settings.yml
|
|
35
|
+
# block overrides them at run time.
|
|
36
|
+
DEFAULT_STRONG_THRESHOLD: float = 0.7
|
|
37
|
+
DEFAULT_MINORITY_THRESHOLD: float = 0.4
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(frozen=True)
class Finding:
    """A single finding taken from one member's deliberation output.

    Instances are immutable (frozen dataclass).
    """

    id: str  # identifier that scores reference via ``finding_id``
    source: str  # provider/model that authored the finding
    text: str  # body of the finding
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass(frozen=True)
class FindingScore:
    """The vote a single scorer cast on a single finding.

    Instances are immutable (frozen dataclass).
    """

    finding_id: str  # id of the finding being scored
    scorer: str  # who cast the vote
    score: int  # 1..10
    agree: bool  # whether the scorer agrees with the finding
    reason: str  # scorer's rationale
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def evidence_quality(mean_score: float) -> str:
    """Map a mean score onto a one-letter evidence-quality grade.

    Grades, checked from the highest floor downwards:

        H (high)   — mean ≥ 8.0
        M (medium) — 6.0 ≤ mean < 8.0
        L (low)    — everything else, including a mean of 0.0
    """
    # Floors are ordered high to low; the first floor the mean reaches wins.
    for floor, grade in ((8.0, "H"), (6.0, "M")):
        if mean_score >= floor:
            return grade
    return "L"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass(frozen=True)
class ConsensusMetadata:
    """Per-finding aggregate of the votes cast on it.

    Besides the raw strength and mean, the record carries the concur /
    dissent tallies, the dissenters' rationales and an H/M/L grade, so a
    renderer can print a line such as
    "N/M members concur; X dissented citing …; mean evidence-quality H"
    without access to the individual FindingScore objects.
    """

    finding_id: str  # id of the finding these stats describe
    consensus_strength: float  # 0..1
    dissent_count: int  # number of scorers with agree=False
    scorers: tuple[str, ...]  # scorers that contributed a vote
    mean_score: float  # mean of the contributed scores
    concur_count: int = 0  # number of scorers with agree=True
    dissent_reasons: tuple[tuple[str, str], ...] = ()  # (scorer, reason)
    evidence_quality: str = "L"  # H / M / L grade of the mean score
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@dataclass(frozen=True)
class ConsensusBucket:
    """Findings grouped into the three sections a renderer prints.

    The dataclass is frozen, so the three attributes cannot be rebound,
    but each one is an ordinary list that is filled in place. Every
    instance gets its own fresh lists.
    """

    # Each entry pairs a finding with its aggregate stats.
    strong: list[tuple[Finding, ConsensusMetadata]] = field(
        default_factory=list,
    )
    findings: list[tuple[Finding, ConsensusMetadata]] = field(
        default_factory=list,
    )
    minority: list[tuple[Finding, ConsensusMetadata]] = field(
        default_factory=list,
    )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def aggregate_scores(
    findings: Iterable[Finding],
    scores: Iterable[FindingScore],
) -> dict[str, ConsensusMetadata]:
    """Aggregate per-finding scores into ConsensusMetadata.

    `consensus_strength` = mean(score) / 10 * agreement_rate, rounded to
    three decimals. `mean_score` is rounded to two decimals and the
    evidence-quality grade is derived from that rounded mean.

    Args:
        findings: Findings to aggregate for. Any iterable works, including
            a one-shot generator — it is consumed exactly once.
        scores: Votes to fold in. Votes for unknown finding ids are
            ignored, as are self-scores (scorer equal to the finding's
            source), which are dropped defensively to keep the aggregate
            honest.

    Returns:
        A dict keyed by finding id, in first-seen finding order. Findings
        without any usable vote get strength 0.0, no scorers and grade "L".
    """
    # Build both lookup tables in a single pass. `findings` is only promised
    # to be an Iterable; looping over it twice would leave the second table
    # empty for a generator and make the self-score check raise KeyError.
    by_id: dict[str, list[FindingScore]] = {}
    sources: dict[str, str] = {}
    for f in findings:
        by_id[f.id] = []
        sources[f.id] = f.source

    for s in scores:
        if s.finding_id not in by_id:
            continue
        if s.scorer == sources[s.finding_id]:
            continue  # ignore self-scores
        by_id[s.finding_id].append(s)

    out: dict[str, ConsensusMetadata] = {}
    for fid, fs in by_id.items():
        if not fs:
            out[fid] = ConsensusMetadata(
                finding_id=fid, consensus_strength=0.0,
                dissent_count=0, scorers=(), mean_score=0.0,
                concur_count=0, dissent_reasons=(), evidence_quality="L",
            )
            continue
        mean = sum(s.score for s in fs) / len(fs)
        concur = sum(1 for s in fs if s.agree)
        dissent = len(fs) - concur
        strength = (mean / 10.0) * (concur / len(fs))
        # (scorer, reason) pairs for dissenters only, in scoring order, so
        # the renderer can show who pushed back and why.
        dissent_reasons = tuple(
            (s.scorer, s.reason) for s in fs if not s.agree
        )
        mean_rounded = round(mean, 2)
        out[fid] = ConsensusMetadata(
            finding_id=fid, consensus_strength=round(strength, 3),
            dissent_count=dissent, scorers=tuple(s.scorer for s in fs),
            mean_score=mean_rounded,
            concur_count=concur, dissent_reasons=dissent_reasons,
            evidence_quality=evidence_quality(mean_rounded),
        )
    return out
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def bucket_by_threshold(
    findings: Iterable[Finding],
    metadata: dict[str, ConsensusMetadata],
    *,
    strong: float = DEFAULT_STRONG_THRESHOLD,
    minority: float = DEFAULT_MINORITY_THRESHOLD,
) -> ConsensusBucket:
    """Sort findings into the Strong / Findings / Minority sections.

    A finding lands in Strong when its strength exceeds `strong`, in
    Findings when it exceeds `minority`, and in Minority otherwise.
    A finding whose id is absent from `metadata` is given zeroed stats
    and therefore ends up in Minority. Inside each section the entries
    are ordered strongest first.

    Raises:
        ValueError: unless 0 <= minority <= strong <= 1.
    """
    if not 0.0 <= minority <= strong <= 1.0:
        raise ValueError(
            f"Threshold ordering broken: 0 <= {minority} <= {strong} <= 1 required.",
        )

    result = ConsensusBucket()
    for finding in findings:
        meta = metadata.get(finding.id)
        if meta is None:
            # No aggregate recorded for this finding: use zeroed stats.
            meta = ConsensusMetadata(
                finding_id=finding.id, consensus_strength=0.0,
                dissent_count=0, scorers=(), mean_score=0.0,
                concur_count=0, dissent_reasons=(), evidence_quality="L",
            )
        level = meta.consensus_strength
        if level > strong:
            section = result.strong
        elif level > minority:
            section = result.findings
        else:
            section = result.minority
        section.append((finding, meta))

    # Highest strength first within every section.
    for section in (result.strong, result.findings, result.minority):
        section.sort(key=lambda pair: pair[1].consensus_strength, reverse=True)
    return result
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def parse_findings_response(text: str, *, source: str) -> list[Finding]:
    """Turn a member's structured-findings reply into Finding objects.

    The JSON array is located by `_extract_json_array`. Every usable
    element is a JSON object with truthy `id` and `text` values; the
    author is taken from the `source` argument, not from the payload.
    Anything else — no array, undecodable JSON, a non-list payload,
    non-object elements, elements lacking `id` or `text` — yields no
    finding rather than an error.
    """
    raw = _extract_json_array(text)
    if not raw:
        return []
    try:
        payload = json.loads(raw)
    except json.JSONDecodeError:
        return []
    if not isinstance(payload, list):
        return []
    # `.get()` being truthy guarantees the key exists, so indexing is safe.
    return [
        Finding(
            id=str(entry["id"]),
            source=source,
            text=str(entry["text"]).strip(),
        )
        for entry in payload
        if isinstance(entry, dict) and entry.get("id") and entry.get("text")
    ]
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def parse_scores_response(text: str, *, scorer: str) -> list[FindingScore]:
    """Parse a member's scoring response into FindingScore objects.

    Each item should be `{finding_id, score, agree, reason}`; `id` is
    accepted as an alias for `finding_id`. Parsing is defensive and never
    raises on malformed items — they are skipped so they cannot poison
    the aggregates.

    Args:
        text: Raw model response containing a JSON array.
        scorer: Identifier recorded as the author of every parsed vote.

    Returns:
        The usable votes, in response order. An item is skipped when:
          * it is not a JSON object or has no finding id;
          * `score` is not a number (JSON booleans do not count);
          * `score` is NaN / ±Infinity;
          * `score`, truncated to an int, falls outside 1..10.
        `agree` defaults to True when absent; string values such as
        "false" / "no" / "0" are read as False. A missing or null
        `reason` becomes the empty string.
    """
    array = _extract_json_array(text)
    if not array:
        return []
    try:
        parsed = json.loads(array)
    except json.JSONDecodeError:
        return []
    if not isinstance(parsed, list):
        return []

    out: list[FindingScore] = []
    for item in parsed:
        if not isinstance(item, dict):
            continue
        fid = item.get("finding_id") or item.get("id")
        score = item.get("score")
        if not fid:
            continue
        # bool is a subclass of int: without this guard JSON `true` would
        # be counted as a score of 1.
        if isinstance(score, bool) or not isinstance(score, (int, float)):
            continue
        try:
            score_int = int(score)
        except (ValueError, OverflowError):
            # json.loads accepts NaN / Infinity literals; int() rejects them.
            continue
        if not 1 <= score_int <= 10:
            continue

        agree = item.get("agree", True)
        if isinstance(agree, str):
            # bool("false") is True, so read textual flags explicitly.
            agree = agree.strip().lower() not in {"", "0", "false", "no"}
        reason = item.get("reason")
        out.append(FindingScore(
            finding_id=str(fid), scorer=scorer, score=score_int,
            agree=bool(agree),
            # A JSON null must not surface as the literal text "None".
            reason="" if reason is None else str(reason).strip(),
        ))
    return out
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _extract_json_array(text: str) -> str:
    """Pull the text of a JSON array out of a model response, best effort.

    The fenced-block pattern is tried before the bare-array pattern; the
    first one that matches supplies the result. Returns "" for empty
    input or when neither pattern matches.
    """
    if not text:
        return ""
    for pattern in (_JSON_BLOCK, _BARE_ARRAY):
        hit = pattern.search(text)
        if hit:
            return hit.group(1)
    return ""
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def anonymize_findings(findings: list[Finding]) -> dict[str, Finding]:
    """Key each finding by a neutral label so scorers do not see authors.

    Labels follow input order: `Finding-A`, `Finding-B`, … — the letter is
    produced by offsetting from "A", so positions past the 26th continue
    with whatever characters follow "Z". The returned mapping still holds
    the full Finding (author included) and is meant to stay out of the
    prompt.
    """
    return {
        f"Finding-{chr(ord('A') + position)}": finding
        for position, finding in enumerate(findings)
    }
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def anonymize_responses(
    responses: Iterable[tuple[str, str]],
    *,
    persona_labels: dict[str, str] | None = None,
) -> tuple[dict[str, str], dict[str, str]]:
    """Replace response authors with neutral labels for peer review.

    Args:
        responses: ``(source, text)`` pairs. Pairs whose text is empty or
            whitespace-only are dropped and consume no letter.
        persona_labels: Optional ``source -> persona`` map. When a source
            has a (truthy) persona, its label gets a ``" (persona)"``
            suffix, e.g. ``Response-A (Contrarian)``; otherwise the bare
            ``Response-A`` form is used.

    Returns:
        A pair of dicts sharing the same labels, in input order:

        - ``anon_text``: label -> stripped response text.
        - ``label_to_source``: label -> original source, for mapping the
          labels back to their authors later.

    Labels are assigned deterministically from input order.
    """
    personas = persona_labels or {}
    anon_text: dict[str, str] = {}
    label_to_source: dict[str, str] = {}

    # Filter first so that skipped responses do not use up a letter.
    usable = (
        (src, body) for src, body in responses if body and body.strip()
    )
    for position, (src, body) in enumerate(usable):
        tag = f"Response-{chr(ord('A') + position)}"
        persona = personas.get(src)
        if persona:
            tag = f"{tag} ({persona})"
        anon_text[tag] = body.strip()
        label_to_source[tag] = src
    return anon_text, label_to_source
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""Persistent council events log (step-8 phase 3).
|
|
2
|
+
|
|
3
|
+
Single-function module that appends one JSON line per council event to
|
|
4
|
+
``<project_root>/agents/council-events.log``. Schema v1 carries the
|
|
5
|
+
minimum needed to answer the "why did the council skip / block this?"
|
|
6
|
+
question at retro time without leaking prompt content.
|
|
7
|
+
|
|
8
|
+
Privacy floor:
|
|
9
|
+
``original_ask`` is never written verbatim — the caller passes the
|
|
10
|
+
raw string, and :func:`append_event` writes ``sha256(value)[:12]``
|
|
11
|
+
as ``original_ask_hash``. Mirrors the privacy floor in
|
|
12
|
+
``agents/low-impact-decisions.md``.
|
|
13
|
+
|
|
14
|
+
Kill-switch:
|
|
15
|
+
``AGENT_CONFIG_NO_EVENTS_LOG=1`` short-circuits :func:`append_event`
|
|
16
|
+
to a no-op. Mirrors Step 7's ``AGENT_CONFIG_LEGACY_ANCHOR=1``
|
|
17
|
+
pattern. Tested via env-var override; the agent never reads or
|
|
18
|
+
parses the log itself.
|
|
19
|
+
|
|
20
|
+
See: ``agents/roadmaps/step-8-quota-necessity-transparency.md`` (D3,
|
|
21
|
+
D5) and ``docs/contracts/ai-council-config.md``.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import hashlib
|
|
27
|
+
import json
|
|
28
|
+
import os
|
|
29
|
+
from datetime import datetime, timezone
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import Any, Literal
|
|
32
|
+
|
|
33
|
+
SCHEMA_VERSION = 1
|
|
34
|
+
|
|
35
|
+
EventAction = Literal["proceed", "skip_necessity", "block_quota"]
|
|
36
|
+
|
|
37
|
+
_VALID_ACTIONS: frozenset[str] = frozenset(
|
|
38
|
+
{"proceed", "skip_necessity", "block_quota"},
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
#: Environment-variable kill-switch. Truthy values disable all writes;
|
|
42
|
+
#: the function silently returns. Designed for CI / sandboxed runs and
|
|
43
|
+
#: privacy-conscious power users.
|
|
44
|
+
_KILL_SWITCH_ENV = "AGENT_CONFIG_NO_EVENTS_LOG"
|
|
45
|
+
|
|
46
|
+
#: Default log path, resolved relative to the package root (two levels
|
|
47
|
+
#: above ``scripts/ai_council/``). Callers can override via
|
|
48
|
+
#: ``log_path=`` for tests.
|
|
49
|
+
_DEFAULT_LOG_PATH = (
|
|
50
|
+
Path(__file__).resolve().parents[2] / "agents" / "council-events.log"
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _hash_original_ask(original_ask: str) -> str:
    """Return the 12-hex-character privacy hash of the original ask.

    The value is the first 12 characters of the SHA-256 hex digest of the
    UTF-8 encoded text (unencodable characters are replaced rather than
    raising). A falsy input maps to twelve zeros, so the field always has
    a value.
    """
    if original_ask:
        payload = original_ask.encode("utf-8", errors="replace")
        return hashlib.sha256(payload).hexdigest()[:12]
    return "0" * 12
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _kill_switch_active() -> bool:
    """Report whether the kill-switch environment variable is switched on.

    The variable counts as off when it is unset or holds exactly one of
    "", "0", "false" or "False"; any other value switches it on.
    """
    raw = os.environ.get(_KILL_SWITCH_ENV, "")
    is_off = raw in ("", "0", "false", "False")
    return not is_off
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def append_event(
    event: dict[str, Any], *, log_path: Path | None = None,
) -> bool:
    """Append a single JSON event line to the council events log.

    Args:
        event: Mapping with the v1 schema fields: ``lens``,
            ``invocation``, ``action``, ``verdict``, ``provider_caps``,
            ``original_ask``. Only ``action`` is validated; the other
            fields fall back to an empty value when absent. The mapping is
            read, never modified. Keys outside the schema pass through
            verbatim — callers should not abuse this for free-form
            payloads (privacy floor).
        log_path: Override for tests. Defaults to the module's default
            log path.

    Returns:
        ``True`` when a line was written; ``False`` when the kill-switch
        suppressed the write (in which case nothing is validated either).

    Raises:
        ValueError: ``action`` not in :data:`_VALID_ACTIONS`.

    The parent directory is created on demand. Errors raised while
    creating it or writing the file are not caught here.
    """
    if _kill_switch_active():
        return False

    action = event.get("action")
    if action not in _VALID_ACTIONS:
        raise ValueError(
            f"events_log: action={action!r} not in "
            f"{sorted(_VALID_ACTIONS)}.",
        )

    # Read the raw ask without popping it: the caller's dict must come back
    # unchanged. The reserved-key filter below keeps it out of the record.
    raw_ask = event.get("original_ask", "")
    record = {
        "schema_version": SCHEMA_VERSION,
        "ts_utc": datetime.now(timezone.utc).isoformat(
            timespec="seconds",
        ).replace("+00:00", "Z"),
        "lens": event.get("lens", ""),
        "invocation": event.get("invocation", ""),
        "action": action,
        "verdict": event.get("verdict", ""),
        "provider_caps": event.get("provider_caps", {}),
        # Only the truncated hash is persisted, never the ask itself.
        "original_ask_hash": _hash_original_ask(raw_ask),
    }
    # Pass-through for caller-supplied diagnostic fields outside the
    # schema-v1 reserved set (e.g. `category`, `rationale`). The schema-v1
    # fields above always win on collision.
    reserved = set(record) | {"original_ask"}
    for k, v in event.items():
        if k not in reserved:
            record[k] = v

    target = Path(log_path) if log_path is not None else _DEFAULT_LOG_PATH
    target.parent.mkdir(parents=True, exist_ok=True)
    line = json.dumps(record, ensure_ascii=False, separators=(",", ":"))
    with target.open("a", encoding="utf-8") as fh:
        fh.write(line + "\n")
    return True
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def default_log_path() -> Path:
    """Expose the module's default events-log location to callers and tests."""
    canonical = _DEFAULT_LOG_PATH
    return canonical
|