@event4u/agent-config 2.13.0 → 2.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/memory/learn-low-impact.md +143 -0
- package/.agent-src/rules/ask-when-uncertain.md +10 -6
- package/.agent-src/rules/copilot-routing.md +1 -1
- package/.agent-src/rules/devcontainer-routing.md +1 -1
- package/.agent-src/rules/external-reference-deep-dive.md +1 -1
- package/.agent-src/rules/fast-path-marker-visibility.md +38 -0
- package/.agent-src/rules/low-impact-corpus-privacy-floor.md +74 -0
- package/.agent-src/rules/symfony-routing.md +1 -1
- package/.agent-src/skills/ai-council/SKILL.md +208 -8
- package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
- package/.claude-plugin/marketplace.json +2 -1
- package/CHANGELOG.md +299 -124
- package/README.md +6 -6
- package/config/gitignore-block.txt +6 -0
- package/docs/architecture.md +12 -12
- package/docs/archive/CHANGELOG-pre-2.11.0.md +141 -0
- package/docs/catalog.md +10 -7
- package/docs/contracts/adr-architectural-consensus-mechanism.md +4 -3
- package/docs/contracts/adr-level-6-productization.md +7 -9
- package/docs/contracts/ai-council-config.md +492 -20
- package/docs/contracts/command-clusters.md +1 -1
- package/docs/contracts/command-surface-tiers.md +3 -2
- package/docs/contracts/cost-profile-defaults.md +5 -0
- package/docs/contracts/decision-engine-gates.md +5 -0
- package/docs/contracts/decision-trace-v1.md +2 -2
- package/docs/contracts/file-ownership-matrix.json +1735 -72
- package/docs/contracts/installed-tools-lockfile.md +2 -1
- package/docs/contracts/low-impact-corpus-format.md +95 -0
- package/docs/contracts/mcp-beta-criteria.md +6 -5
- package/docs/contracts/mcp-cloud-scope.md +5 -4
- package/docs/contracts/multi-tool-projection-fidelity.md +8 -2
- package/docs/contracts/release-trunk-sync.md +4 -3
- package/docs/contracts/tier-3-contrib-plugin.md +5 -6
- package/docs/getting-started.md +2 -2
- package/docs/guidelines/agent-infra/installed-tools-manifest.md +2 -1
- package/docs/installation.md +32 -0
- package/package.json +1 -1
- package/scripts/_cli/cmd_doctor.py +134 -0
- package/scripts/ai_council/airgap.py +165 -0
- package/scripts/ai_council/cli_hints.py +123 -0
- package/scripts/ai_council/clients.py +787 -5
- package/scripts/ai_council/compile_corpus.py +178 -0
- package/scripts/ai_council/confidence_gate.py +156 -0
- package/scripts/ai_council/config.py +1007 -11
- package/scripts/ai_council/consensus.py +41 -2
- package/scripts/ai_council/events_log.py +137 -0
- package/scripts/ai_council/learn_low_impact_preview.py +252 -0
- package/scripts/ai_council/low_impact.py +714 -0
- package/scripts/ai_council/low_impact_corpus.py +466 -0
- package/scripts/ai_council/low_impact_intake.py +163 -0
- package/scripts/ai_council/modes.py +6 -1
- package/scripts/ai_council/necessity.py +782 -0
- package/scripts/ai_council/orchestrator.py +252 -14
- package/scripts/ai_council/probation_gate.py +152 -0
- package/scripts/ai_council/redact_low_impact_entry.py +155 -0
- package/scripts/ai_council/replay.py +155 -0
- package/scripts/ai_council/session.py +19 -1
- package/scripts/ai_council/shadow_dispatch.py +235 -0
- package/scripts/ai_council/solo_dispatch.py +226 -0
- package/scripts/audit_cloud_compatibility.py +74 -0
- package/scripts/audit_command_surface.py +363 -0
- package/scripts/check_council_layout.py +11 -0
- package/scripts/council_cli.py +1046 -15
- package/scripts/install.sh +12 -0
|
@@ -20,7 +20,7 @@ CouncilResponse, never raise) is unchanged.
|
|
|
20
20
|
from __future__ import annotations
|
|
21
21
|
|
|
22
22
|
from dataclasses import dataclass
|
|
23
|
-
from typing import Callable
|
|
23
|
+
from typing import Any, Callable
|
|
24
24
|
|
|
25
25
|
from scripts.ai_council.budget_guard import (
|
|
26
26
|
record_spend as _record_daily_spend,
|
|
@@ -99,6 +99,99 @@ class OverrunEvent:
|
|
|
99
99
|
OnOverrunCallback = Callable[[OverrunEvent], bool]
|
|
100
100
|
|
|
101
101
|
|
|
102
|
+
@dataclass(frozen=True)
class DebateCostEstimate:
    """Pre-flight spend projection for a multi-round debate (Phase 8).

    The three top-level bounds are rolled up over every billable member
    and multiplied by ``rounds``:

    * ``expected_usd`` -- the per-round ``estimate()`` total times
      ``rounds`` (worst-case ``max_output_tokens``).
    * ``low_usd`` -- the same projection with output discounted to 25% of
      the ceiling, because most members do not exhaust their token budget.
    * ``high_usd`` -- the expected figure plus a 20% over-run buffer, per
      the roadmap's +/-20% accuracy target.

    ``per_member`` holds one dict per billable member carrying the same
    bound triple plus the member's transport label (api / cli / manual).
    ``subscription_members`` holds the non-billable members so a
    disclosure block can show them as "covered by subscription" without
    adding them to the USD totals.
    """

    # Number of debate rounds the projection covers.
    rounds: int
    # Aggregate lower / expected / upper spend bounds in USD.
    low_usd: float
    expected_usd: float
    high_usd: float
    # One row per billable member (name, model, transport, bound triple).
    per_member: list[dict[str, Any]]
    # One row per non-billable member (name, model, transport, label).
    subscription_members: list[dict[str, str]]
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def estimate_debate_cost(
    question: CouncilQuestion,
    members: list[ExternalAIClient],
    table: PriceTable,
    *,
    rounds: int,
    project: ProjectContext | None = None,
    original_ask: str = "",
    advisor_plans: dict[str, AdvisorPlan] | None = None,
) -> DebateCostEstimate:
    """Project the total spend of a ``rounds``-round debate.

    Billable members are priced through :func:`estimate` once and the
    per-round figures are scaled by ``rounds``. Members whose ``billable``
    attribute is falsy (it defaults to ``True`` when absent) are left out
    of every USD figure and reported in ``subscription_members`` instead.

    Raises:
        ValueError: if ``rounds`` is smaller than 1.
    """
    if rounds < 1:
        raise ValueError(f"rounds must be >= 1 (got {rounds!r}).")

    # Split the roster in one pass: priced members vs. flat-rate members.
    billable_members = []
    sub_members = []
    for candidate in members:
        if getattr(candidate, "billable", True):
            billable_members.append(candidate)
            continue
        sub_members.append({
            "name": candidate.name,
            "model": candidate.model,
            "transport": getattr(candidate, "transport", "api"),
            "subscription_label": getattr(candidate, "subscription_label", ""),
        })

    per_round = estimate(
        question,
        billable_members,
        table,
        project=project,
        original_ask=original_ask,
        advisor_plans=advisor_plans,
    )

    def _member_row(member: Any, est: Any) -> dict[str, Any]:
        # Same bound triple as the roll-up, computed for a single member.
        member_expected = est.total_usd * rounds
        return {
            "name": member.name,
            "model": member.model,
            "transport": getattr(member, "transport", "api"),
            "low_usd": (est.input_usd + 0.25 * est.output_usd) * rounds,
            "expected_usd": member_expected,
            "high_usd": member_expected * 1.20,
        }

    expected = sum(e.total_usd for e in per_round) * rounds
    # Low bound: full input cost plus 25% of the output ceiling, since
    # output rarely reaches ``max_output_tokens``.
    low = sum(e.input_usd + 0.25 * e.output_usd for e in per_round) * rounds
    # High bound: +20% over-run buffer (roadmap +/-20% accuracy target).
    high = expected * 1.20

    per_member: list[dict[str, Any]] = [
        _member_row(member, est)
        for member, est in zip(billable_members, per_round)
    ]
    return DebateCostEstimate(
        rounds=rounds,
        low_usd=low,
        expected_usd=expected,
        high_usd=high,
        per_member=per_member,
        subscription_members=sub_members,
    )
|
|
193
|
+
|
|
194
|
+
|
|
102
195
|
def estimate(
|
|
103
196
|
question: CouncilQuestion,
|
|
104
197
|
members: list[ExternalAIClient],
|
|
@@ -272,6 +365,7 @@ def _run_round(
|
|
|
272
365
|
provider=member.name, model=member.model, text="",
|
|
273
366
|
error=f"{type(exc).__name__}: {exc}",
|
|
274
367
|
)
|
|
368
|
+
_stamp_transport_metadata(response, member)
|
|
275
369
|
results.append(response)
|
|
276
370
|
spent["input"] += response.input_tokens
|
|
277
371
|
spent["output"] += response.output_tokens
|
|
@@ -341,6 +435,7 @@ def _run_round(
|
|
|
341
435
|
results.append(response)
|
|
342
436
|
spent["input"] += response.input_tokens
|
|
343
437
|
spent["output"] += response.output_tokens
|
|
438
|
+
actual_usd: float | None = None
|
|
344
439
|
if estimates is not None and table is not None:
|
|
345
440
|
# Bill the actual output against the budget using the
|
|
346
441
|
# member's per-1M output rate. Re-use estimate_cost with
|
|
@@ -349,6 +444,7 @@ def _run_round(
|
|
|
349
444
|
member.name, member.model,
|
|
350
445
|
response.input_tokens, response.output_tokens, table,
|
|
351
446
|
)
|
|
447
|
+
actual_usd = actual.total_usd
|
|
352
448
|
spent["usd"] += actual.total_usd
|
|
353
449
|
# Persist to the rolling 24h ledger when the daily cap is
|
|
354
450
|
# active. Errors are swallowed inside record_spend.
|
|
@@ -356,14 +452,44 @@ def _run_round(
|
|
|
356
452
|
_record_daily_spend(
|
|
357
453
|
actual.total_usd, member.name, member.model,
|
|
358
454
|
)
|
|
455
|
+
_stamp_transport_metadata(response, member, cost_usd=actual_usd)
|
|
359
456
|
|
|
360
457
|
return results
|
|
361
458
|
|
|
362
459
|
|
|
363
460
|
def _aborted(member: ExternalAIClient, reason: str) -> CouncilResponse:
    """Return an error-only response for ``member``.

    The response has empty ``text`` and carries ``reason`` as its
    ``error``; transport metadata is stamped on it before it is returned.
    """
    aborted = CouncilResponse(
        provider=member.name,
        model=member.model,
        text="",
        error=reason,
    )
    _stamp_transport_metadata(aborted, member)
    return aborted
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def _stamp_transport_metadata(
|
|
469
|
+
response: CouncilResponse,
|
|
470
|
+
member: ExternalAIClient,
|
|
471
|
+
*,
|
|
472
|
+
cost_usd: float | None = None,
|
|
473
|
+
) -> None:
|
|
474
|
+
"""Annotate `response.metadata` with transport / billable / cost info.
|
|
475
|
+
|
|
476
|
+
Phase 5 / Step 1 — the session writer and orchestrator renderer key
|
|
477
|
+
off these fields to format the cost line as either
|
|
478
|
+
``cost: subscription (claude-pro)`` (non-billable vendor CLI) or
|
|
479
|
+
``cost: $0.NNNN (… in / … out)`` (billable api or community CLI).
|
|
480
|
+
Stamped here (and not in each client) so the writer stays decoupled
|
|
481
|
+
from the client class hierarchy.
|
|
482
|
+
"""
|
|
483
|
+
meta = dict(response.metadata or {})
|
|
484
|
+
transport = getattr(member, "transport", "api")
|
|
485
|
+
meta.setdefault("transport", transport)
|
|
486
|
+
meta.setdefault("billable", bool(getattr(member, "billable", True)))
|
|
487
|
+
label = getattr(member, "subscription_label", "") or ""
|
|
488
|
+
if label and not meta.get("billable", True):
|
|
489
|
+
meta.setdefault("subscription_label", label)
|
|
490
|
+
if cost_usd is not None:
|
|
491
|
+
meta["cost_usd"] = float(cost_usd)
|
|
492
|
+
response.metadata = meta
|
|
367
493
|
|
|
368
494
|
|
|
369
495
|
def _augment_for_next_round(
|
|
@@ -857,6 +983,57 @@ def run_consensus_scoring(
|
|
|
857
983
|
)
|
|
858
984
|
|
|
859
985
|
|
|
986
|
+
def _render_response_meta(r: CouncilResponse) -> str:
|
|
987
|
+
"""Format the per-member meta line — tokens, cost (or subscription), latency.
|
|
988
|
+
|
|
989
|
+
Phase 5 / Step 1 — non-billable vendor-CLI calls render
|
|
990
|
+
``cost: subscription (<label>)`` with no token detail (the local
|
|
991
|
+
session counted them but the user is on a flat rate). Billable
|
|
992
|
+
calls (api or community CLI) render ``cost: $X.XXXX`` plus tokens.
|
|
993
|
+
Tokens marked ``estimated=True`` get a ``~`` prefix so the audit
|
|
994
|
+
trail flags heuristic counts.
|
|
995
|
+
"""
|
|
996
|
+
meta_dict = r.metadata or {}
|
|
997
|
+
billable = bool(meta_dict.get("billable", True))
|
|
998
|
+
estimated = bool(meta_dict.get("tokens_estimated", False))
|
|
999
|
+
parts: list[str] = []
|
|
1000
|
+
if not billable:
|
|
1001
|
+
label = meta_dict.get("subscription_label") or "flat-rate"
|
|
1002
|
+
parts.append(f"cost: subscription ({label})")
|
|
1003
|
+
else:
|
|
1004
|
+
cost_usd = meta_dict.get("cost_usd")
|
|
1005
|
+
if isinstance(cost_usd, (int, float)):
|
|
1006
|
+
parts.append(f"cost: ${cost_usd:.4f}")
|
|
1007
|
+
prefix = "~" if estimated else ""
|
|
1008
|
+
parts.append(
|
|
1009
|
+
f"tokens: {prefix}{r.input_tokens} in / {prefix}{r.output_tokens} out"
|
|
1010
|
+
)
|
|
1011
|
+
parts.append(f"{r.latency_ms} ms")
|
|
1012
|
+
return f"*{' · '.join(parts)}*"
|
|
1013
|
+
|
|
1014
|
+
|
|
1015
|
+
# Lens defaults for the Phase 9 confidence-explanation badge. The PR
|
|
1016
|
+
# lens stays terse so the existing "Must-fix / Nice-to-have" structure
|
|
1017
|
+
# isn't drowned in scorer prose; every other decision lens shows the
|
|
1018
|
+
# explanation by default. Creative lenses (design/optimize) never reach
|
|
1019
|
+
# this code path because they skip consensus scoring entirely.
|
|
1020
|
+
_DEFAULT_EXPLAIN_LENSES: frozenset[str] = frozenset({
|
|
1021
|
+
"default", "analysis", "debate", "prompt", "roadmap", "diff", "files",
|
|
1022
|
+
})
|
|
1023
|
+
|
|
1024
|
+
|
|
1025
|
+
def _default_explain_confidence(mode: str | None) -> bool:
|
|
1026
|
+
"""Decide whether the confidence-explanation badge fires by default.
|
|
1027
|
+
|
|
1028
|
+
Pulled into a helper so the CLI ``--explain-confidence`` /
|
|
1029
|
+
``--no-explain-confidence`` flags and the lens override path share
|
|
1030
|
+
one truth source.
|
|
1031
|
+
"""
|
|
1032
|
+
if mode is None:
|
|
1033
|
+
return True
|
|
1034
|
+
return mode in _DEFAULT_EXPLAIN_LENSES
|
|
1035
|
+
|
|
1036
|
+
|
|
860
1037
|
def render(
|
|
861
1038
|
responses: list[CouncilResponse],
|
|
862
1039
|
*,
|
|
@@ -864,6 +1041,7 @@ def render(
|
|
|
864
1041
|
prose_synthesis: bool | None = None,
|
|
865
1042
|
consensus: ConsensusResult | None = None,
|
|
866
1043
|
peer_review: PeerReviewResult | None = None,
|
|
1044
|
+
explain_confidence: bool | None = None,
|
|
867
1045
|
) -> str:
|
|
868
1046
|
"""Render stacked sections + a lens-aware synthesis prompt slot.
|
|
869
1047
|
|
|
@@ -885,19 +1063,21 @@ def render(
|
|
|
885
1063
|
`Peer-Review-Surfaced Blind Spots` addendum.
|
|
886
1064
|
"""
|
|
887
1065
|
blocks: list[str] = []
|
|
1066
|
+
explain = (
|
|
1067
|
+
explain_confidence
|
|
1068
|
+
if explain_confidence is not None
|
|
1069
|
+
else _default_explain_confidence(mode)
|
|
1070
|
+
)
|
|
888
1071
|
if consensus is not None and (
|
|
889
1072
|
consensus.bucket.strong or consensus.bucket.findings or consensus.bucket.minority
|
|
890
1073
|
):
|
|
891
|
-
blocks.append(_render_consensus(consensus.bucket))
|
|
1074
|
+
blocks.append(_render_consensus(consensus.bucket, explain=explain))
|
|
892
1075
|
for r in responses:
|
|
893
1076
|
header = f"## {r.provider} · {r.model}"
|
|
894
1077
|
if r.error:
|
|
895
1078
|
blocks.append(f"{header}\n\n*ERROR:* `{r.error}`")
|
|
896
1079
|
continue
|
|
897
|
-
meta = (
|
|
898
|
-
f"*tokens: {r.input_tokens} in / {r.output_tokens} out · "
|
|
899
|
-
f"{r.latency_ms} ms*"
|
|
900
|
-
)
|
|
1080
|
+
meta = _render_response_meta(r)
|
|
901
1081
|
blocks.append(f"{header}\n\n{meta}\n\n{r.text}")
|
|
902
1082
|
if peer_review is not None and peer_review.responses:
|
|
903
1083
|
blocks.append(_render_peer_review(peer_review))
|
|
@@ -937,32 +1117,90 @@ def _render_peer_review(peer_review: PeerReviewResult) -> str:
|
|
|
937
1117
|
return "\n\n".join(lines)
|
|
938
1118
|
|
|
939
1119
|
|
|
940
|
-
def _render_consensus(bucket: ConsensusBucket, *, explain: bool = True) -> str:
    """Render the Strong / Findings / Minority sections, in that order.

    Empty buckets are skipped; the remaining sections are separated by a
    blank line. ``explain`` is forwarded to :func:`_render_bucket` and
    toggles the Phase 9 confidence-explanation badge -- with ``False`` the
    output falls back to the terse Phase 4 badge, which keeps the PR lens
    (and any caller passing ``--no-explain-confidence``) compact.
    """
    sections = (
        ("## Strong Consensus\n\n", bucket.strong),
        ("## Findings\n\n", bucket.findings),
        (
            "## Minority Views\n\n"
            "*Sub-threshold by consensus; kept for audit trail.*\n\n",
            bucket.minority,
        ),
    )
    return "\n\n".join(
        lead_in + _render_bucket(entries, explain=explain)
        for lead_in, entries in sections
        if entries
    )
|
|
954
1146
|
|
|
955
1147
|
|
|
1148
|
+
def _truncate_reason(reason: str, *, limit: int = 120) -> str:
|
|
1149
|
+
"""Collapse a multi-line scorer reason to a single ≤``limit``-char line.
|
|
1150
|
+
|
|
1151
|
+
Phase 9 — the dissent summary must fit on one line; we keep the
|
|
1152
|
+
first sentence-ish chunk and add an ellipsis when truncating. Empty
|
|
1153
|
+
reasons render as ``no rationale``.
|
|
1154
|
+
"""
|
|
1155
|
+
flat = " ".join(reason.split()) if reason else ""
|
|
1156
|
+
if not flat:
|
|
1157
|
+
return "no rationale"
|
|
1158
|
+
if len(flat) <= limit:
|
|
1159
|
+
return flat
|
|
1160
|
+
return flat[: limit - 1].rstrip() + "…"
|
|
1161
|
+
|
|
1162
|
+
|
|
956
1163
|
def _render_bucket(
|
|
957
1164
|
items: list[tuple[Finding, ConsensusMetadata]],
|
|
1165
|
+
*,
|
|
1166
|
+
explain: bool = True,
|
|
958
1167
|
) -> str:
|
|
1168
|
+
"""Render one bucket of (finding, metadata) tuples.
|
|
1169
|
+
|
|
1170
|
+
The Phase 4 terse badge (``strength · mean · scorers · dissent``)
|
|
1171
|
+
is preserved on the first line. Phase 9 adds a second
|
|
1172
|
+
confidence-explanation line whenever ``explain`` is true *and* at
|
|
1173
|
+
least one scorer rated the finding — the explanation needs scorer
|
|
1174
|
+
data to be meaningful.
|
|
1175
|
+
"""
|
|
959
1176
|
lines: list[str] = []
|
|
960
1177
|
for f, m in items:
|
|
961
|
-
|
|
1178
|
+
terse_badge = (
|
|
962
1179
|
f"strength {m.consensus_strength:.2f} · "
|
|
963
1180
|
f"mean {m.mean_score:.1f}/10 · "
|
|
964
1181
|
f"{len(m.scorers)} scorers · "
|
|
965
1182
|
f"{m.dissent_count} dissent"
|
|
966
1183
|
)
|
|
967
|
-
|
|
1184
|
+
block = f"- **{f.id}** — {f.text} \n _{terse_badge}_"
|
|
1185
|
+
if explain and m.scorers:
|
|
1186
|
+
total = m.concur_count + m.dissent_count
|
|
1187
|
+
if total <= 0:
|
|
1188
|
+
total = len(m.scorers)
|
|
1189
|
+
parts: list[str] = [
|
|
1190
|
+
f"{m.concur_count}/{total} members concur",
|
|
1191
|
+
]
|
|
1192
|
+
if m.dissent_reasons:
|
|
1193
|
+
first = m.dissent_reasons[0]
|
|
1194
|
+
parts.append(
|
|
1195
|
+
f"{first[0]} dissented citing "
|
|
1196
|
+
f"{_truncate_reason(first[1])}",
|
|
1197
|
+
)
|
|
1198
|
+
extra = len(m.dissent_reasons) - 1
|
|
1199
|
+
if extra > 0:
|
|
1200
|
+
parts.append(f"{extra} other dissent(s)")
|
|
1201
|
+
else:
|
|
1202
|
+
parts.append("no dissent")
|
|
1203
|
+
parts.append(f"mean evidence-quality {m.evidence_quality}")
|
|
1204
|
+
block += " \n _" + "; ".join(parts) + "_"
|
|
1205
|
+
lines.append(block)
|
|
968
1206
|
return "\n".join(lines)
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""Probation promote-and-prune for ``agents/low-impact-decisions.md``.
|
|
2
|
+
|
|
3
|
+
Phase 12 § Step 3. Runs at council startup AND after every intake
|
|
4
|
+
append. Idempotent — a second run on an unchanged corpus is a no-op.
|
|
5
|
+
|
|
6
|
+
Rules:
|
|
7
|
+
|
|
8
|
+
- **Prune.** For each ``## On Probation`` entry, drop any ``seen``
|
|
9
|
+
timestamp older than ``WINDOW_DAYS`` (default 30) from ``today``
|
|
10
|
+
(UTC). If the ``seen`` array empties, drop the whole entry.
|
|
11
|
+
- **Promote.** If the trimmed ``seen`` array has ≥ ``PROMOTION_THRESHOLD``
|
|
12
|
+
entries (default 3), move the entry to ``## Validated`` — strip the
|
|
13
|
+
``seen`` array, add ``validated <today>`` marker. One-way: a
|
|
14
|
+
Validated entry never falls back.
|
|
15
|
+
- **Log.** Returns :class:`GateRun` with the counts, suitable for
|
|
16
|
+
one-line session-artefact logging.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import re
|
|
22
|
+
from dataclasses import dataclass
|
|
23
|
+
from datetime import datetime, timedelta, timezone
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
WINDOW_DAYS = 30
|
|
27
|
+
PROMOTION_THRESHOLD = 3
|
|
28
|
+
|
|
29
|
+
_PROBATION_HEADER = "## On Probation"
|
|
30
|
+
_VALIDATED_HEADER = "## Validated"
|
|
31
|
+
_TERMINAL_HEADERS = (
|
|
32
|
+
"## Anti-Examples",
|
|
33
|
+
"## Security",
|
|
34
|
+
"## Provenance",
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True)
class GateRun:
    """Counters produced by one promote-and-prune pass."""

    # Number of individual ``seen`` stamps removed from probation entries.
    pruned_timestamps: int
    # Number of probation entries removed because no stamp survived.
    dropped_entries: int
    # Number of probation entries moved to the validated section.
    promoted_entries: int

    def log_line(self) -> str:
        """Return the counters as a one-line, human-readable log message."""
        segments = (
            f"pruned {self.pruned_timestamps} stale timestamps",
            f"promoted {self.promoted_entries} entries",
            f"dropped {self.dropped_entries} expired entries",
        )
        return "probation-gate: " + "; ".join(segments)

    @property
    def is_noop(self) -> bool:
        """True when the pass pruned, dropped and promoted nothing."""
        counters = (
            self.pruned_timestamps,
            self.dropped_entries,
            self.promoted_entries,
        )
        return all(count == 0 for count in counters)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _today() -> datetime:
|
|
59
|
+
return datetime.now(timezone.utc)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _section_span(text: str, header: str) -> tuple[int, int] | None:
    """Locate the body of the section introduced by ``header``.

    Args:
        text: Full corpus text.
        header: Section header to look for (first occurrence wins).

    Returns:
        ``(body_start, end)`` offsets into ``text``, or ``None`` when the
        header does not occur. ``body_start`` is the first character after
        the header line. ``end`` is the offset of the newline that precedes
        the next known section header, or ``len(text)`` when none follows.
        For an empty section directly followed by another header, both
        offsets equal ``body_start``.
    """
    header_at = text.find(header)
    if header_at < 0:
        return None

    newline_at = text.find("\n", header_at)
    # Header on the last line without a trailing newline: the body is empty
    # and sits at end-of-file. Without this guard ``find`` returns -1 and
    # the body would wrongly start at offset 0.
    body_start = len(text) if newline_at < 0 else newline_at + 1

    # Start one character early so a header on the very next line is found
    # too: its leading newline is the one that terminates our header line.
    search_from = max(body_start - 1, 0)
    end = len(text)
    for other in (_PROBATION_HEADER, _VALIDATED_HEADER) + _TERMINAL_HEADERS:
        if other == header:
            continue
        hit = text.find("\n" + other, search_from)
        if hit < 0:
            continue
        # Never let the span end before it starts (adjacent-header case).
        end = min(end, max(hit, body_start))
    return body_start, end
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _parse_probation_line(line: str) -> tuple[str, str, list[str]] | None:
|
|
78
|
+
m = re.match(
|
|
79
|
+
r'^(\s*-\s*"[^"]+")\s*—\s*first-seen\s+(\d{4}-\d{2}-\d{2})'
|
|
80
|
+
r'\s*·\s*seen\s*\[([^\]]*)\]\s*$',
|
|
81
|
+
line,
|
|
82
|
+
)
|
|
83
|
+
if not m:
|
|
84
|
+
return None
|
|
85
|
+
prefix = m.group(1)
|
|
86
|
+
first_seen = m.group(2)
|
|
87
|
+
seen_raw = m.group(3).strip()
|
|
88
|
+
seen = [s.strip() for s in seen_raw.split(",") if s.strip()] if seen_raw else []
|
|
89
|
+
return prefix, first_seen, seen
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _parse_date(s: str) -> datetime | None:
|
|
93
|
+
try:
|
|
94
|
+
return datetime.strptime(s, "%Y-%m-%d").replace(tzinfo=timezone.utc)
|
|
95
|
+
except ValueError:
|
|
96
|
+
return None
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def run_gate(corpus_path: Path, *, today: datetime | None = None) -> GateRun:
    """Run one promote-and-prune pass over the corpus file.

    Every parseable ``## On Probation`` line has its ``seen`` stamps
    filtered to those dated within ``WINDOW_DAYS`` of ``today``. A line
    left with at least ``PROMOTION_THRESHOLD`` stamps is appended to the
    ``## Validated`` section; a line left with none is dropped; any other
    line is rewritten with the trimmed ``seen`` list. Lines that do not
    parse as probation entries are kept verbatim. Stamps that do not parse
    as dates are removed and counted as pruned.

    The window is evaluated on UTC calendar dates, so a stamp exactly
    ``WINDOW_DAYS`` days old is kept no matter what time of day the gate
    runs, and a naive ``today`` is accepted.

    Args:
        corpus_path: Markdown corpus to read and, if needed, rewrite.
        today: Reference moment; defaults to the current UTC time. An
            aware value is converted to UTC, a naive value is used as-is.

    Returns:
        A :class:`GateRun` with the pruned / dropped / promoted counts.
        The file is written only when at least one count is non-zero;
        all counts are zero when either section header is missing.
    """
    now = today or _today()
    if now.tzinfo is not None:
        now = now.astimezone(timezone.utc)
    # Stamps carry no time of day, so compare dates, not datetimes.
    today_date = now.date()
    cutoff = today_date - timedelta(days=WINDOW_DAYS)
    today_str = today_date.strftime("%Y-%m-%d")

    text = corpus_path.read_text(encoding="utf-8")
    prob = _section_span(text, _PROBATION_HEADER)
    val = _section_span(text, _VALIDATED_HEADER)
    if prob is None or val is None:
        return GateRun(0, 0, 0)

    prob_body = text[prob[0]:prob[1]]
    promoted: list[str] = []
    pruned_ts = 0
    dropped = 0
    out_lines: list[str] = []
    for line in prob_body.splitlines():
        parsed = _parse_probation_line(line)
        if parsed is None:
            # Blank lines, prose and malformed entries pass through as-is.
            out_lines.append(line)
            continue
        prefix, first_seen, seen = parsed
        fresh = [
            s for s in seen
            if (d := _parse_date(s)) is not None and d.date() >= cutoff
        ]
        pruned_ts += len(seen) - len(fresh)
        if len(fresh) >= PROMOTION_THRESHOLD:
            # Promotion drops the ``seen`` array and stamps the date.
            promoted.append(
                f'{prefix} — domain: low-impact · validated {today_str}'
            )
            continue
        if not fresh:
            dropped += 1
            continue
        out_lines.append(
            f'{prefix} — first-seen {first_seen} · seen [{", ".join(fresh)}]'
        )

    new_prob_body = "\n".join(out_lines)
    if not new_prob_body.endswith("\n"):
        new_prob_body += "\n"

    new_text = text[:prob[0]] + new_prob_body + text[prob[1]:]
    if promoted:
        # Offsets moved when the probation body was replaced; look the
        # validated section up again in the rewritten text.
        v_start, v_end = _section_span(new_text, _VALIDATED_HEADER)  # type: ignore[misc]
        insertion = "\n".join(promoted) + "\n"
        new_text = new_text[:v_end].rstrip() + "\n\n" + insertion + new_text[v_end:]

    result = GateRun(pruned_ts, dropped, len(promoted))
    if not result.is_noop:
        corpus_path.write_text(new_text, encoding="utf-8")
    return result
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""Privacy floor for `agents/low-impact-decisions.md` (Phase 12).
|
|
2
|
+
|
|
3
|
+
Non-bypassable redactor invoked on intake (write-side) AND on
|
|
4
|
+
upstream (`/learn-low-impact`, leave-the-repo side). Both gates call
|
|
5
|
+
:func:`redact_low_impact_entry` and refuse to proceed when a forbidden
|
|
6
|
+
pattern fires.
|
|
7
|
+
|
|
8
|
+
Iron Law: nothing leaves the project repo until this redactor clears
|
|
9
|
+
the entry. See ``.augment/rules/low-impact-corpus-privacy-floor.md``.
|
|
10
|
+
|
|
11
|
+
Forbidden-content classes (per Phase 12 § Step 4):
|
|
12
|
+
|
|
13
|
+
1. Secrets — raw-key prefixes mirrored from
|
|
14
|
+
:data:`scripts.ai_council.config._RAW_KEY_PREFIXES`, plus a
|
|
15
|
+
generic ``api[-_]?key:\\s*<token>`` shape.
|
|
16
|
+
2. Emails — RFC-5322-ish shape, deliberately permissive.
|
|
17
|
+
3. Project-rooted paths — anything starting ``/Users/``, ``/home/``,
|
|
18
|
+
``/opt/``, ``/private/``, drive letters (``C:\\``), or the
|
|
19
|
+
configured repo root from ``.agent-settings.yml`` when supplied.
|
|
20
|
+
4. Customer / tenant names — caller passes a name list (project
|
|
21
|
+
policy); generic placeholders ``<customer>``, ``<tenant>``,
|
|
22
|
+
``<account>``, ``<user>`` survive.
|
|
23
|
+
5. Internal hostnames — ``*.internal``, ``*.local``, plus any
|
|
24
|
+
project-private domain the caller supplies.
|
|
25
|
+
6. Monetary amounts — ``$1,234`` / ``€500`` / ``USD 1000`` shapes
|
|
26
|
+
that look like business figures (lone ``$0.05`` cap mentions in
|
|
27
|
+
curly-brace context are skipped via the call-site, not here).
|
|
28
|
+
7. Business-context SQL identifiers — caller-supplied table /
|
|
29
|
+
column deny-list. Default empty.
|
|
30
|
+
8. Inline code excerpts > 40 chars — any backtick-fenced run > 40.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
from __future__ import annotations
|
|
34
|
+
|
|
35
|
+
import re
|
|
36
|
+
from dataclasses import dataclass, field
|
|
37
|
+
from pathlib import Path
|
|
38
|
+
from typing import Iterable
|
|
39
|
+
|
|
40
|
+
from scripts.ai_council.config import _RAW_KEY_PREFIXES
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass(frozen=True)
class RedactionViolation:
    """One hit of a forbidden pattern inside a corpus entry."""

    # Forbidden-content class of the hit, e.g. "secret" or "email".
    category: str
    # The offending text (possibly shortened) shown back to the user.
    snippet: str
    # Optional extra detail about why the hit fired.
    note: str = ""
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass(frozen=True)
class RedactionResult:
    """Verdict of a single redaction pass over one entry."""

    # True when no forbidden pattern fired.
    ok: bool
    # Every hit found during the pass; empty for a clean entry.
    violations: tuple[RedactionViolation, ...] = ()

    def summary(self) -> str:
        """Return a one-line verdict listing each violation's category and snippet."""
        if not self.ok:
            rendered = "; ".join(
                f"{hit.category}: {hit.snippet!r}" for hit in self.violations
            )
            return "redaction REFUSED — " + rendered
        return "redaction: clean"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
_EMAIL_RE = re.compile(r"\b[\w.+-]+@[\w-]+\.[\w.-]+\b")
|
|
67
|
+
_PATH_RE = re.compile(
|
|
68
|
+
r"(?:^|[\s\"'(])"
|
|
69
|
+
r"(?:/Users/|/home/|/opt/|/private/|[A-Z]:\\)"
|
|
70
|
+
r"[\w.\-/\\]+"
|
|
71
|
+
)
|
|
72
|
+
_INTERNAL_HOST_RE = re.compile(
|
|
73
|
+
r"\b[a-zA-Z0-9][\w.-]*\.(?:internal|local)\b",
|
|
74
|
+
re.IGNORECASE,
|
|
75
|
+
)
|
|
76
|
+
_MONEY_RE = re.compile(
|
|
77
|
+
r"(?:[\$€£¥]\s?\d{1,3}(?:[,.]\d{3})*(?:\.\d+)?"
|
|
78
|
+
r"|\b(?:USD|EUR|GBP|JPY)\s?\d+(?:[,.]\d+)?)"
|
|
79
|
+
)
|
|
80
|
+
_API_KEY_RE = re.compile(
|
|
81
|
+
r"(?i)\bapi[_-]?key\b\s*[:=]\s*[A-Za-z0-9+/=_\-]{12,}"
|
|
82
|
+
)
|
|
83
|
+
_CODE_FENCE_RE = re.compile(r"`([^`]{41,})`")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _check_secrets(text: str) -> list[RedactionViolation]:
    """Collect secret-shaped hits in ``text``.

    Reports at most one hit per raw-key prefix (the prefix followed by six
    or more key characters) and at most one inline ``api_key`` assignment.
    Snippets are shortened -- 8 characters for prefixed keys, 20 for inline
    assignments -- before an ellipsis is appended.
    """
    found: list[RedactionViolation] = []
    for prefix in _RAW_KEY_PREFIXES:
        hit = re.search(re.escape(prefix) + r"[A-Za-z0-9_\-]{6,}", text)
        if hit is None:
            continue
        found.append(
            RedactionViolation(
                "secret",
                hit.group(0)[:8] + "…",
                f"raw-key prefix {prefix!r}",
            )
        )

    inline = _API_KEY_RE.search(text)
    if inline is not None:
        found.append(
            RedactionViolation("secret", inline.group(0)[:20] + "…", "inline api_key")
        )
    return found
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _check_patterns(text: str, repo_root: str | None,
                    private_domains: Iterable[str],
                    customer_names: Iterable[str],
                    sql_identifiers: Iterable[str]) -> list[RedactionViolation]:
    """Collect every non-secret forbidden-pattern hit in ``text``.

    Checks run in a fixed order: emails, machine paths, the configured
    repo root, internal hostnames, configured private domains, monetary
    amounts, customer names, SQL identifiers, long inline code excerpts.

    Args:
        text: Entry text to scan.
        repo_root: Optional repo root; flagged when it occurs verbatim.
        private_domains: Domains flagged when they occur anywhere in the
            text, compared case-insensitively because hostnames are.
        customer_names: Names flagged on a case-insensitive whole-word
            match.
        sql_identifiers: Identifiers flagged on a case-sensitive
            whole-word match.

    Returns:
        One :class:`RedactionViolation` per hit, in check order.
    """
    hits: list[RedactionViolation] = []
    hits.extend(
        RedactionViolation("email", m.group(0))
        for m in _EMAIL_RE.finditer(text)
    )
    hits.extend(
        RedactionViolation("project_path", m.group(0).strip())
        for m in _PATH_RE.finditer(text)
    )
    if repo_root and repo_root in text:
        hits.append(RedactionViolation("project_path", repo_root,
                                       "configured repo root"))
    hits.extend(
        RedactionViolation("internal_hostname", m.group(0))
        for m in _INTERNAL_HOST_RE.finditer(text)
    )
    # Lower-case once so "Corp.Example.COM" cannot slip past a configured
    # "corp.example.com".
    lowered = text.lower()
    for dom in private_domains:
        if dom and dom.lower() in lowered:
            hits.append(RedactionViolation("internal_hostname", dom,
                                           "configured private domain"))
    hits.extend(
        RedactionViolation("monetary_amount", m.group(0))
        for m in _MONEY_RE.finditer(text)
    )
    for name in customer_names:
        if name and re.search(rf"\b{re.escape(name)}\b", text, re.IGNORECASE):
            hits.append(RedactionViolation("customer_name", name))
    for ident in sql_identifiers:
        if ident and re.search(rf"\b{re.escape(ident)}\b", text):
            hits.append(RedactionViolation("sql_identifier", ident))
    for m in _CODE_FENCE_RE.finditer(text):
        excerpt = m.group(1)
        hits.append(RedactionViolation("long_code_excerpt",
                                       excerpt[:40] + "…",
                                       f"{len(excerpt)} chars"))
    return hits
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def redact_low_impact_entry(
    text: str,
    *,
    repo_root: str | None = None,
    private_domains: Iterable[str] = (),
    customer_names: Iterable[str] = (),
    sql_identifiers: Iterable[str] = (),
) -> RedactionResult:
    """Apply the privacy floor to ``text`` and report clean or refused.

    The entry is never rewritten automatically -- that would make the gate
    a soft one. Instead every violation is returned so the user can
    rephrase, which keeps them in the loop and the audit trail honest.

    Returns:
        A :class:`RedactionResult` whose ``ok`` is true only when neither
        the secret checks nor the pattern checks produced a hit; secret
        hits come first in ``violations``.
    """
    findings = [
        *_check_secrets(text),
        *_check_patterns(
            text,
            repo_root,
            private_domains,
            customer_names,
            sql_identifiers,
        ),
    ]
    return RedactionResult(ok=not findings, violations=tuple(findings))
|