@event4u/agent-config 2.12.0 → 2.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/council/analysis.md +142 -0
- package/.agent-src/commands/council/debate.md +129 -0
- package/.agent-src/commands/council/default.md +8 -0
- package/.agent-src/commands/council/design.md +16 -12
- package/.agent-src/commands/council/optimize.md +16 -15
- package/.agent-src/commands/council/pr.md +12 -12
- package/.agent-src/commands/council.md +48 -2
- package/.agent-src/commands/memory/learn-low-impact.md +143 -0
- package/.agent-src/personas/advisors/contrarian.md +95 -0
- package/.agent-src/personas/advisors/executor.md +99 -0
- package/.agent-src/personas/advisors/expansionist.md +98 -0
- package/.agent-src/personas/advisors/first-principles.md +98 -0
- package/.agent-src/personas/advisors/outsider.md +102 -0
- package/.agent-src/rules/ask-when-uncertain.md +10 -6
- package/.agent-src/rules/copilot-routing.md +19 -0
- package/.agent-src/rules/devcontainer-routing.md +20 -0
- package/.agent-src/rules/external-reference-deep-dive.md +1 -1
- package/.agent-src/rules/fast-path-marker-visibility.md +38 -0
- package/.agent-src/rules/laravel-routing.md +20 -0
- package/.agent-src/rules/low-impact-corpus-privacy-floor.md +74 -0
- package/.agent-src/rules/symfony-routing.md +20 -0
- package/.agent-src/skills/ai-council/SKILL.md +388 -10
- package/.agent-src/skills/copilot-config/SKILL.md +1 -1
- package/.agent-src/skills/devcontainer/SKILL.md +1 -1
- package/.agent-src/skills/laravel/SKILL.md +1 -1
- package/.agent-src/skills/project-analysis-core/SKILL.md +1 -1
- package/.agent-src/skills/project-analyzer/SKILL.md +1 -1
- package/.agent-src/skills/symfony-workflow/SKILL.md +1 -1
- package/.agent-src/skills/universal-project-analysis/SKILL.md +1 -1
- package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
- package/.claude-plugin/marketplace.json +4 -1
- package/AGENTS.md +1 -1
- package/CHANGELOG.md +346 -124
- package/CONTRIBUTING.md +5 -0
- package/README.md +6 -6
- package/config/agent-settings.template.yml +5 -93
- package/config/gitignore-block.txt +6 -0
- package/docs/architecture/multi-tool-projection.md +53 -0
- package/docs/architecture/{compression.md → source-projection.md} +21 -3
- package/docs/architecture.md +15 -15
- package/docs/archive/CHANGELOG-pre-2.11.0.md +141 -0
- package/docs/catalog.md +25 -12
- package/docs/contracts/adr-architectural-consensus-mechanism.md +68 -0
- package/docs/contracts/adr-level-6-productization.md +7 -9
- package/docs/contracts/ai-council-config.md +658 -0
- package/docs/contracts/command-clusters.md +58 -2
- package/docs/contracts/command-surface-tiers.md +3 -2
- package/docs/contracts/cost-profile-defaults.md +5 -0
- package/docs/contracts/decision-engine-gates.md +5 -0
- package/docs/contracts/decision-trace-v1.md +2 -2
- package/docs/contracts/file-ownership-matrix.json +1735 -72
- package/docs/contracts/installed-tools-lockfile.md +2 -1
- package/docs/contracts/low-impact-corpus-format.md +95 -0
- package/docs/contracts/mcp-beta-criteria.md +6 -5
- package/docs/contracts/mcp-cloud-scope.md +5 -4
- package/docs/contracts/multi-tool-projection-fidelity.md +115 -0
- package/docs/contracts/release-trunk-sync.md +4 -3
- package/docs/contracts/tier-3-contrib-plugin.md +5 -6
- package/docs/getting-started.md +2 -2
- package/docs/guidelines/agent-infra/installed-tools-manifest.md +2 -1
- package/docs/installation.md +32 -0
- package/package.json +1 -1
- package/scripts/_archive/README.md +59 -0
- package/scripts/_cli/cmd_doctor.py +134 -0
- package/scripts/ai_council/_default_prices.py +10 -1
- package/scripts/ai_council/advisors.py +148 -0
- package/scripts/ai_council/airgap.py +165 -0
- package/scripts/ai_council/cli_hints.py +123 -0
- package/scripts/ai_council/clients.py +959 -5
- package/scripts/ai_council/compile_corpus.py +178 -0
- package/scripts/ai_council/confidence_gate.py +156 -0
- package/scripts/ai_council/config.py +1364 -0
- package/scripts/ai_council/consensus.py +329 -0
- package/scripts/ai_council/events_log.py +137 -0
- package/scripts/ai_council/learn_low_impact_preview.py +252 -0
- package/scripts/ai_council/low_impact.py +714 -0
- package/scripts/ai_council/low_impact_corpus.py +466 -0
- package/scripts/ai_council/low_impact_intake.py +163 -0
- package/scripts/ai_council/modes.py +6 -1
- package/scripts/ai_council/necessity.py +782 -0
- package/scripts/ai_council/orchestrator.py +872 -20
- package/scripts/ai_council/probation_gate.py +152 -0
- package/scripts/ai_council/prompts.py +335 -0
- package/scripts/ai_council/redact_low_impact_entry.py +155 -0
- package/scripts/ai_council/replay.py +155 -0
- package/scripts/ai_council/session.py +19 -1
- package/scripts/ai_council/shadow_dispatch.py +235 -0
- package/scripts/ai_council/solo_dispatch.py +226 -0
- package/scripts/audit_cloud_compatibility.py +74 -0
- package/scripts/audit_command_surface.py +363 -0
- package/scripts/check_compressed_paths.py +6 -1
- package/scripts/check_council_layout.py +11 -0
- package/scripts/ci_time_ratio.py +168 -0
- package/scripts/council_cli.py +2005 -30
- package/scripts/install.sh +12 -0
- package/scripts/measure_projection_bytes.py +159 -0
- package/scripts/measure_roadmap_trajectory.py +112 -0
- package/scripts/probe_projection_fidelity.py +202 -0
- package/scripts/score_skill_selection.py +198 -0
- package/scripts/skill_collision_clusters.py +162 -0
- /package/scripts/{_backfill_skill_domains.py → _archive/_backfill_skill_domains.py} +0 -0
- /package/scripts/{_bootstrap_tier_frontmatter.py → _archive/_bootstrap_tier_frontmatter.py} +0 -0
- /package/scripts/{_p43_bodies.py → _archive/_p43_bodies.py} +0 -0
- /package/scripts/{_p43_compress.py → _archive/_p43_compress.py} +0 -0
- /package/scripts/{_p4_migrate.py → _archive/_p4_migrate.py} +0 -0
- /package/scripts/{_phase2_shim_helper.py → _archive/_phase2_shim_helper.py} +0 -0
- /package/scripts/{_pilot_council_question.py → _archive/_pilot_council_question.py} +0 -0
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""Probation promote-and-prune for ``agents/low-impact-decisions.md``.
|
|
2
|
+
|
|
3
|
+
Phase 12 § Step 3. Runs at council startup AND after every intake
|
|
4
|
+
append. Idempotent — a second run on an unchanged corpus is a no-op.
|
|
5
|
+
|
|
6
|
+
Rules:
|
|
7
|
+
|
|
8
|
+
- **Prune.** For each ``## On Probation`` entry, drop any ``seen``
|
|
9
|
+
timestamp older than ``WINDOW_DAYS`` (default 30) from ``today``
|
|
10
|
+
(UTC). If the ``seen`` array empties, drop the whole entry.
|
|
11
|
+
- **Promote.** If the trimmed ``seen`` array has ≥ ``PROMOTION_THRESHOLD``
|
|
12
|
+
entries (default 3), move the entry to ``## Validated`` — strip the
|
|
13
|
+
``seen`` array, add ``validated <today>`` marker. One-way: a
|
|
14
|
+
Validated entry never falls back.
|
|
15
|
+
- **Log.** Returns :class:`GateRun` with the counts, suitable for
|
|
16
|
+
one-line session-artefact logging.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import re
|
|
22
|
+
from dataclasses import dataclass
|
|
23
|
+
from datetime import datetime, timedelta, timezone
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
WINDOW_DAYS = 30
|
|
27
|
+
PROMOTION_THRESHOLD = 3
|
|
28
|
+
|
|
29
|
+
_PROBATION_HEADER = "## On Probation"
|
|
30
|
+
_VALIDATED_HEADER = "## Validated"
|
|
31
|
+
_TERMINAL_HEADERS = (
|
|
32
|
+
"## Anti-Examples",
|
|
33
|
+
"## Security",
|
|
34
|
+
"## Provenance",
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True)
class GateRun:
    """Immutable tally of what one promote-and-prune pass did."""

    # Count of stale ``seen`` timestamps that were pruned.
    pruned_timestamps: int
    # Count of probation entries dropped as expired.
    dropped_entries: int
    # Count of probation entries promoted.
    promoted_entries: int

    def log_line(self) -> str:
        """Render the three counters as one log-friendly sentence."""
        pruned = f"pruned {self.pruned_timestamps} stale timestamps"
        promoted = f"promoted {self.promoted_entries} entries"
        dropped = f"dropped {self.dropped_entries} expired entries"
        return f"probation-gate: {pruned}; {promoted}; {dropped}"

    @property
    def is_noop(self) -> bool:
        """True when every counter is zero."""
        counters = (
            self.pruned_timestamps,
            self.dropped_entries,
            self.promoted_entries,
        )
        return not any(counters)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _today() -> datetime:
|
|
59
|
+
return datetime.now(timezone.utc)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _section_span(text: str, header: str) -> tuple[int, int] | None:
    """Locate the body of the section introduced by ``header``.

    Args:
        text: Full corpus text.
        header: Header string to look for (first occurrence wins).

    Returns:
        ``(body_start, end)`` offsets into ``text``, or ``None`` when
        ``header`` does not occur. ``body_start`` is the offset just past
        the header line. ``end`` is the offset of the newline that
        precedes the nearest other known section header, or
        ``len(text)`` when no such header follows. An empty section
        yields ``body_start == end``.
    """
    header_at = text.find(header)
    if header_at < 0:
        return None

    newline_at = text.find("\n", header_at)
    # A header on the final line without a trailing newline has an empty
    # body at end-of-text. ``find`` returns -1 there, and blindly adding 1
    # would make the body start at offset 0 (the top of the file).
    body_start = len(text) if newline_at < 0 else newline_at + 1

    # Start the lookahead on the newline that terminates the header line,
    # so a header sitting directly on the next line (empty section) is
    # still recognised as the boundary instead of being swallowed.
    search_from = max(body_start - 1, 0)

    end = len(text)
    for other in (_PROBATION_HEADER, _VALIDATED_HEADER) + _TERMINAL_HEADERS:
        if other == header:
            continue
        found = text.find("\n" + other, search_from)
        if found < 0:
            continue
        # Clamp so an immediately-following header gives an empty span
        # rather than an end offset that precedes the start.
        end = min(end, max(found, body_start))
    return body_start, end
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _parse_probation_line(line: str) -> tuple[str, str, list[str]] | None:
|
|
78
|
+
m = re.match(
|
|
79
|
+
r'^(\s*-\s*"[^"]+")\s*—\s*first-seen\s+(\d{4}-\d{2}-\d{2})'
|
|
80
|
+
r'\s*·\s*seen\s*\[([^\]]*)\]\s*$',
|
|
81
|
+
line,
|
|
82
|
+
)
|
|
83
|
+
if not m:
|
|
84
|
+
return None
|
|
85
|
+
prefix = m.group(1)
|
|
86
|
+
first_seen = m.group(2)
|
|
87
|
+
seen_raw = m.group(3).strip()
|
|
88
|
+
seen = [s.strip() for s in seen_raw.split(",") if s.strip()] if seen_raw else []
|
|
89
|
+
return prefix, first_seen, seen
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _parse_date(s: str) -> datetime | None:
|
|
93
|
+
try:
|
|
94
|
+
return datetime.strptime(s, "%Y-%m-%d").replace(tzinfo=timezone.utc)
|
|
95
|
+
except ValueError:
|
|
96
|
+
return None
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def run_gate(corpus_path: Path, *, today: datetime | None = None) -> GateRun:
    """Promote-and-prune pass. Writes corpus only when state changes.

    Args:
        corpus_path: Corpus file; read as UTF-8 and rewritten in place
            only when at least one counter in the result is non-zero.
        today: Reference instant for the staleness window. Defaults to
            the current UTC time. A naive value is interpreted as UTC.

    Returns:
        A :class:`GateRun` with the pruned / dropped / promoted counts.
        All counts are zero when either the probation or the validated
        section cannot be located.
    """
    if today is None:
        today = _today()
    elif today.tzinfo is None:
        # Parsed ``seen`` stamps are UTC-aware; comparing them against a
        # naive cutoff raises TypeError, so pin naive input to UTC.
        today = today.replace(tzinfo=timezone.utc)
    cutoff = today - timedelta(days=WINDOW_DAYS)
    today_str = today.strftime("%Y-%m-%d")  # loop-invariant marker date

    text = corpus_path.read_text(encoding="utf-8")
    prob = _section_span(text, _PROBATION_HEADER)
    val = _section_span(text, _VALIDATED_HEADER)
    if prob is None or val is None:
        return GateRun(0, 0, 0)

    kept_lines: list[str] = []
    promoted: list[str] = []
    pruned_ts = 0
    dropped = 0
    for line in text[prob[0]:prob[1]].splitlines():
        parsed = _parse_probation_line(line)
        if parsed is None:
            # Not a probation entry (blank line, prose, ...): pass through.
            kept_lines.append(line)
            continue
        prefix, first_seen, seen = parsed
        # Stamps that fail to parse are discarded together with stale ones.
        fresh = [
            s for s in seen
            if (d := _parse_date(s)) is not None and d >= cutoff
        ]
        pruned_ts += len(seen) - len(fresh)
        if len(fresh) >= PROMOTION_THRESHOLD:
            promoted.append(
                f'{prefix} — domain: low-impact · validated {today_str}'
            )
        elif not fresh:
            dropped += 1
        else:
            kept_lines.append(
                f'{prefix} — first-seen {first_seen} · seen [{", ".join(fresh)}]'
            )

    new_prob_body = "\n".join(kept_lines)
    if not new_prob_body.endswith("\n"):
        new_prob_body += "\n"

    new_text = text[:prob[0]] + new_prob_body + text[prob[1]:]
    if promoted:
        # Offsets shifted after the probation rewrite, so re-locate the
        # validated section in the new text. Only its end is needed.
        _, v_end = _section_span(new_text, _VALIDATED_HEADER)  # type: ignore[misc]
        insertion = "\n".join(promoted) + "\n"
        new_text = new_text[:v_end].rstrip() + "\n\n" + insertion + new_text[v_end:]

    result = GateRun(pruned_ts, dropped, len(promoted))
    if not result.is_noop:
        corpus_path.write_text(new_text, encoding="utf-8")
    return result
|
|
@@ -122,6 +122,42 @@ MUST:
|
|
|
122
122
|
evidence in the artefact).
|
|
123
123
|
""".strip()
|
|
124
124
|
|
|
125
|
+
ANALYSIS_MODE = """\
|
|
126
|
+
The artefact is a local analysis output (from a project analyzer,
|
|
127
|
+
audit script, or codebase scan). Critique the **analysis itself**, not
|
|
128
|
+
the underlying codebase. You MUST:
|
|
129
|
+
1. Flag findings that are restated under different headings —
|
|
130
|
+
deduplicate aggressively. The downstream consumer wants a unique
|
|
131
|
+
Top-N, not a long list with overlap.
|
|
132
|
+
2. Score the evidence quality of each finding: confirmed (the
|
|
133
|
+
analysis cites file:line / metric), inferred (plausible from
|
|
134
|
+
stated context), or speculative (no citation, vibes-only).
|
|
135
|
+
Speculative findings must be called out by name.
|
|
136
|
+
3. Identify findings that are roadmap-ready (concrete enough to land
|
|
137
|
+
as a phase step) vs ones that need a discovery loop first.
|
|
138
|
+
4. Propose 3–5 follow-up actions ranked by leverage — what the next
|
|
139
|
+
roadmap should attack first. Cite the supporting finding(s) by id
|
|
140
|
+
or heading.
|
|
141
|
+
End with: a Top-N consensus list (one bullet per finding the
|
|
142
|
+
analysis surfaces) plus a single sentence on the strongest blind
|
|
143
|
+
spot the analysis itself has.
|
|
144
|
+
""".strip()
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
DEBATE_MODE = """\
|
|
148
|
+
The artefact is the topic of a structured multi-round debate. You are
|
|
149
|
+
one of several independent reviewers. Round-specific instructions:
|
|
150
|
+
1. Round 1 — state your strongest, most defensible position on the
|
|
151
|
+
topic. Argue from evidence and first principles. Do not hedge.
|
|
152
|
+
2. Round 2+ — read the anonymised positions from the previous round.
|
|
153
|
+
Identify the SINGLE strongest opposing position and write a
|
|
154
|
+
rebuttal addressed at its strongest steel-manned form. Your task
|
|
155
|
+
is to find the load-bearing flaw the opposing reviewer missed —
|
|
156
|
+
do NOT search for common ground.
|
|
157
|
+
End each round with: a one-line position summary and the single
|
|
158
|
+
piece of evidence that would change your mind.
|
|
159
|
+
""".strip()
|
|
160
|
+
|
|
125
161
|
|
|
126
162
|
_MODE_TABLE = {
|
|
127
163
|
"prompt": PROMPT_MODE,
|
|
@@ -131,9 +167,185 @@ _MODE_TABLE = {
|
|
|
131
167
|
"pr": PR_MODE,
|
|
132
168
|
"design": DESIGN_MODE,
|
|
133
169
|
"optimize": OPTIMIZE_MODE,
|
|
170
|
+
"analysis": ANALYSIS_MODE,
|
|
171
|
+
"debate": DEBATE_MODE,
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# ── Consensus-scoring prompts (Phase 4 / F3) ──────────────────────────
|
|
176
|
+
#
|
|
177
|
+
# Two-step extraction + scoring round used by the analysis lens. The
|
|
178
|
+
# extraction pass asks each member to surface its own top findings in
|
|
179
|
+
# a strict JSON shape; the scoring pass asks each member to rate
|
|
180
|
+
# anonymised findings produced by the *other* members.
|
|
181
|
+
#
|
|
182
|
+
# Iron Law of Neutrality applies to both: the extraction prompt never
|
|
183
|
+
# names other reviewers, and the scoring prompt strips the source
|
|
184
|
+
# author by using `Finding-A` / `Finding-B` labels (see
|
|
185
|
+
# `consensus.anonymize_findings`).
|
|
186
|
+
|
|
187
|
+
FINDING_EXTRACTION_PROMPT = """\
|
|
188
|
+
You have just produced an analysis. Re-emit your top findings as a
|
|
189
|
+
strict JSON array suitable for downstream tooling. Each item MUST
|
|
190
|
+
have:
|
|
191
|
+
|
|
192
|
+
{"id": "<short-slug>", "text": "<one-sentence finding>"}
|
|
193
|
+
|
|
194
|
+
Rules:
|
|
195
|
+
- 3-7 findings, ordered by importance (most important first).
|
|
196
|
+
- `id` is a 1-3 word kebab-case slug, unique within your array.
|
|
197
|
+
- `text` is a single sentence, no markdown, no reviewer self-reference.
|
|
198
|
+
- Wrap the array in a ```json``` fenced block. No commentary outside it.
|
|
199
|
+
""".strip()
|
|
200
|
+
|
|
201
|
+
FINDING_SCORING_PROMPT = """\
|
|
202
|
+
Below are findings from other independent reviewers, presented with
|
|
203
|
+
neutral labels (Finding-A, Finding-B, …). Score each one on its
|
|
204
|
+
merits. You MUST emit a strict JSON array, one entry per finding,
|
|
205
|
+
in this shape:
|
|
206
|
+
|
|
207
|
+
{"finding_id": "Finding-A", "score": 1-10, "agree": true|false,
|
|
208
|
+
"reason": "<one-sentence justification>"}
|
|
209
|
+
|
|
210
|
+
Rules:
|
|
211
|
+
- `score` is an integer 1 (weak / irrelevant) to 10 (load-bearing /
|
|
212
|
+
must-address).
|
|
213
|
+
- `agree=true` means you would surface this same finding yourself;
|
|
214
|
+
`agree=false` means you think it is wrong, overstated, or off-topic.
|
|
215
|
+
- `reason` is a single sentence, no markdown.
|
|
216
|
+
- Wrap the array in a ```json``` fenced block. No commentary outside it.
|
|
217
|
+
|
|
218
|
+
You may not see your own findings in the list — that is by design.
|
|
219
|
+
""".strip()
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# ── Synthesis templates (Phase 3 / F2) ────────────────────────────────
|
|
223
|
+
#
|
|
224
|
+
# Lens-aware synthesis prompts. Each entry maps a lens key onto the
|
|
225
|
+
# block the host agent should produce when summarising member responses.
|
|
226
|
+
# R4 Q4 split: decision lenses get a Karpathy-structured template;
|
|
227
|
+
# creative lenses (design / optimize) stay open-ended prose (empty
|
|
228
|
+
# string → renderer falls back to the bare "Convergence / Divergence"
|
|
229
|
+
# slot). Input modes (prompt / roadmap / diff / files) map onto the
|
|
230
|
+
# `default` decision template via `synthesis_template()`.
|
|
231
|
+
|
|
232
|
+
DEFAULT_SYNTHESIS = """\
|
|
233
|
+
Summarise the council using the structured shape below. Be terse,
|
|
234
|
+
cite reviewers by label, and refuse to invent agreement that is not
|
|
235
|
+
in the responses.
|
|
236
|
+
|
|
237
|
+
### Agreement
|
|
238
|
+
Points that two or more reviewers converged on, each as a single line.
|
|
239
|
+
|
|
240
|
+
### Clashes
|
|
241
|
+
Points where reviewers disagreed. State both sides with a one-line
|
|
242
|
+
reviewer-label citation per side.
|
|
243
|
+
|
|
244
|
+
### Blind spots
|
|
245
|
+
Items that none of the reviewers raised but that the artefact's
|
|
246
|
+
context suggests are load-bearing. Maximum three. Mark each as
|
|
247
|
+
`needs-verification` when the host agent inferred it rather than
|
|
248
|
+
read it directly from a response.
|
|
249
|
+
|
|
250
|
+
### Recommendation
|
|
251
|
+
A single sentence: which course the host agent should advise the
|
|
252
|
+
user to take, grounded in the strongest converged point.
|
|
253
|
+
|
|
254
|
+
### Next step
|
|
255
|
+
One concrete next action the user can take in their current turn.
|
|
256
|
+
""".strip()
|
|
257
|
+
|
|
258
|
+
PR_SYNTHESIS = """\
|
|
259
|
+
Summarise the council with the PR-review shape below.
|
|
260
|
+
|
|
261
|
+
### Consensus
|
|
262
|
+
Findings where two or more reviewers agreed, each one a single line.
|
|
263
|
+
|
|
264
|
+
### Conflicts
|
|
265
|
+
Findings where reviewers disagreed. State both sides with reviewer
|
|
266
|
+
labels; do not pick a winner here — that lives in the recommendation.
|
|
267
|
+
|
|
268
|
+
### Must-fix before merge
|
|
269
|
+
Items at least one reviewer marked `REQUEST_CHANGES` or `REJECT`
|
|
270
|
+
and the host agent confirms are load-bearing. Maximum five.
|
|
271
|
+
|
|
272
|
+
### Recommendation
|
|
273
|
+
APPROVE / REQUEST_CHANGES / REJECT and a single sentence justifying
|
|
274
|
+
the verdict, anchored on the strongest consensus or must-fix line.
|
|
275
|
+
""".strip()
|
|
276
|
+
|
|
277
|
+
ANALYSIS_SYNTHESIS = """\
|
|
278
|
+
Summarise the council with the analysis-lens shape below.
|
|
279
|
+
|
|
280
|
+
### Top-10 by consensus
|
|
281
|
+
Findings ranked by how many reviewers surfaced them. Format each
|
|
282
|
+
line as: `N. <finding> — cited by <reviewer labels> · evidence:
|
|
283
|
+
confirmed | inferred | speculative · roadmap-ready: yes | needs-discovery`.
|
|
284
|
+
Stop at ten or when only single-reviewer items remain, whichever
|
|
285
|
+
comes first.
|
|
286
|
+
|
|
287
|
+
### Supporting
|
|
288
|
+
Findings that one reviewer raised and at least one other treated as
|
|
289
|
+
plausible but did not independently surface. One line each, same
|
|
290
|
+
metadata shape as Top-10.
|
|
291
|
+
|
|
292
|
+
### Outliers
|
|
293
|
+
Single-reviewer findings the others did not engage with. Keep them
|
|
294
|
+
— they are signal for a future deeper analysis pass — but mark each
|
|
295
|
+
as `unverified-by-council`.
|
|
296
|
+
""".strip()
|
|
297
|
+
|
|
298
|
+
# Creative lenses — open-ended prose, no template. The renderer keeps
|
|
299
|
+
# the bare "Convergence / Divergence" slot so the host agent can write
|
|
300
|
+
# free-form synthesis.
|
|
301
|
+
_CREATIVE_PASSTHROUGH = ""
|
|
302
|
+
|
|
303
|
+
_SYNTHESIS_TABLE = {
|
|
304
|
+
"default": DEFAULT_SYNTHESIS,
|
|
305
|
+
"pr": PR_SYNTHESIS,
|
|
306
|
+
"analysis": ANALYSIS_SYNTHESIS,
|
|
307
|
+
"design": _CREATIVE_PASSTHROUGH,
|
|
308
|
+
"optimize": _CREATIVE_PASSTHROUGH,
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
# Input modes inherit the `default` decision template. Lens overrides
|
|
312
|
+
# (`pr`/`design`/`optimize`/`analysis`) pick their own row.
|
|
313
|
+
_INPUT_MODE_TO_SYNTHESIS_KEY = {
|
|
314
|
+
"prompt": "default",
|
|
315
|
+
"roadmap": "default",
|
|
316
|
+
"diff": "default",
|
|
317
|
+
"files": "default",
|
|
134
318
|
}
|
|
135
319
|
|
|
136
320
|
|
|
321
|
+
def synthesis_template(mode: str | None) -> str:
    """Look up the synthesis-prompt body for ``mode``.

    ``None`` resolves to the ``default`` row. A key present in the
    synthesis table resolves to its own row; otherwise a key present in
    the input-mode map resolves to the row that map names. Any other
    value raises ``ValueError``.

    The returned string may be empty: rows mapped to the creative
    passthrough value are stored as ``""``.
    """
    if mode is None:
        key = "default"
    elif mode in _SYNTHESIS_TABLE:
        key = mode
    elif mode in _INPUT_MODE_TO_SYNTHESIS_KEY:
        key = _INPUT_MODE_TO_SYNTHESIS_KEY[mode]
    else:
        known = sorted(set(_SYNTHESIS_TABLE) | set(_INPUT_MODE_TO_SYNTHESIS_KEY))
        raise ValueError(
            f"Unknown synthesis mode {mode!r}. "
            f"Expected one of: {known}"
        )
    return _SYNTHESIS_TABLE[key]
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def all_synthesis_modes() -> list[str]:
    """Return the keys of the synthesis table in sorted order."""
    lens_keys = list(_SYNTHESIS_TABLE)
    lens_keys.sort()
    return lens_keys
|
|
347
|
+
|
|
348
|
+
|
|
137
349
|
def _strip_host_identity(text: str) -> str:
|
|
138
350
|
"""Drop any *whole line* containing a host-agent identity substring.
|
|
139
351
|
|
|
@@ -230,3 +442,126 @@ def system_prompt_for(
|
|
|
230
442
|
|
|
231
443
|
def all_modes() -> list[str]:
    """Return the keys of the mode table in sorted order."""
    mode_keys = list(_MODE_TABLE)
    mode_keys.sort()
    return mode_keys
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def advisor_system_prompt(
    persona_text: str,
    *,
    project: ProjectContext | None = None,
    original_ask: str = "",
) -> str:
    """Assemble the system prompt for an advisor-mode call (Phase 6).

    The result is the handoff preamble built from ``project`` and
    ``original_ask``, a blank line, then the whitespace-trimmed persona
    text. No entry from ``_MODE_TABLE`` is added here; per the original
    note, the persona file carries the full instructions for an advisor
    call.

    Raises:
        ValueError: if ``persona_text`` is empty or whitespace-only.
    """
    preamble = handoff_preamble(project, original_ask)
    persona = persona_text.strip() if persona_text else ""
    if persona == "":
        raise ValueError("advisor_system_prompt: persona_text is empty.")
    return f"{preamble}\n\n{persona}"
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def build_extraction_user_prompt(original_analysis: str) -> str:
    """Build the user message for the finding-extraction pass.

    The extraction rules come first, then a ``---`` separator, then the
    prior analysis after it has been passed through
    ``_strip_host_identity`` and trimmed. A falsy ``original_analysis``
    is treated as an empty string.
    """
    source_text = original_analysis if original_analysis else ""
    analysis = _strip_host_identity(source_text).strip()
    return "\n\n".join((FINDING_EXTRACTION_PROMPT, "---", analysis))
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
def build_scoring_user_prompt(anonymised: dict[str, str]) -> str:
    """Build the user message for the scoring pass.

    ``anonymised`` maps neutral labels (``Finding-A``, ``Finding-B``, …)
    to finding text and is rendered in its iteration order, one
    ``### <label>`` section per item. Nothing is anonymised here; the
    caller must have stripped author identities already.
    """
    sections = [f"### {label}\n\n{text}" for label, text in anonymised.items()]
    # The empty-string parts are kept on purpose: joined with a blank-line
    # separator they yield three blank lines on each side of the ``---``
    # rule, exactly as the prompt has always been emitted.
    return "\n\n".join([FINDING_SCORING_PROMPT, "", "---", "", *sections])
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
# ── Peer-review (Phase 5 / F1, Karpathy anonymous review) ────────────
|
|
493
|
+
#
|
|
494
|
+
# After the final deliberation round, each member sees the OTHER
|
|
495
|
+
# members' deliberation outputs under neutral `Response-A` / `Response-B`
|
|
496
|
+
# labels and produces a Karpathy-style critique: strongest response,
|
|
497
|
+
# weakest blind spot, what all of them missed. Provider identity is
|
|
498
|
+
# stripped (Iron Law of Neutrality § peer-review); advisor persona
|
|
499
|
+
# labels (Phase 6) are preserved by the caller via `anonymize_responses`.
|
|
500
|
+
#
|
|
501
|
+
# Reviewers never see their own response — that is by design (the
|
|
502
|
+
# orchestrator filters self before calling `build_peer_review_user_prompt`).
|
|
503
|
+
|
|
504
|
+
PEER_REVIEW_PROMPT = """\
|
|
505
|
+
Below are responses from other independent reviewers to the same
|
|
506
|
+
artefact you just reviewed. Each is labelled with a neutral identifier
|
|
507
|
+
(`Response-A`, `Response-B`, …). You do NOT know which model produced
|
|
508
|
+
which response. Critique them as a peer — your goal is to surface
|
|
509
|
+
signal the round-1 deliberation may have missed.
|
|
510
|
+
|
|
511
|
+
Respond in plain prose under exactly these four headings:
|
|
512
|
+
|
|
513
|
+
### Strongest response
|
|
514
|
+
Name the single response whose argument or evidence is most
|
|
515
|
+
load-bearing. Cite the label. One paragraph.
|
|
516
|
+
|
|
517
|
+
### Weakest blind spot
|
|
518
|
+
The single most important thing one specific response missed,
|
|
519
|
+
glossed over, or got wrong. Cite the label. One paragraph.
|
|
520
|
+
|
|
521
|
+
### What everyone missed
|
|
522
|
+
A point none of the responses raised but that the artefact's context
|
|
523
|
+
suggests is load-bearing. One paragraph. Mark as `needs-verification`
|
|
524
|
+
when you inferred it rather than read it directly from the artefact.
|
|
525
|
+
|
|
526
|
+
### Refinement
|
|
527
|
+
One sentence: which course the synthesizer should prefer in light of
|
|
528
|
+
the above, grounded in the strongest converged signal.
|
|
529
|
+
|
|
530
|
+
Rules:
|
|
531
|
+
- Cite labels exactly as given (`Response-A`, not `A` or `the first one`).
|
|
532
|
+
- Do not invent agreement or disagreement that is not visible in the
|
|
533
|
+
responses themselves.
|
|
534
|
+
- You may NOT see your own response in the list — that is by design.
|
|
535
|
+
""".strip()
|
|
536
|
+
|
|
537
|
+
PEER_REVIEW_SYNTHESIS_ADDENDUM = """\
|
|
538
|
+
|
|
539
|
+
### Peer-Review-Surfaced Blind Spots
|
|
540
|
+
Items the peer-review round surfaced that the round-1 responses did
|
|
541
|
+
not. Cite the peer-reviewer label and the targeted response label
|
|
542
|
+
(`Reviewer A on Response-B: <one-line summary>`). Maximum three.
|
|
543
|
+
""".rstrip()
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def build_peer_review_user_prompt(anonymised: dict[str, str]) -> str:
    """Build the user message for the peer-review pass.

    ``anonymised`` maps neutral labels (``Response-A``, ``Response-B``, …)
    to response text and is rendered in its iteration order, one
    ``### <label>`` section per item. Nothing is anonymised here; the
    caller must have stripped provider identities already.
    """
    sections = [f"### {label}\n\n{text}" for label, text in anonymised.items()]
    # The empty-string parts are kept on purpose: joined with a blank-line
    # separator they yield three blank lines on each side of the ``---``
    # rule, exactly as the prompt has always been emitted.
    return "\n\n".join([PEER_REVIEW_PROMPT, "", "---", "", *sections])
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def peer_review_synthesis_addendum() -> str:
    """Return the extra synthesis section used when peer-review fired.

    The value is the module-level ``PEER_REVIEW_SYNTHESIS_ADDENDUM``
    string, returned unchanged. Per the original note, the renderer
    appends it to the lens-specific synthesis template; that caller is
    not visible from here.
    """
    addendum = PEER_REVIEW_SYNTHESIS_ADDENDUM
    return addendum
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""Privacy floor for `agents/low-impact-decisions.md` (Phase 12).
|
|
2
|
+
|
|
3
|
+
Non-bypassable redactor invoked on intake (write-side) AND on
|
|
4
|
+
upstream (`/learn-low-impact`, leave-the-repo side). Both gates call
|
|
5
|
+
:func:`redact_low_impact_entry` and refuse to proceed when a forbidden
|
|
6
|
+
pattern fires.
|
|
7
|
+
|
|
8
|
+
Iron Law: nothing leaves the project repo until this redactor clears
|
|
9
|
+
the entry. See ``.augment/rules/low-impact-corpus-privacy-floor.md``.
|
|
10
|
+
|
|
11
|
+
Forbidden-content classes (per Phase 12 § Step 4):
|
|
12
|
+
|
|
13
|
+
1. Secrets — raw-key prefixes mirrored from
|
|
14
|
+
:data:`scripts.ai_council.config._RAW_KEY_PREFIXES`, plus a
|
|
15
|
+
generic ``api[-_]?key:\\s*<token>`` shape.
|
|
16
|
+
2. Emails — RFC-5322-ish shape, deliberately permissive.
|
|
17
|
+
3. Project-rooted paths — anything starting ``/Users/``, ``/home/``,
|
|
18
|
+
``/opt/``, ``/private/``, drive letters (``C:\\``), or the
|
|
19
|
+
configured repo root from ``.agent-settings.yml`` when supplied.
|
|
20
|
+
4. Customer / tenant names — caller passes a name list (project
|
|
21
|
+
policy); generic placeholders ``<customer>``, ``<tenant>``,
|
|
22
|
+
``<account>``, ``<user>`` survive.
|
|
23
|
+
5. Internal hostnames — ``*.internal``, ``*.local``, plus any
|
|
24
|
+
project-private domain the caller supplies.
|
|
25
|
+
6. Monetary amounts — ``$1,234`` / ``€500`` / ``USD 1000`` shapes
|
|
26
|
+
that look like business figures (lone ``$0.05`` cap mentions in
|
|
27
|
+
curly-brace context are skipped via the call-site, not here).
|
|
28
|
+
7. Business-context SQL identifiers — caller-supplied table /
|
|
29
|
+
column allow-list. Default empty.
|
|
30
|
+
8. Inline code excerpts > 40 chars — any backtick-fenced run > 40.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
from __future__ import annotations
|
|
34
|
+
|
|
35
|
+
import re
|
|
36
|
+
from dataclasses import dataclass, field
|
|
37
|
+
from pathlib import Path
|
|
38
|
+
from typing import Iterable
|
|
39
|
+
|
|
40
|
+
from scripts.ai_council.config import _RAW_KEY_PREFIXES
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass(frozen=True)
class RedactionViolation:
    """One forbidden-pattern match reported by the redactor."""

    # Label of the forbidden-content class that fired (e.g. "secret", "email").
    category: str
    # The offending text as reported by the check that produced this hit.
    snippet: str
    # Optional extra detail about the match; empty when none is given.
    note: str = ""
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass(frozen=True)
class RedactionResult:
    """Result of a single redaction pass over one entry."""

    # True when the pass is considered clean.
    ok: bool
    # Hits collected during the pass; empty by default.
    violations: tuple[RedactionViolation, ...] = ()

    def summary(self) -> str:
        """Describe the outcome in one line.

        A clean result yields a fixed message. Otherwise every violation
        is listed as ``<category>: <repr of snippet>``, joined by ``"; "``.
        """
        if not self.ok:
            details = "; ".join(
                f"{hit.category}: {hit.snippet!r}" for hit in self.violations
            )
            return "redaction REFUSED — " + details
        return "redaction: clean"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Precompiled detectors shared by the check functions below.

# Email addresses: permissive ``local@domain.tld`` shape.
_EMAIL_RE = re.compile(r"\b[\w.+-]+@[\w-]+\.[\w.-]+\b")

# Machine-specific absolute paths. The lookbehind demands start-of-text or a
# delimiter (whitespace, quote, open paren) in front of the path without
# consuming it, so ``group(0)`` is the path alone rather than
# ``"/Users/...`` with a stray quote. Drive letters match in either case
# because ``c:\`` and ``C:\`` name the same Windows drive.
_PATH_RE = re.compile(
    r"(?<![^\s\"'(])"
    r"(?:/Users/|/home/|/opt/|/private/|[A-Za-z]:\\)"
    r"[\w.\-/\\]+"
)

# Hostnames ending in ``.internal`` or ``.local`` (case-insensitive).
_INTERNAL_HOST_RE = re.compile(
    r"\b[a-zA-Z0-9][\w.-]*\.(?:internal|local)\b",
    re.IGNORECASE,
)

# Monetary amounts: a currency symbol or ISO code followed by digits. The
# symbol branch accepts either thousands-grouped digits (``1,234``) or a
# plain digit run, so an ungrouped ``$12345`` is reported in full instead
# of being cut off after three digits.
_MONEY_RE = re.compile(
    r"(?:[\$€£¥]\s?(?:\d{1,3}(?:[,.]\d{3})+|\d+)(?:\.\d+)?"
    r"|\b(?:USD|EUR|GBP|JPY)\s?\d+(?:[,.]\d+)?)"
)

# Inline ``api_key: <token>`` / ``api-key=<token>`` assignments whose token
# is at least 12 characters long.
_API_KEY_RE = re.compile(
    r"(?i)\bapi[_-]?key\b\s*[:=]\s*[A-Za-z0-9+/=_\-]{12,}"
)

# Backtick-delimited spans whose content exceeds 40 characters.
_CODE_FENCE_RE = re.compile(r"`([^`]{41,})`")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _check_secrets(text: str) -> list[RedactionViolation]:
    """Collect secret-looking hits in ``text``.

    Two shapes are checked:

    * each prefix in ``_RAW_KEY_PREFIXES`` followed by at least six key
      characters (first occurrence per prefix only);
    * an inline ``api_key: <token>`` style assignment matched by
      ``_API_KEY_RE`` (first occurrence only).

    Returns:
        ``RedactionViolation`` objects with category ``"secret"``. Raw-key
        snippets keep the first 8 characters of the match; inline snippets
        keep only the ``api_key =`` label so the report never repeats the
        token it is complaining about.
    """
    hits: list[RedactionViolation] = []
    for prefix in _RAW_KEY_PREFIXES:
        found = re.search(re.escape(prefix) + r"[A-Za-z0-9_\-]{6,}", text)
        if found:
            hits.append(RedactionViolation(
                "secret", found.group(0)[:8] + "…",
                f"raw-key prefix {prefix!r}",
            ))
    found = _API_KEY_RE.search(text)
    if found:
        matched = found.group(0)
        # Slicing the raw match to 20 chars would expose most (or all) of a
        # short token, e.g. ``apikey=<12 chars>`` is only 19 chars long.
        # Isolate the label part and report just that.
        label = re.match(r"(?i)api[_-]?key\s*[:=]\s*", matched)
        shown = matched[:label.end()] if label else ""
        hits.append(RedactionViolation("secret", shown[:20] + "…",
                                       "inline api_key"))
    return hits
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _check_patterns(text: str, repo_root: str | None,
                    private_domains: Iterable[str],
                    customer_names: Iterable[str],
                    sql_identifiers: Iterable[str]) -> list[RedactionViolation]:
    """Collect every non-secret forbidden-pattern hit in ``text``.

    Checks run in a fixed order and each appends to the same list:
    emails, project paths (regex, then the literal ``repo_root``),
    internal hostnames (regex, then ``private_domains``), monetary
    amounts, customer names, SQL identifiers, and long backtick spans.

    Args:
        text: The entry to inspect.
        repo_root: Literal path to flag when it occurs in ``text``;
            skipped when ``None`` or empty.
        private_domains: Domains to flag. Compared case-insensitively,
            as hostnames carry no case distinction.
        customer_names: Names to flag as whole words, ignoring case.
        sql_identifiers: Identifiers to flag as whole words, exact case.

    Returns:
        The hits found, in check order; empty when nothing matched.
    """
    hits: list[RedactionViolation] = []
    for m in _EMAIL_RE.finditer(text):
        hits.append(RedactionViolation("email", m.group(0)))
    for m in _PATH_RE.finditer(text):
        hits.append(RedactionViolation("project_path", m.group(0).strip()))
    if repo_root and repo_root in text:
        hits.append(RedactionViolation("project_path", repo_root,
                                       "configured repo root"))
    for m in _INTERNAL_HOST_RE.finditer(text):
        hits.append(RedactionViolation("internal_hostname", m.group(0)))
    # Lower-case once so ``Corp.Example.COM`` cannot slip past a configured
    # ``corp.example.com``; the built-in host regex is case-insensitive too.
    lowered = text.lower()
    for dom in private_domains:
        if dom and dom.lower() in lowered:
            hits.append(RedactionViolation("internal_hostname", dom,
                                           "configured private domain"))
    for m in _MONEY_RE.finditer(text):
        hits.append(RedactionViolation("monetary_amount", m.group(0)))
    for name in customer_names:
        if name and re.search(rf"\b{re.escape(name)}\b", text, re.IGNORECASE):
            hits.append(RedactionViolation("customer_name", name))
    for ident in sql_identifiers:
        if ident and re.search(rf"\b{re.escape(ident)}\b", text):
            hits.append(RedactionViolation("sql_identifier", ident))
    for m in _CODE_FENCE_RE.finditer(text):
        # Report only the first 40 chars of the span plus its full length.
        hits.append(RedactionViolation("long_code_excerpt",
                                       m.group(1)[:40] + "…",
                                       f"{len(m.group(1))} chars"))
    return hits
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _as_str_tuple(values: Iterable[str]) -> tuple[str, ...]:
    """Materialize ``values``, treating a bare string as a single entry."""
    if isinstance(values, str):
        return (values,)
    return tuple(values)


def redact_low_impact_entry(
    text: str,
    *,
    repo_root: str | None = None,
    private_domains: Iterable[str] = (),
    customer_names: Iterable[str] = (),
    sql_identifiers: Iterable[str] = (),
) -> RedactionResult:
    """Run the privacy floor over ``text``. Returns clean or refused.

    The redactor never auto-rewrites the entry — that would be a soft
    privacy gate. It refuses + surfaces what to rephrase, which keeps
    the user in the loop and the audit trail honest.

    Args:
        text: The entry to inspect.
        repo_root: Optional literal repo path to flag.
        private_domains: Project-private domains to flag.
        customer_names: Customer / tenant names to flag.
        sql_identifiers: Table / column names to flag.

    A bare string passed for any of the three list options is treated as
    a one-element list; iterating it character by character would flag
    single letters instead of the intended name.

    Returns:
        A ``RedactionResult`` whose ``ok`` is true only when no check
        produced a violation; ``violations`` lists secret hits first,
        then pattern hits.
    """
    violations: list[RedactionViolation] = []
    violations.extend(_check_secrets(text))
    violations.extend(_check_patterns(
        text,
        repo_root,
        _as_str_tuple(private_domains),
        _as_str_tuple(customer_names),
        _as_str_tuple(sql_identifiers),
    ))
    return RedactionResult(ok=not violations, violations=tuple(violations))
|