@event4u/agent-config 4.9.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/.agent-src/commands/implement-ticket.md +5 -4
  2. package/.agent-src/rules/language-and-tone.md +4 -10
  3. package/.agent-src/skills/command-routing/SKILL.md +5 -4
  4. package/.claude-plugin/marketplace.json +1 -1
  5. package/CHANGELOG.md +73 -0
  6. package/CONTRIBUTING.md +19 -0
  7. package/README.md +11 -0
  8. package/dist/cli/registry.js +0 -2
  9. package/dist/cli/registry.js.map +1 -1
  10. package/dist/discovery/deprecation-report.md +1 -1
  11. package/dist/discovery/discovery-manifest.json +5 -5
  12. package/dist/discovery/discovery-manifest.json.sha256 +1 -1
  13. package/dist/discovery/discovery-manifest.summary.md +1 -1
  14. package/dist/discovery/orphan-report.md +1 -1
  15. package/dist/discovery/packs.json +2 -2
  16. package/dist/discovery/trust-report.md +1 -1
  17. package/dist/discovery/workspaces.json +2 -2
  18. package/dist/mcp/registry-manifest.json +2 -2
  19. package/dist/router.json +1 -1671
  20. package/docs/benchmark.md +20 -8
  21. package/docs/benchmarks.md +11 -0
  22. package/docs/contracts/benchmark-corpus-spec.md +31 -3
  23. package/docs/contracts/command-surface-tiers.md +1 -1
  24. package/docs/contracts/hook-architecture-v1.md +33 -0
  25. package/docs/contracts/migrate-command.md +197 -0
  26. package/docs/contracts/settings-api.md +2 -1
  27. package/docs/contracts/value-dashboard-spec.md +374 -0
  28. package/docs/contracts/value-report-schema.md +150 -0
  29. package/docs/decisions/ADR-031-validation-severity-tiers-and-projection-roundtrip.md +97 -0
  30. package/docs/decisions/INDEX.md +1 -0
  31. package/docs/guidelines/agent-infra/installed-tools-manifest.md +6 -3
  32. package/docs/guidelines/agent-infra/language-and-tone-examples.md +35 -0
  33. package/docs/migration/v1-to-v2.md +40 -27
  34. package/docs/value.md +84 -0
  35. package/package.json +8 -8
  36. package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
  37. package/scripts/_cli/cmd_migrate.py +264 -102
  38. package/scripts/_cli/cmd_settings_migrate.py +2 -1
  39. package/scripts/_dispatch.bash +147 -49
  40. package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
  41. package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
  42. package/scripts/_lib/install_regenerator.py +129 -0
  43. package/scripts/_lib/value_ladder.py +599 -0
  44. package/scripts/_lib/value_report.py +441 -0
  45. package/scripts/bench_rtk_savings.py +320 -0
  46. package/scripts/compile_router.py +19 -5
  47. package/scripts/expected_perms.json +1 -1
  48. package/scripts/first_run_gate_hook.py +178 -0
  49. package/scripts/hook_manifest.yaml +16 -7
  50. package/scripts/hooks/dispatch_hook.py +27 -0
  51. package/scripts/hooks/dispatch_issues.py +136 -0
  52. package/scripts/hooks_doctor.py +40 -1
  53. package/scripts/install.py +25 -21
  54. package/scripts/lint_agents_layout.py +5 -4
  55. package/scripts/lint_bench_corpus.py +86 -4
  56. package/scripts/lint_global_paths.py +4 -3
  57. package/scripts/lint_marketplace_install_completeness.py +188 -0
  58. package/scripts/lint_value_dashboard.py +218 -0
  59. package/scripts/render_benchmark_md.py +6 -2
  60. package/scripts/render_value_md.py +355 -0
  61. package/scripts/repro/repro_marketplace_install_gap.sh +161 -0
  62. package/scripts/roadmap_progress_hook.py +23 -0
  63. package/scripts/router_telemetry.py +470 -0
  64. package/scripts/validate_frontmatter.py +23 -9
  65. package/scripts/_cli/cmd_migrate_to_global.py +0 -415
@@ -0,0 +1,161 @@
1
+ #!/usr/bin/env bash
2
+ # Reproduce the silent marketplace-install gap that this roadmap fixes.
3
+ #
4
+ # Phase 0 of `road-to-hooks-actually-fire-in-consumers`.
5
+ #
6
+ # Simulates a consumer project that ran `/plugin install
7
+ # agent-config@event4u-agent-config` but NEVER ran `agent-config init`.
8
+ # The plugin's hooks.json fires under Claude's lifecycle, but every
9
+ # resolved command (`"$CLAUDE_PROJECT_DIR"/agent-config dispatch:hook
10
+ # …`) errors out silently because the prerequisites do not exist.
11
+ #
12
+ # Expected output (the bug):
13
+ # - Dispatcher exits 0 (never-block contract)
14
+ # - NO `agents/roadmaps-progress.md` written
15
+ # - NO state file under `agents/runtime/state/`
16
+ # - Hook tried, hook failed, no trace left behind
17
+ #
18
+ # Once Phases 1-4 land, the same script should produce a
19
+ # `dispatch-issues.jsonl` entry naming the missing artefact.
20
+
21
+ set -euo pipefail
22
+
23
+ REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
24
+ TMPDIR_BASE="${TMPDIR:-/tmp}"
25
+ CONSUMER_ROOT="$(mktemp -d "$TMPDIR_BASE/marketplace-install-gap-XXXXXX")"
26
+ EVIDENCE_FILE="$REPO_ROOT/agents/evidence/analysis/hooks-marketplace-gap-2026-05-29.md"
27
+
28
# Remove every temporary artefact this script creates. Registered on the
# EXIT trap, so it has to tolerate variables that were never assigned
# (the script runs under `set -u`) and paths that are already gone.
cleanup() {
  rm -rf "$CONSUMER_ROOT" 2>/dev/null || true
  # The dispatcher stderr capture file is created later in the script; if
  # the run aborts before its explicit `rm -f`, it would otherwise leak.
  if [ -n "${DISPATCH_STDERR:-}" ]; then
    rm -f "$DISPATCH_STDERR" 2>/dev/null || true
  fi
}
31
+ trap cleanup EXIT
32
+
33
+ echo "==> Setting up synthetic marketplace-install consumer at: $CONSUMER_ROOT"
34
+
35
+ # 1. Write only the marketplace-install end-state (.claude/settings.json
36
+ # with the plugin enabled). Nothing else — no symlink, no regenerator,
37
+ # no .augment/, no agents/runtime/state/.
38
+ mkdir -p "$CONSUMER_ROOT/.claude"
39
+ cat > "$CONSUMER_ROOT/.claude/settings.json" <<'JSON'
40
+ {
41
+ "enabledPlugins": {
42
+ "agent-config@event4u-agent-config": true
43
+ }
44
+ }
45
+ JSON
46
+
47
+ # 2. Fake roadmap so a hook on path_prefix `agents/roadmaps/` has a
48
+ # target to react to. (For Phase 0 we don't actually run a hook
49
+ # that depends on this — but it documents the file layout.)
50
+ mkdir -p "$CONSUMER_ROOT/agents/roadmaps"
51
+ cat > "$CONSUMER_ROOT/agents/roadmaps/road-to-fake.md" <<'MD'
52
+ ---
53
+ complexity: lightweight
54
+ ---
55
+ # Roadmap: fake
56
+
57
+ ## Phase 1
58
+
59
+ - [ ] **Step 1:** placeholder
60
+ MD
61
+
62
+ # 3. Capture the missing-artefact inventory BEFORE we try anything.
63
+ echo
64
+ echo "==> Missing-artefact inventory:"
65
# Build the inventory of artefacts a fully initialised consumer would have.
# Each entry is "<relative path>" optionally followed by " (<note>)"; the
# note is stripped before the existence check, so every human-readable
# annotation MUST live inside the parentheses.
INVENTORY=""
for artefact in \
  ".claude/settings.json (plugin enabled)" \
  "agent-config (symlink)" \
  ".augment/scripts/update_roadmap_progress.py" \
  ".agent-src/scripts/update_roadmap_progress.py" \
  ".agent-src.uncondensed/scripts/update_roadmap_progress.py" \
  ".git/hooks/pre-commit" \
  "agents/runtime/state/"
do
  # Strip the trailing parenthetical to get the path to probe.
  path_only="${artefact% (*}"
  # -L is checked as well so a dangling symlink still counts as present.
  if [ -e "$CONSUMER_ROOT/$path_only" ] || [ -L "$CONSUMER_ROOT/$path_only" ]; then
    status="present"
  else
    status="MISSING"
  fi
  line=" $status: $artefact"
  INVENTORY="$INVENTORY$line"$'\n'
  echo "$line"
done
86
+
87
+ # 4. Emit a synthetic PostToolUse JSON envelope on stdin to the
88
+ # dispatch hook, simulating what Claude Code would send when an
89
+ # agent writes to the fake roadmap.
90
+ echo
91
+ echo "==> Invoking dispatch hook with synthetic PostToolUse envelope..."
92
+ ENVELOPE=$(cat <<JSON
93
+ {
94
+ "session_id": "repro-marketplace-gap",
95
+ "transcript_path": "/dev/null",
96
+ "cwd": "$CONSUMER_ROOT",
97
+ "hook_event_name": "PostToolUse",
98
+ "tool_name": "Write",
99
+ "tool_input": {
100
+ "file_path": "$CONSUMER_ROOT/agents/roadmaps/road-to-fake.md"
101
+ },
102
+ "tool_response": {}
103
+ }
104
+ JSON
105
+ )
106
+
107
# Run the dispatcher from inside the synthetic consumer, feeding it the
# envelope on stdin and capturing stderr to a temp file for the report.
DISPATCH_STDERR="$(mktemp "$TMPDIR_BASE/dispatch-stderr-XXXXXX")"
DISPATCH_EXIT=0
# `|| DISPATCH_EXIT=$?` records a non-zero exit without tripping `set -e`.
echo "$ENVELOPE" | (
  cd "$CONSUMER_ROOT"
  CLAUDE_PROJECT_DIR="$CONSUMER_ROOT" python3 "$REPO_ROOT/scripts/hooks/dispatch_hook.py" \
    --platform claude --event post_tool_use --native-event PostToolUse \
    2>"$DISPATCH_STDERR"
) || DISPATCH_EXIT=$?

echo " dispatcher exit code: $DISPATCH_EXIT"
echo " dispatcher stderr:"
# Truncate first, then indent: with `sed ... | head -20` under
# `set -o pipefail`, head closing the pipe early can kill sed with SIGPIPE
# and abort the whole script when stderr is long.
head -20 "$DISPATCH_STDERR" | sed 's/^/ /'
119
+
120
+ # 5. Confirm the silent-no-op shape.
121
+ echo
122
+ echo "==> Verifying the silent no-op:"
123
+
124
+ DASHBOARD_EXISTS="no"
125
+ if [ -e "$CONSUMER_ROOT/agents/roadmaps-progress.md" ]; then
126
+ DASHBOARD_EXISTS="yes"
127
+ fi
128
+ echo " agents/roadmaps-progress.md written: $DASHBOARD_EXISTS (expected: no)"
129
+
130
+ STATE_FILES=0
131
+ if [ -d "$CONSUMER_ROOT/agents/runtime/state" ]; then
132
+ STATE_FILES=$(find "$CONSUMER_ROOT/agents/runtime/state" -type f 2>/dev/null | wc -l | tr -d ' ')
133
+ fi
134
+ echo " state files under agents/runtime/state/: $STATE_FILES (expected: 0)"
135
+
136
+ # Phase-1-aware check: after that phase lands, dispatch-issues.jsonl
137
+ # should exist with at least one entry. Today (pre-Phase-1) it does
138
+ # not. The script reports both shapes.
139
+ DISPATCH_ISSUES="no"
140
+ if [ -e "$CONSUMER_ROOT/agents/runtime/state/dispatch-issues.jsonl" ]; then
141
+ DISPATCH_ISSUES="yes"
142
+ fi
143
+ echo " agents/runtime/state/dispatch-issues.jsonl: $DISPATCH_ISSUES (pre-Phase-1: no; post-Phase-1: yes)"
144
+
145
+ rm -f "$DISPATCH_STDERR"
146
+
147
+ # 6. Append evidence.
148
+ mkdir -p "$(dirname "$EVIDENCE_FILE")"
149
+ {
150
+ printf '## %s — repro run\n\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
151
+ printf 'Tmp consumer root: `%s`\n\n' "$CONSUMER_ROOT"
152
+ printf 'Inventory:\n\n'
153
+ printf '```\n%s```\n\n' "$INVENTORY"
154
+ printf 'Dispatcher exit: `%s`\n' "$DISPATCH_EXIT"
155
+ printf 'Dashboard written: `%s`\n' "$DASHBOARD_EXISTS"
156
+ printf 'State files: `%s`\n' "$STATE_FILES"
157
+ printf 'dispatch-issues.jsonl: `%s`\n\n' "$DISPATCH_ISSUES"
158
+ } >> "$EVIDENCE_FILE"
159
+
160
+ echo
161
+ echo "==> Evidence appended to: ${EVIDENCE_FILE#$REPO_ROOT/}"
@@ -132,6 +132,29 @@ def run(stdin_text: str, *, consumer_root: Path, verbose: bool = False) -> int:
132
132
 
133
133
  script = _resolve_regenerator(consumer_root)
134
134
  if script is None:
135
+ # Phase 1 of road-to-hooks-actually-fire-in-consumers: log
136
+ # dispatch issue directly (this hook runs as a subprocess from
137
+ # the universal dispatcher; routing through the dispatcher
138
+ # would add latency for no benefit).
139
+ try:
140
+ sys.path.insert(0, str(Path(__file__).resolve().parent / "hooks"))
141
+ from dispatch_issues import log_dispatch_issue # noqa: PLC0415
142
+ log_dispatch_issue(
143
+ workspace_root=consumer_root,
144
+ hook="roadmap-progress",
145
+ issue="prerequisite_missing",
146
+ detail=(
147
+ "update_roadmap_progress.py not found at any of: "
148
+ ".augment/scripts/, .agent-src/scripts/, "
149
+ ".agent-src.uncondensed/scripts/"
150
+ ),
151
+ resolution=(
152
+ "./agent-config hooks:install --regen "
153
+ "(or ./agent-config init)"
154
+ ),
155
+ )
156
+ except (ImportError, OSError):
157
+ pass # observability never breaks the hook
135
158
  if verbose:
136
159
  print("roadmap-progress-hook: regenerator not found, skipping",
137
160
  file=sys.stderr)
@@ -0,0 +1,470 @@
1
+ #!/usr/bin/env python3
2
+ """Local replay of the router against a corpus — pure, no API spend.
3
+
4
+ Phase 3 of `agents/roadmaps/road-to-value-dashboard-netto-cuts.md`.
5
+
6
+ For each prompt in a corpus, applies the same trigger-match logic
7
+ agent hosts would apply at runtime against `dist/router.json`:
8
+
9
+ - kernel rules: always active (no triggers, always-on by definition).
10
+ - tier_1 + tier_2 rules: active iff any trigger matches the prompt
11
+ text (plus optional open-files / command context).
12
+
13
+ Trigger semantics implemented:
14
+
15
+ | Type | Match rule |
16
+ |----------------|------------------------------------------------------------------|
17
+ | `keyword` | case-insensitive substring inside the prompt text |
18
+ | `phrase` | case-insensitive substring (multi-word) inside the prompt text |
19
+ | `command` | case-sensitive prefix on `command:` field (optional context) |
20
+ | `intent` | informational only — never auto-matches; counted separately |
21
+ | `path_prefix` | prefix match against any path in `open_files` (optional context) |
22
+ | `file_pattern` | fnmatch against any path in `open_files` (optional context) |
23
+
24
+ Rules a task expects to fire only via `intent` (or a router coverage
25
+ gap) the static replay cannot see are declared in the corpus field
26
+ `replay_opaque_triggers`. They surface in `intended_vs_observed_match`
27
+ under `replay_opaque` and are excluded from both `missed_intended`
28
+ (no false drift) and `unintended_activations`.
29
+
30
+ Reports go to `internal/bench/reports/router-telemetry/<UTC>.json`
31
+ with three blocks:
32
+
33
+ - `per_trigger_hits` — count of times each trigger fired
34
+ - `per_rule_activations` — count of times each rule activated
35
+ - `panel_b_untouchable_rules` — tier-1 rules that activated on ≥ 1
36
+ Track B task; hard floor for Phase 5
37
+
38
+ Sample size is capped per corpus (`--sample-cap`, default 200).
39
+ Larger corpora are replayed deterministically over the first N
40
+ sorted-by-id prompts.
41
+
42
+ Honours `--quiet` per the script-output convention.
43
+ """
44
+ from __future__ import annotations
45
+
46
+ import argparse
47
+ import fnmatch
48
+ import json
49
+ import sys
50
+ from datetime import datetime, timezone
51
+ from pathlib import Path
52
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
53
+
54
+ try:
55
+ import yaml
56
+ except ImportError:
57
+ yaml = None # type: ignore[assignment]
58
+
59
+
60
+ REPO_ROOT = Path(__file__).resolve().parent.parent
61
+ ROUTER_JSON = REPO_ROOT / "dist" / "router.json"
62
+ DEFAULT_OUT_DIR = REPO_ROOT / "internal" / "bench" / "reports" / "router-telemetry"
63
+ DEFAULT_SAMPLE_CAP = 200
64
+
65
+ # Track B corpus = the Panel B evidence basis; rules that fire on its
66
+ # tasks are the attribution map and become the untouchable set.
67
+ TRACK_B_CORPUS_REL = "internal/bench/corpora/ab-trackb.yaml"
68
+
69
+
70
+ def _log(msg: str, quiet: bool, *, err: bool = False) -> None:
71
+ if err:
72
+ print(msg, file=sys.stderr)
73
+ elif not quiet:
74
+ print(msg)
75
+
76
+
77
+ def _utc_iso() -> str:
78
+ return datetime.now(timezone.utc).isoformat(timespec="seconds")
79
+
80
+
81
+ # ── Trigger matching ────────────────────────────────────────────────────
82
+
83
+
84
def trigger_matches(
    trigger: Dict[str, Any],
    prompt: str,
    open_files: Optional[Iterable[str]] = None,
    command: Optional[str] = None,
) -> bool:
    """Apply one trigger to a prompt + context; return True on match.

    The first recognised key in ``trigger`` decides the outcome, checked in
    this order: ``keyword``, ``phrase``, ``command``, ``path_prefix``,
    ``file_pattern``, ``intent``.

    - ``keyword`` / ``phrase``: case-insensitive substring of ``prompt``.
    - ``command``: case-sensitive prefix of ``command``; False without one.
    - ``path_prefix``: prefix of any entry in ``open_files``.
    - ``file_pattern``: ``fnmatch`` against any entry in ``open_files``.
    - ``intent``: never matches (informational only).

    An empty or ``None`` trigger value never matches. Without this guard an
    empty string would match every prompt/path (``"" in text`` and
    ``text.startswith("")`` are always true) and ``None`` would be
    stringified to ``"none"`` and match prompts containing that word.
    """

    def _needle(key: str) -> str:
        raw = trigger[key]
        return "" if raw is None else str(raw)

    for kind in ("keyword", "phrase"):
        if kind in trigger:
            needle = _needle(kind).lower()
            return bool(needle) and needle in prompt.lower()

    if "command" in trigger:
        prefix = _needle("command")
        return bool(prefix) and bool(command) and command.startswith(prefix)

    if "path_prefix" in trigger:
        prefix = _needle("path_prefix")
        if not prefix or not open_files:
            return False
        return any(str(path).startswith(prefix) for path in open_files)

    if "file_pattern" in trigger:
        pattern = _needle("file_pattern")
        if not pattern or not open_files:
            return False
        return any(fnmatch.fnmatch(str(path), pattern) for path in open_files)

    # `intent` triggers are informational and never auto-match; unknown
    # trigger shapes are treated the same way.
    return False
114
+
115
+
116
def match_prompt(
    router: Dict[str, Any],
    prompt: str,
    profile: str = "full",
    open_files: Optional[Iterable[str]] = None,
    command: Optional[str] = None,
) -> Dict[str, Any]:
    """Replay one prompt against the router and report what fired.

    ``tier_1`` rules are always evaluated; ``tier_2`` rules only when
    ``profile == 'full'``. A rule is activated once if at least one of its
    triggers matches, while every matching trigger is recorded. Kernel
    entries are appended to the activated rules unconditionally, after the
    tiered rules.

    Returns a dict with ``matched_triggers`` (``{tier, rule, trigger}``
    items) and ``activated_rules`` (``{tier, rule}`` items).
    """
    tier_names = ["tier_1", "tier_2"] if profile == "full" else ["tier_1"]

    matched_triggers: List[Dict[str, Any]] = []
    activated_rules: List[Dict[str, Any]] = []

    for tier_name in tier_names:
        for rule in router.get(tier_name, []) or []:
            rule_id = rule.get("id")
            hits = [
                {"tier": tier_name, "rule": rule_id, "trigger": trig}
                for trig in rule.get("triggers", []) or []
                if trigger_matches(trig, prompt, open_files, command)
            ]
            if hits:
                matched_triggers.extend(hits)
                activated_rules.append({"tier": tier_name, "rule": rule_id})

    # Kernel rules carry no triggers: they are always active.
    activated_rules.extend(
        {"tier": "kernel", "rule": kernel_id}
        for kernel_id in router.get("kernel", []) or []
    )

    return {
        "matched_triggers": matched_triggers,
        "activated_rules": activated_rules,
    }
155
+
156
+
157
+ # ── Corpus loading ──────────────────────────────────────────────────────
158
+
159
+
160
def _safe_yaml_load(path: Path) -> Optional[Dict[str, Any]]:
    """Best-effort load of a YAML mapping from *path*.

    Returns ``None`` when PyYAML is unavailable, the file does not exist,
    cannot be read or decoded, fails to parse, or its top-level value is a
    non-empty non-mapping (e.g. a list), so callers can rely on the declared
    ``Dict`` return type. An empty document yields ``{}``.
    """
    if yaml is None or not path.exists():
        return None
    try:
        # Read as UTF-8 explicitly instead of the locale default so corpora
        # load the same way on every platform.
        loaded = yaml.safe_load(path.read_text(encoding="utf-8"))
    except (yaml.YAMLError, OSError, UnicodeDecodeError):
        return None
    if not loaded:
        return {}
    return loaded if isinstance(loaded, dict) else None
167
+
168
+
169
def load_corpus_prompts(
    corpus_path: Path, sample_cap: int
) -> List[Dict[str, Any]]:
    """Return per-prompt entries capped at ``sample_cap``, sorted by id.

    Entries are read from both the ``tasks:`` and ``prompts:`` sections of
    the corpus. Each returned entry has the keys ``id``, ``text``,
    ``intended_triggers``, ``replay_opaque_triggers``, ``open_files`` and
    ``command``. The text comes from ``prompt`` or, failing that, ``text``.
    Context fields that are missing or not lists default to ``[]``; a
    missing/empty ``command`` becomes ``None``.

    Malformed input is skipped rather than raised on: a corpus that is not
    a mapping, a section that is not a list, an entry that is not a
    mapping, or an entry without both an id and a text. A negative
    ``sample_cap`` is treated as 0.
    """
    data = _safe_yaml_load(corpus_path)
    if not data or not isinstance(data, dict):
        return []

    def _str_list(value: Any) -> List[str]:
        return [str(item) for item in value] if isinstance(value, list) else []

    out: List[Dict[str, Any]] = []
    # Track B uses `tasks:`, dev uses `prompts:`.
    for key in ("tasks", "prompts"):
        section = data.get(key) or []
        if not isinstance(section, list):
            continue
        for entry in section:
            if not isinstance(entry, dict):
                continue
            raw_id = entry.get("id")
            # An explicit `id: null` is a missing id, not the id "None".
            pid = "" if raw_id is None else str(raw_id)
            text = entry.get("prompt") or entry.get("text") or ""
            if not (pid and text):
                continue
            command = entry.get("command") or None
            out.append(
                {
                    "id": pid,
                    "text": str(text),
                    "intended_triggers": _str_list(entry.get("intended_triggers")),
                    "replay_opaque_triggers": _str_list(
                        entry.get("replay_opaque_triggers")
                    ),
                    "open_files": _str_list(entry.get("open_files")),
                    "command": str(command) if command else None,
                }
            )
    out.sort(key=lambda item: item["id"])
    # Clamp so a negative cap cannot turn into a "drop from the end" slice.
    return out[: max(sample_cap, 0)]
209
+
210
+
211
+ # ── Aggregation ─────────────────────────────────────────────────────────
212
+
213
+
214
def aggregate_replay(
    router: Dict[str, Any],
    corpora: List[Tuple[str, Path]],
    sample_cap: int,
    profile: str,
) -> Dict[str, Any]:
    """Replay every corpus through the router; aggregate hits.

    ``corpora`` is a list of ``(name, path)`` pairs; at most ``sample_cap``
    prompts are replayed per corpus under the given routing ``profile``.

    Returns a dict with:

    - ``per_trigger_hits``: ``"<rule>::<trigger json>"`` -> hit count.
    - ``per_rule_activations``: tier -> rule id -> number of prompts that
      activated it (kernel rules are excluded).
    - ``panel_b_untouchable_rules`` / ``panel_b_tier2_drivers``: sorted
      tier-1 / tier-2 rule ids activated by the corpus named ``ab-trackb``.
    - ``per_corpus_summary``: per-corpus prompt count, unique rule count
      and the ten most-activated rules.
    - ``intended_vs_observed_match``: per-task comparison for tasks that
      declare intended or replay-opaque triggers.
    - ``unintended_activation_histogram``: ``(rule, count)`` pairs, most
      frequent first.

    Activated rules are processed in sorted order and ``top_rules`` ties
    are broken by rule id, so the report does not depend on set iteration
    order (which varies between runs under string hash randomisation).
    """
    per_trigger_hits: Dict[str, int] = {}
    per_rule_activations: Dict[str, Dict[str, int]] = {}
    panel_b_seen_tier1: set = set()
    panel_b_seen_tier2: set = set()
    per_corpus_summary: List[Dict[str, Any]] = []
    intended_vs_observed: List[Dict[str, Any]] = []
    unintended_histogram: Dict[str, int] = {}

    for corpus_name, corpus_path in corpora:
        prompts = load_corpus_prompts(corpus_path, sample_cap)
        corpus_rule_hits: Dict[str, int] = {}
        for entry in prompts:
            pid = entry["id"]
            intended = entry["intended_triggers"]
            opaque = entry["replay_opaque_triggers"]
            result = match_prompt(
                router,
                entry["text"],
                profile=profile,
                open_files=entry["open_files"] or None,
                command=entry["command"],
            )
            for trigger_hit in result["matched_triggers"]:
                key = (
                    f"{trigger_hit['rule']}::"
                    f"{json.dumps(trigger_hit['trigger'], sort_keys=True)}"
                )
                per_trigger_hits[key] = per_trigger_hits.get(key, 0) + 1

            # Kernel rules are always-on by definition and carry no signal;
            # rules without an id cannot be attributed.
            seen_in_prompt = {
                (act["tier"], act["rule"])
                for act in result["activated_rules"]
                if act["rule"] is not None and act["tier"] != "kernel"
            }
            activated_ids = {rid for _tier, rid in seen_in_prompt}
            # Sorted iteration keeps dict insertion order reproducible.
            for tier, rid in sorted(
                seen_in_prompt, key=lambda pair: (pair[0], str(pair[1]))
            ):
                tier_counts = per_rule_activations.setdefault(tier, {})
                tier_counts[rid] = tier_counts.get(rid, 0) + 1
                corpus_rule_hits[rid] = corpus_rule_hits.get(rid, 0) + 1
                if corpus_name == "ab-trackb":
                    if tier == "tier_1":
                        panel_b_seen_tier1.add(rid)
                    elif tier == "tier_2":
                        panel_b_seen_tier2.add(rid)

            # Council R3 honesty floor: surface intended vs observed.
            # `replay_opaque` rules fire at runtime only via `intent`
            # triggers (or router gaps) the deterministic replay cannot
            # see — they are NOT counted as `missed_intended` (that would
            # be false drift) and NOT counted as `unintended_activations`.
            if intended or opaque:
                intended_set = set(intended)
                opaque_set = set(opaque)
                unintended = sorted(activated_ids - intended_set - opaque_set)
                intended_vs_observed.append(
                    {
                        "corpus": corpus_name,
                        "task": pid,
                        "intended": sorted(intended),
                        "replay_opaque": sorted(opaque),
                        "hit": sorted(intended_set & activated_ids),
                        "missed_intended": sorted(intended_set - activated_ids),
                        "unintended_activations": unintended,
                    }
                )
                # Council R3 #3: inter-rule conflict histogram.
                for rid in unintended:
                    unintended_histogram[rid] = unintended_histogram.get(rid, 0) + 1

        per_corpus_summary.append(
            {
                "corpus": corpus_name,
                "prompts_replayed": len(prompts),
                "unique_rules_activated": len(corpus_rule_hits),
                # Highest count first; ties broken by rule id so the top-10
                # cut-off is stable across runs.
                "top_rules": sorted(
                    corpus_rule_hits.items(),
                    key=lambda item: (-item[1], str(item[0])),
                )[:10],
            }
        )

    return {
        "per_trigger_hits": per_trigger_hits,
        "per_rule_activations": per_rule_activations,
        "panel_b_untouchable_rules": sorted(panel_b_seen_tier1),
        "panel_b_tier2_drivers": sorted(panel_b_seen_tier2),
        "per_corpus_summary": per_corpus_summary,
        "intended_vs_observed_match": intended_vs_observed,
        "unintended_activation_histogram": sorted(
            unintended_histogram.items(), key=lambda item: -item[1]
        ),
    }
314
+
315
+
316
+ # ── Reports ─────────────────────────────────────────────────────────────
317
+
318
+
319
def write_report(
    aggregate: Dict[str, Any],
    out_dir: Path,
    corpora_paths: List[Path],
    sample_cap: int,
    profile: str,
) -> Path:
    """Write the telemetry report to ``out_dir`` and return its path.

    The payload (schema/config header plus every key of ``aggregate``) is
    written twice as UTF-8 JSON: to ``<timestamp>.json`` and to
    ``latest.json``. ``out_dir`` is created if needed. Corpus paths are
    recorded relative to the repository root when they live under it and
    verbatim otherwise.
    """

    def _display(path: Path) -> str:
        # Corpora passed on the command line may be relative or live outside
        # the repository; relative_to() raises ValueError for those.
        try:
            return str(path.relative_to(REPO_ROOT))
        except ValueError:
            return str(path)

    out_dir.mkdir(parents=True, exist_ok=True)
    # One timestamp for both the file name and the payload so they cannot
    # disagree across a second boundary.
    generated_at = _utc_iso()
    out_path = out_dir / f"{generated_at.replace(':', '-')}.json"
    latest = out_dir / "latest.json"
    payload = {
        "schema_version": 1,
        "schema_id": "router-telemetry-v1",
        "generated_at": generated_at,
        "config": {
            "router": "dist/router.json",
            "profile": profile,
            "sample_cap_per_corpus": sample_cap,
            "corpora": [_display(p) for p in corpora_paths],
        },
        **aggregate,
    }
    text = json.dumps(payload, indent=2, ensure_ascii=False) + "\n"
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding
    # must be explicit rather than whatever the locale defaults to.
    out_path.write_text(text, encoding="utf-8")
    latest.write_text(text, encoding="utf-8")
    return out_path
346
+
347
+
348
def find_never_matched_tier1(router: Dict[str, Any], activations: Dict[str, Any]) -> List[str]:
    """Tier-1 rules with zero activations across all corpora — dead-rule candidates.

    ``activations`` is the tier -> rule id -> count mapping; any tier-1 rule
    id from ``router`` that is absent from its ``tier_1`` entry is returned,
    sorted. Rules without an id are ignored. A missing or null ``tier_1``
    on either side is treated as empty.
    """
    activated = activations.get("tier_1") or {}
    # `or []` tolerates an explicit `"tier_1": null` in the router.
    tier_1_ids = (rule.get("id") for rule in router.get("tier_1") or [])
    return sorted(rid for rid in tier_1_ids if rid and rid not in activated)
353
+
354
+
355
+ # ── Entry point ─────────────────────────────────────────────────────────
356
+
357
+
358
def run(
    corpora: List[Tuple[str, Path]],
    out_dir: Path = DEFAULT_OUT_DIR,
    sample_cap: int = DEFAULT_SAMPLE_CAP,
    profile: str = "full",
    quiet: bool = False,
) -> int:
    """Replay ``corpora`` against the compiled router and write a report.

    Returns 0 on success and 1 when the router file is missing, cannot be
    parsed as JSON, or does not contain a JSON object. Errors always go to
    stderr; the progress line is suppressed by ``quiet``.
    """
    if not ROUTER_JSON.exists():
        _log(f"router not found: {ROUTER_JSON}", quiet, err=True)
        return 1
    try:
        router = json.loads(ROUTER_JSON.read_text(encoding="utf-8"))
    except json.JSONDecodeError as exc:
        _log(f"failed to parse router: {exc}", quiet, err=True)
        return 1
    if not isinstance(router, dict):
        # Everything downstream calls router.get(...); reject other shapes
        # with a clear message instead of an AttributeError.
        _log("failed to parse router: top-level value is not an object", quiet, err=True)
        return 1

    _log(
        f"router_telemetry: replaying {len(corpora)} corpora · "
        f"cap={sample_cap} prompts/corpus · profile={profile}",
        quiet,
    )
    agg = aggregate_replay(router, corpora, sample_cap, profile)
    never_matched = find_never_matched_tier1(router, agg["per_rule_activations"])
    agg["never_matched_tier1"] = never_matched

    out_path = write_report(
        agg, out_dir, [p for _name, p in corpora], sample_cap, profile
    )
    # `--out` may point outside the repository; fall back to the full path
    # rather than crashing after the report has already been written.
    try:
        shown: Path = out_path.relative_to(REPO_ROOT)
    except ValueError:
        shown = out_path
    # NOTE(review): the summary line is printed even under --quiet
    # (quiet=False is hard-coded) — presumably deliberate; confirm.
    _log(
        f"router_telemetry: wrote {shown} · "
        f"panel_b_untouchable={len(agg['panel_b_untouchable_rules'])} · "
        f"never_matched_tier1={len(never_matched)}",
        quiet=False,
    )
    return 0
394
+
395
+
396
def parse_args(argv: List[str]) -> argparse.Namespace:
    """Parse the command-line arguments in ``argv`` (program name excluded).

    Options: repeatable ``--corpus NAME:PATH``, ``--sample-cap``,
    ``--profile`` (``balanced`` or ``full``), ``--out`` and ``--quiet``.
    The parser description is the first line of the module docstring.
    """
    summary = __doc__.splitlines()[0]
    parser = argparse.ArgumentParser(description=summary)
    option_table = (
        (
            "--corpus",
            {
                "action": "append",
                "default": [],
                "metavar": "NAME:PATH",
                "help": "Repeatable. NAME tags the corpus in the report; PATH is the YAML.",
            },
        ),
        (
            "--sample-cap",
            {
                "type": int,
                "default": DEFAULT_SAMPLE_CAP,
                "help": "Max prompts per corpus (default %(default)s).",
            },
        ),
        (
            "--profile",
            {
                "choices": ["balanced", "full"],
                "default": "full",
                "help": "Routing profile (default 'full' — includes tier-2 rules).",
            },
        ),
        (
            "--out",
            {
                "type": Path,
                "default": DEFAULT_OUT_DIR,
                "help": "Output directory (default %(default)s).",
            },
        ),
        ("--quiet", {"action": "store_true"}),
    )
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)
    return parser.parse_args(argv)
425
+
426
+
427
def _default_corpora() -> List[Tuple[str, Path]]:
    """Build the default ``(name, path)`` corpus manifest.

    The manifest always starts with the three fixed corpora (``ab-trackb``,
    ``dev``, ``non-dev``). When the ``router-coverage`` directory exists,
    every ``*.yaml`` file in it is appended in sorted order, tagged
    ``router-coverage:<stem>`` so new coverage corpora are picked up without
    editing this script and stay distinguishable in the report.
    """
    manifest: List[Tuple[str, Path]] = [
        ("ab-trackb", REPO_ROOT / TRACK_B_CORPUS_REL),
        ("dev", REPO_ROOT / "tests/eval/corpus-dev.yaml"),
        ("non-dev", REPO_ROOT / "tests/eval/corpus-non-dev.yaml"),
    ]
    coverage_dir = REPO_ROOT.joinpath("internal", "bench", "corpora", "router-coverage")
    if not coverage_dir.is_dir():
        return manifest
    manifest.extend(
        (f"router-coverage:{yaml_path.stem}", yaml_path)
        for yaml_path in sorted(coverage_dir.glob("*.yaml"))
    )
    return manifest
446
+
447
+
448
def main(argv: List[str] | None = None) -> int:
    """CLI entry point; returns the process exit code.

    Uses ``sys.argv[1:]`` when ``argv`` is None. Without ``--corpus`` the
    default manifest is replayed; otherwise each ``NAME:PATH`` spec is split
    on its first colon, and a spec without a colon is reported on stderr
    with exit code 1.
    """
    args = parse_args(sys.argv[1:] if argv is None else argv)
    if args.corpus:
        corpora = []
        for spec in args.corpus:
            name, sep, path = spec.partition(":")
            if not sep:
                print(f"--corpus expects NAME:PATH, got {spec!r}", file=sys.stderr)
                return 1
            corpora.append((name.strip(), Path(path.strip())))
    else:
        corpora = _default_corpora()
    return run(
        corpora,
        out_dir=args.out,
        sample_cap=args.sample_cap,
        profile=args.profile,
        quiet=args.quiet,
    )
467
+
468
+
469
+ if __name__ == "__main__":
470
+ raise SystemExit(main())