@event4u/agent-config 4.9.0 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/implement-ticket.md +5 -4
- package/.agent-src/contexts/execution/roadmap-process-loop.md +30 -4
- package/.agent-src/rules/language-and-tone.md +4 -10
- package/.agent-src/rules/linked-projects-onboarding-gate.md +82 -0
- package/.agent-src/rules/roadmap-progress-sync.md +39 -5
- package/.agent-src/scripts/update_roadmap_progress.py +63 -7
- package/.agent-src/skills/command-routing/SKILL.md +5 -4
- package/.agent-src/skills/roadmap-management/SKILL.md +121 -21
- package/.agent-src/skills/roadmap-writing/SKILL.md +63 -0
- package/.agent-src/templates/agent-settings.md +16 -0
- package/.agent-src/templates/roadmaps.md +22 -1
- package/.agent-src/templates/scripts/work_engine/_lib/agent_settings.py +20 -3
- package/.claude-plugin/marketplace.json +1 -1
- package/CHANGELOG.md +106 -0
- package/CONTRIBUTING.md +19 -0
- package/README.md +12 -1
- package/dist/cli/registry.js +0 -2
- package/dist/cli/registry.js.map +1 -1
- package/dist/discovery/deprecation-report.md +1 -1
- package/dist/discovery/discovery-manifest.json +36 -14
- package/dist/discovery/discovery-manifest.json.sha256 +1 -1
- package/dist/discovery/discovery-manifest.summary.md +3 -3
- package/dist/discovery/orphan-report.md +1 -1
- package/dist/discovery/packs.json +6 -5
- package/dist/discovery/trust-report.md +3 -3
- package/dist/discovery/workspaces.json +5 -4
- package/dist/mcp/registry-manifest.json +3 -3
- package/dist/router.json +1 -1671
- package/docs/architecture.md +1 -1
- package/docs/benchmark.md +20 -8
- package/docs/benchmarks.md +11 -0
- package/docs/catalog.md +3 -2
- package/docs/contracts/benchmark-corpus-spec.md +31 -3
- package/docs/contracts/command-surface-tiers.md +1 -1
- package/docs/contracts/hook-architecture-v1.md +33 -0
- package/docs/contracts/migrate-command.md +197 -0
- package/docs/contracts/settings-api.md +2 -1
- package/docs/contracts/value-dashboard-spec.md +374 -0
- package/docs/contracts/value-report-schema.md +150 -0
- package/docs/decisions/ADR-031-validation-severity-tiers-and-projection-roundtrip.md +97 -0
- package/docs/decisions/ADR-032-linked-projects-scope.md +118 -0
- package/docs/decisions/INDEX.md +2 -0
- package/docs/getting-started.md +1 -1
- package/docs/guidelines/agent-infra/installed-tools-manifest.md +6 -3
- package/docs/guidelines/agent-infra/language-and-tone-examples.md +35 -0
- package/docs/guides/cross-repo-linked-projects.md +86 -0
- package/docs/migration/v1-to-v2.md +40 -27
- package/docs/value.md +84 -0
- package/package.json +8 -8
- package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
- package/scripts/_cli/cmd_migrate.py +264 -102
- package/scripts/_cli/cmd_settings_migrate.py +2 -1
- package/scripts/_dispatch.bash +147 -49
- package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
- package/scripts/_lib/agent_settings.py +20 -3
- package/scripts/_lib/install_regenerator.py +129 -0
- package/scripts/_lib/linked_projects.py +238 -0
- package/scripts/_lib/value_ladder.py +599 -0
- package/scripts/_lib/value_report.py +441 -0
- package/scripts/bench_rtk_savings.py +320 -0
- package/scripts/check_no_local_settings_committed.py +51 -0
- package/scripts/compile_router.py +19 -5
- package/scripts/expected_perms.json +1 -1
- package/scripts/first_run_gate_hook.py +178 -0
- package/scripts/hook_manifest.yaml +16 -7
- package/scripts/hooks/dispatch_hook.py +27 -0
- package/scripts/hooks/dispatch_issues.py +136 -0
- package/scripts/hooks_doctor.py +40 -1
- package/scripts/install.py +25 -21
- package/scripts/lint_agents_layout.py +5 -4
- package/scripts/lint_bench_corpus.py +86 -4
- package/scripts/lint_global_paths.py +4 -3
- package/scripts/lint_marketplace_install_completeness.py +188 -0
- package/scripts/lint_value_dashboard.py +218 -0
- package/scripts/render_benchmark_md.py +6 -2
- package/scripts/render_value_md.py +355 -0
- package/scripts/repro/repro_marketplace_install_gap.sh +161 -0
- package/scripts/roadmap_progress_hook.py +23 -0
- package/scripts/router_telemetry.py +470 -0
- package/scripts/validate_frontmatter.py +23 -9
- package/scripts/_cli/cmd_migrate_to_global.py +0 -415
|
@@ -0,0 +1,470 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Local replay of the router against a corpus — pure, no API spend.
|
|
3
|
+
|
|
4
|
+
Phase 3 of `agents/roadmaps/road-to-value-dashboard-netto-cuts.md`.
|
|
5
|
+
|
|
6
|
+
For each prompt in a corpus, applies the same trigger-match logic
|
|
7
|
+
agent hosts would apply at runtime against `dist/router.json`:
|
|
8
|
+
|
|
9
|
+
- kernel rules: always active (no triggers, always-on by definition).
|
|
10
|
+
- tier_1 + tier_2 rules: active iff any trigger matches the prompt
|
|
11
|
+
text (plus optional open-files / command context).
|
|
12
|
+
|
|
13
|
+
Trigger semantics implemented:
|
|
14
|
+
|
|
15
|
+
| Type | Match rule |
|
|
16
|
+
|----------------|------------------------------------------------------------------|
|
|
17
|
+
| `keyword` | case-insensitive substring inside the prompt text |
|
|
18
|
+
| `phrase` | case-insensitive substring (multi-word) inside the prompt text |
|
|
19
|
+
| `command` | case-sensitive prefix on `command:` field (optional context) |
|
|
20
|
+
| `intent` | informational only — never auto-matches; counted separately |
|
|
21
|
+
| `path_prefix` | prefix match against any path in `open_files` (optional context) |
|
|
22
|
+
| `file_pattern` | fnmatch against any path in `open_files` (optional context) |
|
|
23
|
+
|
|
24
|
+
Rules a task expects to fire only via `intent` (or a router coverage
|
|
25
|
+
gap) the static replay cannot see are declared in the corpus field
|
|
26
|
+
`replay_opaque_triggers`. They surface in `intended_vs_observed_match`
|
|
27
|
+
under `replay_opaque` and are excluded from both `missed_intended`
|
|
28
|
+
(no false drift) and `unintended_activations`.
|
|
29
|
+
|
|
30
|
+
Reports go to `internal/bench/reports/router-telemetry/<UTC>.json`
|
|
31
|
+
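with three headline blocks (the payload also carries further keys,
e.g. `per_corpus_summary` and `intended_vs_observed_match`):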
|
|
32
|
+
|
|
33
|
+
- `per_trigger_hits` — count of times each trigger fired
|
|
34
|
+
- `per_rule_activations` — count of times each rule activated
|
|
35
|
+
- `panel_b_untouchable_rules` — tier-1 rules that activated on ≥ 1
|
|
36
|
+
Track B task; hard floor for Phase 5
|
|
37
|
+
|
|
38
|
+
Sample size is capped per corpus (`--sample-cap`, default 200).
|
|
39
|
+
Larger corpora are replayed deterministically over the first N
|
|
40
|
+
sorted-by-id prompts.
|
|
41
|
+
|
|
42
|
+
Honours `--quiet` per the script-output convention.
|
|
43
|
+
"""
|
|
44
|
+
from __future__ import annotations
|
|
45
|
+
|
|
46
|
+
import argparse
|
|
47
|
+
import fnmatch
|
|
48
|
+
import json
|
|
49
|
+
import sys
|
|
50
|
+
from datetime import datetime, timezone
|
|
51
|
+
from pathlib import Path
|
|
52
|
+
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
|
53
|
+
|
|
54
|
+
try:
|
|
55
|
+
import yaml
|
|
56
|
+
except ImportError:
|
|
57
|
+
yaml = None # type: ignore[assignment]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
61
|
+
ROUTER_JSON = REPO_ROOT / "dist" / "router.json"
|
|
62
|
+
DEFAULT_OUT_DIR = REPO_ROOT / "internal" / "bench" / "reports" / "router-telemetry"
|
|
63
|
+
DEFAULT_SAMPLE_CAP = 200
|
|
64
|
+
|
|
65
|
+
# Track B corpus = the Panel B evidence basis; rules that fire on its
|
|
66
|
+
# tasks are the attribution map and become the untouchable set.
|
|
67
|
+
TRACK_B_CORPUS_REL = "internal/bench/corpora/ab-trackb.yaml"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _log(msg: str, quiet: bool, *, err: bool = False) -> None:
|
|
71
|
+
if err:
|
|
72
|
+
print(msg, file=sys.stderr)
|
|
73
|
+
elif not quiet:
|
|
74
|
+
print(msg)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _utc_iso() -> str:
|
|
78
|
+
return datetime.now(timezone.utc).isoformat(timespec="seconds")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# ── Trigger matching ────────────────────────────────────────────────────
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def trigger_matches(
    trigger: Dict[str, Any],
    prompt: str,
    open_files: Optional[Iterable[str]] = None,
    command: Optional[str] = None,
) -> bool:
    """Decide whether a single router trigger fires for the given context.

    The first recognised key in ``trigger`` decides the outcome, checked
    in this order: ``keyword``, ``phrase``, ``command``, ``path_prefix``,
    ``file_pattern``, ``intent``.

    - ``keyword`` / ``phrase``: case-insensitive substring of ``prompt``.
    - ``command``: case-sensitive prefix of ``command``; no match when
      ``command`` is empty or ``None``.
    - ``path_prefix``: prefix of any entry in ``open_files``.
    - ``file_pattern``: ``fnmatch`` pattern against any entry in
      ``open_files``.
    - ``intent`` and unrecognised triggers never match.
    """
    haystack = prompt.lower()

    for text_key in ("keyword", "phrase"):
        if text_key in trigger:
            needle = str(trigger[text_key]).lower()
            return needle in haystack

    if "command" in trigger:
        return bool(command) and command.startswith(str(trigger["command"]))

    if "path_prefix" in trigger:
        if open_files:
            prefix = str(trigger["path_prefix"])
            for candidate in open_files:
                if str(candidate).startswith(prefix):
                    return True
        return False

    if "file_pattern" in trigger:
        if open_files:
            pattern = str(trigger["file_pattern"])
            for candidate in open_files:
                if fnmatch.fnmatch(str(candidate), pattern):
                    return True
        return False

    # ``intent`` triggers are informational only; anything else is unknown.
    return False
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def match_prompt(
    router: Dict[str, Any],
    prompt: str,
    profile: str = "full",
    open_files: Optional[Iterable[str]] = None,
    command: Optional[str] = None,
) -> Dict[str, Any]:
    """Return the matched triggers and activated rules for one prompt.

    ``tier_1`` rules are always evaluated; ``tier_2`` rules only when
    ``profile == 'full'``. A rule is activated when at least one of its
    triggers matches. Every id listed under ``kernel`` is appended as an
    activated rule unconditionally.

    Returns a dict with two lists:

    - ``matched_triggers``: ``{"tier", "rule", "trigger"}`` per firing trigger.
    - ``activated_rules``: ``{"tier", "rule"}`` per activated rule, tiered
      rules first, kernel rules last.
    """
    # ``open_files`` is scanned once per path-based trigger. A one-shot
    # iterator (e.g. a generator) would be exhausted by the first trigger and
    # silently stop matching afterwards, so materialize it exactly once.
    files: Optional[List[str]] = None if open_files is None else list(open_files)

    tiers = [("tier_1", router.get("tier_1", []) or [])]
    if profile == "full":
        tiers.append(("tier_2", router.get("tier_2", []) or []))

    matched_triggers: List[Dict[str, Any]] = []
    activated_rules: List[Dict[str, Any]] = []

    for tier_name, rules in tiers:
        for rule in rules:
            rule_id = rule.get("id")
            hits = [
                {"tier": tier_name, "rule": rule_id, "trigger": trig}
                for trig in rule.get("triggers", []) or []
                if trigger_matches(trig, prompt, files, command)
            ]
            if hits:
                matched_triggers.extend(hits)
                activated_rules.append({"tier": tier_name, "rule": rule_id})

    # Kernel rules are always active.
    activated_rules.extend(
        {"tier": "kernel", "rule": kid} for kid in router.get("kernel", []) or []
    )

    return {
        "matched_triggers": matched_triggers,
        "activated_rules": activated_rules,
    }
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# ── Corpus loading ──────────────────────────────────────────────────────
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _safe_yaml_load(path: Path) -> Optional[Dict[str, Any]]:
    """Load a YAML mapping from ``path`` without raising.

    Returns ``None`` when PyYAML is unavailable, the file does not exist,
    cannot be read or decoded, is not valid YAML, or its top-level value
    is a non-empty non-mapping. An empty document yields ``{}``.
    """
    if yaml is None or not path.exists():
        return None
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        loaded = yaml.safe_load(path.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, yaml.YAMLError):
        return None
    if not loaded:
        return {}
    # Callers use ``.get``; a top-level list or scalar is not a usable corpus.
    return loaded if isinstance(loaded, dict) else None
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def load_corpus_prompts(
    corpus_path: Path, sample_cap: int
) -> List[Dict[str, Any]]:
    """Return per-prompt entries sorted by id and capped at ``sample_cap``.

    Entries are read from both the ``tasks`` and ``prompts`` keys of the
    corpus. Each returned entry has the keys ``id``, ``text``,
    ``intended_triggers``, ``replay_opaque_triggers``, ``open_files`` and
    ``command``. The prompt text comes from ``prompt`` or, failing that,
    ``text``. Entries without an id or text, and entries that are not
    mappings, are skipped. List-valued fields that are missing or not
    lists become ``[]``; a missing or empty ``command`` becomes ``None``.

    Ids are compared as strings, so the sort order is lexicographic.
    Returns ``[]`` when the corpus cannot be loaded.
    """
    data = _safe_yaml_load(corpus_path)
    if not data or not isinstance(data, dict):
        return []

    def _as_str_list(value: Any) -> List[str]:
        # Anything that is not a list (including None) is treated as absent.
        return [str(item) for item in value] if isinstance(value, list) else []

    out: List[Dict[str, Any]] = []
    # Track B uses `tasks:`, dev uses `prompts:`.
    for key in ("tasks", "prompts"):
        entries = data.get(key) or []
        if not isinstance(entries, list):
            continue
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            raw_id = entry.get("id")
            # An explicit `id: null` must count as missing, not as "None".
            pid = "" if raw_id is None else str(raw_id)
            text = entry.get("prompt") or entry.get("text") or ""
            if not (pid and text):
                continue
            command = entry.get("command") or None
            out.append(
                {
                    "id": pid,
                    "text": str(text),
                    "intended_triggers": _as_str_list(entry.get("intended_triggers")),
                    "replay_opaque_triggers": _as_str_list(
                        entry.get("replay_opaque_triggers")
                    ),
                    "open_files": _as_str_list(entry.get("open_files")),
                    "command": str(command) if command else None,
                }
            )
    out.sort(key=lambda item: item["id"])
    # A negative cap would otherwise slice from the end and drop the last
    # prompt; treat it as "no prompts".
    return out[: max(sample_cap, 0)]
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
# ── Aggregation ─────────────────────────────────────────────────────────
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def aggregate_replay(
    router: Dict[str, Any],
    corpora: List[Tuple[str, Path]],
    sample_cap: int,
    profile: str,
) -> Dict[str, Any]:
    """Replay every corpus through the router and aggregate the hits.

    For each ``(name, path)`` in ``corpora`` the prompts are loaded (capped
    at ``sample_cap``) and matched with ``match_prompt``. Kernel rules and
    rules without an id are ignored when counting activations.

    Returns a dict with these keys:

    - ``per_trigger_hits``: ``"<rule>::<trigger-json>"`` -> fire count.
    - ``per_rule_activations``: tier -> rule id -> prompts that activated it.
    - ``panel_b_untouchable_rules``: sorted tier_1 ids activated by the
      corpus named ``ab-trackb``.
    - ``panel_b_tier2_drivers``: the same for tier_2.
    - ``per_corpus_summary``: per corpus, prompt count, distinct rule count
      and the ten most-activated rules.
    - ``intended_vs_observed_match``: one record per prompt that declares
      intended or replay-opaque triggers.
    - ``unintended_activation_histogram``: ``(rule id, count)`` pairs,
      most frequent first.
    """
    per_trigger_hits: Dict[str, int] = {}
    per_rule_activations: Dict[str, Dict[str, int]] = {}
    panel_b_seen_tier1: set = set()
    panel_b_seen_tier2: set = set()
    per_corpus_summary: List[Dict[str, Any]] = []
    intended_vs_observed: List[Dict[str, Any]] = []
    unintended_histogram: Dict[str, int] = {}

    for corpus_name, corpus_path in corpora:
        prompts = load_corpus_prompts(corpus_path, sample_cap)
        corpus_rule_hits: Dict[str, int] = {}
        is_track_b = corpus_name == "ab-trackb"

        for entry in prompts:
            intended = entry["intended_triggers"]
            opaque = entry["replay_opaque_triggers"]
            result = match_prompt(
                router,
                entry["text"],
                profile=profile,
                open_files=entry["open_files"] or None,
                command=entry["command"],
            )

            for fired in result["matched_triggers"]:
                key = f"{fired['rule']}::{json.dumps(fired['trigger'], sort_keys=True)}"
                per_trigger_hits[key] = per_trigger_hits.get(key, 0) + 1

            # Skip kernel — always-on by definition, no signal.
            seen_in_prompt = {
                (act["tier"], act["rule"])
                for act in result["activated_rules"]
                if act["rule"] is not None and act["tier"] != "kernel"
            }
            activated_ids = {rid for _tier, rid in seen_in_prompt}

            # Iterate in sorted order: set iteration order over strings varies
            # between interpreter runs (hash randomization), which would make
            # the insertion order of the counters below — and therefore the
            # key order of the written report — non-reproducible.
            ordered = sorted(seen_in_prompt, key=lambda pair: (pair[0], str(pair[1])))
            for tier, rid in ordered:
                tier_counts = per_rule_activations.setdefault(tier, {})
                tier_counts[rid] = tier_counts.get(rid, 0) + 1
                corpus_rule_hits[rid] = corpus_rule_hits.get(rid, 0) + 1
                if is_track_b:
                    if tier == "tier_1":
                        panel_b_seen_tier1.add(rid)
                    elif tier == "tier_2":
                        panel_b_seen_tier2.add(rid)

            # Council R3 honesty floor: surface intended vs observed.
            # `replay_opaque` rules fire at runtime only via `intent`
            # triggers (or router gaps) the deterministic replay cannot
            # see — they are NOT counted as `missed_intended` (that would
            # be false drift) and NOT counted as `unintended_activations`.
            if intended or opaque:
                intended_set = set(intended)
                opaque_set = set(opaque)
                unintended = sorted(activated_ids - intended_set - opaque_set)
                intended_vs_observed.append(
                    {
                        "corpus": corpus_name,
                        "task": entry["id"],
                        "intended": sorted(intended),
                        "replay_opaque": sorted(opaque),
                        "hit": sorted(intended_set & activated_ids),
                        "missed_intended": sorted(intended_set - activated_ids),
                        "unintended_activations": unintended,
                    }
                )
                # Council R3 #3: inter-rule conflict histogram.
                for rid in unintended:
                    unintended_histogram[rid] = unintended_histogram.get(rid, 0) + 1

        per_corpus_summary.append(
            {
                "corpus": corpus_name,
                "prompts_replayed": len(prompts),
                "unique_rules_activated": len(corpus_rule_hits),
                # Break count ties by rule id so the top-10 cut is stable.
                "top_rules": sorted(
                    corpus_rule_hits.items(),
                    key=lambda item: (-item[1], str(item[0])),
                )[:10],
            }
        )

    return {
        "per_trigger_hits": per_trigger_hits,
        "per_rule_activations": per_rule_activations,
        "panel_b_untouchable_rules": sorted(panel_b_seen_tier1),
        "panel_b_tier2_drivers": sorted(panel_b_seen_tier2),
        "per_corpus_summary": per_corpus_summary,
        "intended_vs_observed_match": intended_vs_observed,
        "unintended_activation_histogram": sorted(
            unintended_histogram.items(), key=lambda x: -x[1]
        ),
    }
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
# ── Reports ─────────────────────────────────────────────────────────────
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def write_report(
    aggregate: Dict[str, Any],
    out_dir: Path,
    corpora_paths: List[Path],
    sample_cap: int,
    profile: str,
) -> Path:
    """Write the telemetry report and return the timestamped file's path.

    Creates ``out_dir`` if needed, then writes the same JSON payload to
    ``<timestamp>.json`` (colons in the timestamp replaced by ``-``) and to
    ``latest.json``. The payload holds schema metadata, a ``config`` block,
    and every key of ``aggregate``.
    """

    def _display(path: Path) -> str:
        # Corpora passed on the command line may be relative or live outside
        # the repository; `relative_to` raises ValueError for those, so fall
        # back to the path as given.
        try:
            return str(path.relative_to(REPO_ROOT))
        except ValueError:
            return str(path)

    out_dir.mkdir(parents=True, exist_ok=True)
    # Take the timestamp once so the file name and `generated_at` agree.
    generated_at = _utc_iso()
    out_path = out_dir / f"{generated_at.replace(':', '-')}.json"
    latest = out_dir / "latest.json"
    payload = {
        "schema_version": 1,
        "schema_id": "router-telemetry-v1",
        "generated_at": generated_at,
        "config": {
            "router": "dist/router.json",
            "profile": profile,
            "sample_cap_per_corpus": sample_cap,
            "corpora": [_display(p) for p in corpora_paths],
        },
        **aggregate,
    }
    text = json.dumps(payload, indent=2, ensure_ascii=False) + "\n"
    # `ensure_ascii=False` can emit non-ASCII characters; write UTF-8
    # explicitly instead of relying on the platform's default encoding.
    out_path.write_text(text, encoding="utf-8")
    latest.write_text(text, encoding="utf-8")
    return out_path
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def find_never_matched_tier1(router: Dict[str, Any], activations: Dict[str, Any]) -> List[str]:
    """Tier-1 rules with zero activations across all corpora — dead-rule candidates.

    ``activations`` is the tier -> rule id -> count mapping; only its
    ``tier_1`` entry is consulted. Rules without a truthy ``id`` are
    ignored. The result is sorted. A missing or null ``tier_1`` on either
    argument is treated as empty.
    """
    activated = activations.get("tier_1", {}) or {}
    never_matched: List[str] = []
    # `or []` guards against an explicit `"tier_1": null` in the router.
    for rule in router.get("tier_1", []) or []:
        rule_id = rule.get("id")
        if rule_id and rule_id not in activated:
            never_matched.append(rule_id)
    return sorted(never_matched)
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
# ── Entry point ─────────────────────────────────────────────────────────
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def run(
    corpora: List[Tuple[str, Path]],
    out_dir: Path = DEFAULT_OUT_DIR,
    sample_cap: int = DEFAULT_SAMPLE_CAP,
    profile: str = "full",
    quiet: bool = False,
) -> int:
    """Replay ``corpora`` against the compiled router and write the report.

    Returns ``0`` on success and ``1`` when the router file is missing,
    is not valid JSON, or is not a JSON object. Errors are logged to
    stderr.
    """
    if not ROUTER_JSON.exists():
        _log(f"router not found: {ROUTER_JSON}", quiet, err=True)
        return 1
    try:
        router = json.loads(ROUTER_JSON.read_text(encoding="utf-8"))
    except json.JSONDecodeError as exc:
        _log(f"failed to parse router: {exc}", quiet, err=True)
        return 1
    if not isinstance(router, dict):
        # The replay looks rules up with `.get`; anything else cannot be a router.
        _log(f"router is not a JSON object: {ROUTER_JSON}", quiet, err=True)
        return 1

    _log(
        f"router_telemetry: replaying {len(corpora)} corpora · "
        f"cap={sample_cap} prompts/corpus · profile={profile}",
        quiet,
    )
    agg = aggregate_replay(router, corpora, sample_cap, profile)
    never_matched = find_never_matched_tier1(router, agg["per_rule_activations"])
    agg["never_matched_tier1"] = never_matched

    out_path = write_report(
        agg, out_dir, [p for _name, p in corpora], sample_cap, profile
    )
    # `--out` may point outside the repository (or be relative); in that case
    # `relative_to` raises ValueError, so report the path as written.
    try:
        relpath = out_path.relative_to(REPO_ROOT)
    except ValueError:
        relpath = out_path
    # NOTE(review): `quiet=False` is passed explicitly, so this summary line
    # prints even under `--quiet` — confirm that is the intended convention.
    _log(
        f"router_telemetry: wrote {relpath} · "
        f"panel_b_untouchable={len(agg['panel_b_untouchable_rules'])} · "
        f"never_matched_tier1={len(never_matched)}",
        quiet=False,
    )
    return 0
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def parse_args(argv: List[str]) -> argparse.Namespace:
    """Parse the command-line arguments in ``argv``.

    Options: ``--corpus NAME:PATH`` (repeatable), ``--sample-cap``,
    ``--profile`` (``balanced`` or ``full``), ``--out`` and ``--quiet``.
    The parser description is the first line of the module docstring.
    """
    # The module docstring is None when docstrings are stripped
    # (`python -OO`); fall back to no description instead of crashing.
    doc_lines = (__doc__ or "").splitlines()
    p = argparse.ArgumentParser(description=doc_lines[0] if doc_lines else None)
    p.add_argument(
        "--corpus",
        action="append",
        default=[],
        metavar="NAME:PATH",
        help="Repeatable. NAME tags the corpus in the report; PATH is the YAML.",
    )
    p.add_argument(
        "--sample-cap",
        type=int,
        default=DEFAULT_SAMPLE_CAP,
        help="Max prompts per corpus (default %(default)s).",
    )
    p.add_argument(
        "--profile",
        choices=["balanced", "full"],
        default="full",
        help="Routing profile (default 'full' — includes tier-2 rules).",
    )
    p.add_argument(
        "--out",
        type=Path,
        default=DEFAULT_OUT_DIR,
        help="Output directory (default %(default)s).",
    )
    p.add_argument("--quiet", action="store_true")
    return p.parse_args(argv)
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def _default_corpora() -> List[Tuple[str, Path]]:
    """Build the default corpus manifest.

    The manifest starts with the three fixed corpora (``ab-trackb``,
    ``dev``, ``non-dev``). Every ``*.yaml`` file found under
    `internal/bench/corpora/router-coverage/` is then appended in sorted
    order, so adding a coverage corpus needs no change to this script.
    Discovered files are tagged ``router-coverage:<stem>`` to tell them
    apart from the fixed three in the report.
    """
    coverage_dir = REPO_ROOT / "internal" / "bench" / "corpora" / "router-coverage"
    discovered = sorted(coverage_dir.glob("*.yaml")) if coverage_dir.is_dir() else []
    return [
        ("ab-trackb", REPO_ROOT / TRACK_B_CORPUS_REL),
        ("dev", REPO_ROOT / "tests/eval/corpus-dev.yaml"),
        ("non-dev", REPO_ROOT / "tests/eval/corpus-non-dev.yaml"),
        *((f"router-coverage:{found.stem}", found) for found in discovered),
    ]
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def main(argv: List[str] | None = None) -> int:
    """Command-line entry point; returns the process exit code.

    Uses ``sys.argv[1:]`` when ``argv`` is ``None``. Without any
    ``--corpus`` option the default manifest is replayed. Otherwise each
    ``NAME:PATH`` spec is split on its first colon; a spec without a
    colon, or with an empty name or path, is rejected with exit code 1.
    """
    args = parse_args(sys.argv[1:] if argv is None else argv)
    if args.corpus:
        corpora = []
        for spec in args.corpus:
            name, sep, raw_path = spec.partition(":")
            name, raw_path = name.strip(), raw_path.strip()
            # An empty path would become Path('.') and fail later when the
            # directory is read as a file; an empty name leaves the corpus
            # untagged in the report. Reject both up front.
            if not sep or not name or not raw_path:
                print(f"--corpus expects NAME:PATH, got {spec!r}", file=sys.stderr)
                return 1
            corpora.append((name, Path(raw_path)))
    else:
        corpora = _default_corpora()
    return run(
        corpora,
        out_dir=args.out,
        sample_cap=args.sample_cap,
        profile=args.profile,
        quiet=args.quiet,
    )
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
if __name__ == "__main__":
|
|
470
|
+
raise SystemExit(main())
|
|
@@ -12,7 +12,11 @@ Supported keywords: ``type``, ``required``, ``properties``,
|
|
|
12
12
|
|
|
13
13
|
The goal is a **better error surface**: each violation comes back as a
|
|
14
14
|
``SchemaError`` with ``path`` (dotted JSON pointer), ``rule`` (the schema
|
|
15
|
-
keyword that failed),
|
|
15
|
+
keyword that failed), a human-readable message, and a ``severity``
|
|
16
|
+
(``"error"`` = fatal / fails CI, ``"warning"`` = advisory). Structural
|
|
17
|
+
keywords (``required``/``type``/``enum``/``pattern``/``additionalProperties``/
|
|
18
|
+
``minItems``) are fatal; length keywords (``minLength``/``maxLength``) are
|
|
19
|
+
advisory warnings. See ADR-031.
|
|
16
20
|
"""
|
|
17
21
|
|
|
18
22
|
from __future__ import annotations
|
|
@@ -39,12 +43,14 @@ class SchemaError:
|
|
|
39
43
|
path: str
|
|
40
44
|
rule: str
|
|
41
45
|
message: str
|
|
46
|
+
severity: str = "error" # "error" (fatal, fails CI) | "warning" (advisory)
|
|
42
47
|
|
|
43
48
|
def format(self, file: str | None = None, line: int | None = None) -> str:
|
|
44
49
|
prefix = file or "<data>"
|
|
45
50
|
if line is not None:
|
|
46
51
|
prefix = f"{prefix}:{line}"
|
|
47
|
-
|
|
52
|
+
marker = "⚠️ " if self.severity == "warning" else ""
|
|
53
|
+
return f"{prefix} – {marker}{self.rule} at {self.path} – {self.message}"
|
|
48
54
|
|
|
49
55
|
|
|
50
56
|
# --- Frontmatter parser (stdlib-only, YAML subset) -------------------------
|
|
@@ -318,12 +324,14 @@ def _validate_string(data: str, schema: dict[str, Any], path: str, errors: list[
|
|
|
318
324
|
pattern = schema.get("pattern")
|
|
319
325
|
if pattern is not None and not re.search(pattern, data):
|
|
320
326
|
errors.append(SchemaError(path, "pattern", f"Value {data!r} does not match /{pattern}/"))
|
|
327
|
+
# Length constraints are advisory (quality, not structural correctness):
|
|
328
|
+
# they surface as warnings, not fatal CI failures. See ADR-031.
|
|
321
329
|
min_len = schema.get("minLength")
|
|
322
330
|
if min_len is not None and len(data) < min_len:
|
|
323
|
-
errors.append(SchemaError(path, "minLength", f"String length {len(data)} < {min_len}"))
|
|
331
|
+
errors.append(SchemaError(path, "minLength", f"String length {len(data)} < {min_len}", severity="warning"))
|
|
324
332
|
max_len = schema.get("maxLength")
|
|
325
333
|
if max_len is not None and len(data) > max_len:
|
|
326
|
-
errors.append(SchemaError(path, "maxLength", f"String length {len(data)} > {max_len}"))
|
|
334
|
+
errors.append(SchemaError(path, "maxLength", f"String length {len(data)} > {max_len}", severity="warning"))
|
|
327
335
|
|
|
328
336
|
|
|
329
337
|
def _validate_integer(data: int, schema: dict[str, Any], path: str, errors: list[SchemaError]) -> None:
|
|
@@ -458,6 +466,7 @@ def _main() -> int:
|
|
|
458
466
|
|
|
459
467
|
total = 0
|
|
460
468
|
failing = 0
|
|
469
|
+
warned = 0
|
|
461
470
|
for root in roots:
|
|
462
471
|
for artefact_type, path in _iter_artefacts(root):
|
|
463
472
|
total += 1
|
|
@@ -468,14 +477,19 @@ def _main() -> int:
|
|
|
468
477
|
continue
|
|
469
478
|
schema = load_schema(artefact_type)
|
|
470
479
|
errors = validate(data, schema)
|
|
471
|
-
if
|
|
480
|
+
fatal = [e for e in errors if e.severity == "error"]
|
|
481
|
+
warnings = [e for e in errors if e.severity == "warning"]
|
|
482
|
+
if fatal:
|
|
472
483
|
failing += 1
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
484
|
+
if warnings:
|
|
485
|
+
warned += 1
|
|
486
|
+
for error in errors:
|
|
487
|
+
marker = "⚠️ " if error.severity == "warning" else "❌ "
|
|
488
|
+
print(f"{marker}[{artefact_type}] {path}: {error.rule} at "
|
|
489
|
+
f"{error.path} – {error.message}")
|
|
476
490
|
|
|
477
491
|
print(f"\n== Frontmatter schema: {total} artefacts, "
|
|
478
|
-
f"{failing} failing ==")
|
|
492
|
+
f"{failing} failing, {warned} with warnings ==")
|
|
479
493
|
return 1 if failing else 0
|
|
480
494
|
|
|
481
495
|
|