@event4u/agent-config 5.5.0 → 5.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/image/analyse.md +51 -0
- package/.agent-src/commands/image/create.md +53 -0
- package/.agent-src/commands/image/verify.md +48 -0
- package/.agent-src/commands/image.md +69 -0
- package/.agent-src/commands/video/from-song.md +40 -6
- package/.agent-src/contexts/authority/commit-mechanics.md +8 -0
- package/.agent-src/rules/commit-policy.md +3 -8
- package/.agent-src/rules/media-sync-ground-truth.md +58 -0
- package/.agent-src/skills/image-analyser/SKILL.md +121 -0
- package/.agent-src/skills/image-analyser/canon-spec.md +109 -0
- package/.agent-src/skills/image-analyser/evals/triggers.json +16 -0
- package/.agent-src/skills/image-creator/SKILL.md +117 -0
- package/.agent-src/skills/image-creator/evals/triggers.json +16 -0
- package/.agent-src/skills/song-to-script/SKILL.md +36 -13
- package/.claude-plugin/marketplace.json +7 -1
- package/CHANGELOG.md +56 -0
- package/README.md +2 -2
- package/config/agent-settings.template.yml +18 -0
- package/dist/discovery/deprecation-report.md +1 -1
- package/dist/discovery/discovery-manifest.json +171 -18
- package/dist/discovery/discovery-manifest.json.sha256 +1 -1
- package/dist/discovery/discovery-manifest.summary.md +4 -4
- package/dist/discovery/orphan-report.md +1 -1
- package/dist/discovery/packs.json +15 -8
- package/dist/discovery/trust-report.md +3 -3
- package/dist/discovery/workspaces.json +13 -6
- package/dist/mcp/registry-manifest.json +3 -3
- package/dist/router.json +1 -1
- package/dist/server/schemas/settings.js +4 -0
- package/dist/server/schemas/settings.js.map +1 -1
- package/docs/architecture.md +3 -3
- package/docs/catalog.md +20 -6
- package/docs/contracts/benchmark-report-schema.md +12 -10
- package/docs/contracts/command-clusters.md +1 -0
- package/docs/contracts/rule-router.md +39 -0
- package/docs/contracts/value-dashboard-spec.md +7 -3
- package/docs/contracts/value-report-schema.md +6 -1
- package/docs/getting-started.md +2 -2
- package/docs/value.md +17 -17
- package/package.json +1 -1
- package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
- package/scripts/_lib/bench_report.py +13 -14
- package/scripts/_lib/bench_telegraph_report.py +1 -2
- package/scripts/_lib/token_count.py +95 -0
- package/scripts/_lib/value_report.py +3 -3
- package/scripts/ai-video/adapters/higgsfield.sh +163 -6
- package/scripts/ai-video/adapters/openai-images.sh +92 -6
- package/scripts/audit_auto_rules.py +22 -6
- package/scripts/audit_command_surface.py +6 -1
- package/scripts/audit_initial_context.py +210 -0
- package/scripts/bench_ab_diff.py +4 -11
- package/scripts/bench_run.py +2 -3
- package/scripts/bench_runner.py +2 -2
- package/scripts/condense.py +44 -3
- package/scripts/iron_law_sha.py +14 -5
- package/scripts/measure_rule_budget.py +15 -0
- package/scripts/pack_mcp_content.py +1 -1
- package/scripts/project_thin_rules.py +168 -0
- package/scripts/render_value_md.py +14 -23
- package/scripts/schemas/command.schema.json +1 -1
- package/scripts/schemas/rule.schema.json +1 -1
- package/scripts/schemas/skill.schema.json +2 -2
- package/scripts/trigger_coverage.py +129 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Thin-projection of the rule layer (lean-initial-context build-out, Phase 3.1).
|
|
3
|
+
|
|
4
|
+
The dominant always-on cost is rule BODIES (~58k GPT tok; kernel only ~6.5k).
|
|
5
|
+
0B.6 verdict: demote every non-kernel rule body to a progressive-disclosure
|
|
6
|
+
pointer the agent resolves on trigger-match (the one mechanism 0B.5 confirmed
|
|
7
|
+
works for the primary tool — like skills). The kernel stays full-bodied.
|
|
8
|
+
|
|
9
|
+
A **thin** rule entry keeps the matching signal (frontmatter `description` +
|
|
10
|
+
`triggers`) so the router still selects it, and replaces the body with a
|
|
11
|
+
one-line pointer to the full text. The agent loads the body on match.
|
|
12
|
+
|
|
13
|
+
This module is the mechanism + a measurement harness. It writes to a target
|
|
14
|
+
dir of your choosing — it never overwrites the live `.claude/` / `.augment/`
|
|
15
|
+
projections. condense.py reads `lean_projection.mode` (default `eager-all`)
|
|
16
|
+
to decide whether the real generate-tools path calls in here; until that flag
|
|
17
|
+
is flipped + live-A/B-validated, the default projection is unchanged.
|
|
18
|
+
|
|
19
|
+
Usage:
|
|
20
|
+
python3 scripts/project_thin_rules.py --measure # measure delta, no write
|
|
21
|
+
python3 scripts/project_thin_rules.py --out <dir> # write thin rules to <dir>
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import argparse
|
|
27
|
+
import json
|
|
28
|
+
import re
|
|
29
|
+
import sys
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
|
|
32
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
33
|
+
sys.path.insert(0, str(REPO_ROOT / "scripts"))
|
|
34
|
+
from _lib import token_count # noqa: E402
|
|
35
|
+
|
|
36
|
+
RULES_SOURCE = REPO_ROOT / ".agent-src" / "rules"
|
|
37
|
+
ROUTER = REPO_ROOT / "dist" / "router.json"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def kernel_ids() -> set[str]:
    """Return the ids of the kernel (always full-bodied) rules.

    The list is read from the compiled router file at ``ROUTER``; a router
    without a ``"kernel"`` key yields an empty set.
    """
    router = json.loads(ROUTER.read_text(encoding="utf-8"))
    return set(router.get("kernel", []))
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def split_frontmatter(text: str) -> tuple[str, str]:
    """Split a rule file into its front matter and its body.

    Args:
        text: Full file content.

    Returns:
        ``(frontmatter_including_fences, body)``. When ``text`` does not open
        with a ``---`` fence line, or the fence is never closed, the front
        matter is ``""`` and the body is ``text`` unchanged.
    """
    if not text.startswith("---\n"):
        return "", text

    # Search from index 3 (the newline that ends the opening fence) so an
    # empty front matter -- "---\n---\n" -- is recognised as well.
    end = text.find("\n---\n", 3)
    if end != -1:
        cut = end + 5  # len("\n---\n")
        return text[:cut], text[cut:]

    # Closing fence is the very last line and the file lacks a final newline.
    if text.endswith("\n---"):
        return text, ""

    return "", text
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _description(fm: str) -> str:
|
|
55
|
+
m = re.search(r'^description:\s*"?(.+?)"?\s*$', fm, re.MULTILINE)
|
|
56
|
+
return m.group(1).strip() if m else ""
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# How many trigger keywords/phrases to surface as the always-on match hint.
|
|
60
|
+
# The full trigger set lives in dist/router.json (compiled from source) — the
|
|
61
|
+
# projected entry only needs enough signal for the agent to recognise a match
|
|
62
|
+
# and load the body. The router, not this list, drives actual selection.
|
|
63
|
+
_TRIGGER_HINT_LIMIT = 6
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _trigger_hint(fm: str) -> str:
|
|
67
|
+
"""A short, comma-joined sample of the rule's trigger keywords/phrases."""
|
|
68
|
+
hits: list[str] = []
|
|
69
|
+
for m in re.finditer(r'^\s*-\s*(?:keyword|phrase|intent):\s*"?(.+?)"?\s*$', fm, re.MULTILINE):
|
|
70
|
+
hits.append(m.group(1).strip())
|
|
71
|
+
if len(hits) >= _TRIGGER_HINT_LIMIT:
|
|
72
|
+
break
|
|
73
|
+
return ", ".join(hits)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def thin_entry(rule_id: str, text: str) -> str:
    """Render the progressive-disclosure pointer that replaces a rule body.

    Only the match signal survives: a heading derived from ``rule_id``, the
    front-matter description, an optional "Fires on" trigger sample, and a
    relative link to the full rule text. The rule's front matter and body
    are not copied into the result.

    Args:
        rule_id: Rule file stem, e.g. ``commit-policy``.
        text: Full content of the rule file.

    Returns:
        A two-line markdown snippet terminated by a newline.
    """
    frontmatter = split_frontmatter(text)[0]
    summary = _description(frontmatter)
    trigger_sample = _trigger_hint(frontmatter)

    heading = rule_id.replace("-", " ").title()
    # The "Fires on" clause is omitted entirely when no trigger was found.
    fires = f" Fires on: {trigger_sample}." if trigger_sample else ""

    header_line = f"## {heading}\n"
    lead = f"> Routed rule — load the body on trigger-match.{fires} {summary} "
    pointer = f"Body: [`{rule_id}`](../../.agent-src.uncondensed/rules/{rule_id}.md)\n"
    return header_line + lead + pointer
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def build_thin(rules_dir: Path = RULES_SOURCE) -> dict[str, str]:
    """Project every ``*.md`` rule in ``rules_dir`` to its thin or full form.

    Args:
        rules_dir: Directory holding the rule markdown files.

    Returns:
        ``{filename: text}`` in sorted path order. Rules whose stem is in the
        kernel set keep their full text; all others become ``thin_entry``
        pointers.
    """
    full_bodied = kernel_ids()
    projected: dict[str, str] = {}
    for rule_path in sorted(rules_dir.glob("*.md")):
        source = rule_path.read_text(encoding="utf-8")
        if rule_path.stem in full_bodied:
            projected[rule_path.name] = source
        else:
            projected[rule_path.name] = thin_entry(rule_path.stem, source)
    return projected
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def measure(rules_dir: Path = RULES_SOURCE) -> dict:
    """Compare the eager and thin token footprint of the rule layer.

    The eager footprint is every rule file concatenated in sorted order; the
    thin footprint is the concatenated output of ``build_thin``. Both blobs
    are measured with ``token_count.measure``.

    Args:
        rules_dir: Directory holding the rule markdown files.

    Returns:
        A dict with rule counts (``rules_total``, ``kernel_full``,
        ``non_kernel_thinned``), GPT token figures (``eager_gpt``,
        ``thin_gpt``, ``saved_gpt``, ``saved_pct``), character counts
        (``eager_chars``, ``thin_chars``) and ``token_method``.
        ``saved_pct`` is ``0.0`` when the eager token count is zero.
    """
    kernel = kernel_ids()

    # List the directory once and reuse the listing, so the counts below are
    # guaranteed to describe the same files that went into the eager blob.
    rule_paths = sorted(rules_dir.glob("*.md"))

    eager_blob = "".join(p.read_text(encoding="utf-8") for p in rule_paths)
    thin_blob = "".join(build_thin(rules_dir).values())
    eager = token_count.measure(eager_blob)
    thin = token_count.measure(thin_blob)

    total = len(rule_paths)
    kernel_present = len(kernel & {p.stem for p in rule_paths})
    eager_tokens = eager["tokens_gpt"]
    saved = eager_tokens - thin["tokens_gpt"]
    # Guard the division: an empty rule directory yields zero eager tokens.
    saved_pct = round(100 * saved / eager_tokens, 1) if eager_tokens else 0.0

    return {
        "rules_total": total,
        "kernel_full": kernel_present,
        "non_kernel_thinned": total - kernel_present,
        "eager_gpt": eager_tokens,
        "thin_gpt": thin["tokens_gpt"],
        "saved_gpt": saved,
        "saved_pct": saved_pct,
        "eager_chars": eager["chars"],
        "thin_chars": thin["chars"],
        "token_method": token_count.method_note(),
    }
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def write_thin(out_dir: Path, rules_dir: Path = RULES_SOURCE) -> int:
    """Write the thin projection of ``rules_dir`` into ``out_dir``.

    ``out_dir`` (and any missing parents) is created first. Each projected
    rule is written as UTF-8 under its original filename.

    Args:
        out_dir: Destination directory.
        rules_dir: Directory holding the source rule files.

    Returns:
        Number of files written.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    projected = build_thin(rules_dir)
    for filename in projected:
        out_dir.joinpath(filename).write_text(projected[filename], encoding="utf-8")
    return len(projected)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    With ``--out`` the thin rule files are written and nothing is measured.
    Otherwise the eager-vs-thin measurement is printed, as JSON when
    ``--json`` is given and as a short text report when it is not.
    ``--measure`` is accepted but its value is never consulted: measuring is
    simply what happens whenever ``--out`` is absent.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit code, always ``0``.
    """
    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    parser.add_argument(
        "--measure", action="store_true", help="print the eager-vs-thin token delta"
    )
    parser.add_argument("--out", type=Path, help="write thin rule files to this dir")
    parser.add_argument("--json", action="store_true")
    opts = parser.parse_args(argv)

    if opts.out:
        count = write_thin(opts.out)
        print(f"wrote {count} thin rule files → {opts.out}")
        return 0

    stats = measure()
    if opts.json:
        print(json.dumps(stats, indent=2, sort_keys=True))
        return 0

    report = [
        f"Rule-layer thin projection (kernel full-bodied + {stats['non_kernel_thinned']} non-kernel pointers):",
        f"  eager:  {stats['eager_gpt']:>6} GPT tok  ({stats['eager_chars']:,} chars)",
        f"  thin:   {stats['thin_gpt']:>6} GPT tok  ({stats['thin_chars']:,} chars)",
        f"  saved:  {stats['saved_gpt']:>6} GPT tok  ({stats['saved_pct']}% of the rule layer)",
        f"  method: {stats['token_method']}",
    ]
    print("\n".join(report))
    return 0
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
if __name__ == "__main__":
|
|
168
|
+
sys.exit(main())
|
|
@@ -57,10 +57,6 @@ def fmt_signed_int(value: int) -> str:
|
|
|
57
57
|
return f"{value:+,}".replace(",", " ")
|
|
58
58
|
|
|
59
59
|
|
|
60
|
-
def fmt_eur(value: float) -> str:
|
|
61
|
-
return f"{value:+.2f} €"
|
|
62
|
-
|
|
63
|
-
|
|
64
60
|
def fmt_pct(value: float) -> str:
|
|
65
61
|
return f"{value:+.2f}%"
|
|
66
62
|
|
|
@@ -89,7 +85,6 @@ def render_intro(report: Dict[str, Any]) -> str:
|
|
|
89
85
|
avg_in = ref.get("avg_input_tokens", 8000)
|
|
90
86
|
avg_out = ref.get("avg_output_tokens", 600)
|
|
91
87
|
tier = ref.get("model_tier", "sonnet")
|
|
92
|
-
sourced = ref.get("pricing_sourced_on", "—")
|
|
93
88
|
return (
|
|
94
89
|
f"# Value Dashboard — was kostet das Paket, was bringt es?\n"
|
|
95
90
|
"\n"
|
|
@@ -101,11 +96,12 @@ def render_intro(report: Dict[str, Any]) -> str:
|
|
|
101
96
|
"\n"
|
|
102
97
|
"## Wie diese Seite zu lesen ist\n"
|
|
103
98
|
"\n"
|
|
104
|
-
"**Panel A (
|
|
99
|
+
"**Panel A (Token-Leiter)** — von oben nach unten lesen. Jede "
|
|
105
100
|
"Stufe sagt: *was sie macht*, *wie viele Input-Tokens sie pro "
|
|
106
|
-
"Request hinzufügt oder spart*, *
|
|
107
|
-
|
|
108
|
-
"
|
|
101
|
+
"Request hinzufügt oder spart*, und *wo wir kumulativ stehen*. "
|
|
102
|
+
"Die fett gedruckte **NETTO**-Zeile am Ende ist die Antwort. "
|
|
103
|
+
"Bewusst rein in Tokens — kein €-Vergleich, da Abo-Nutzer keine "
|
|
104
|
+
"Per-Request-API-Preise zahlen.\n"
|
|
109
105
|
"\n"
|
|
110
106
|
"**Panel B (Verhalten)** — vier reale Vergleiche, *mit* vs. "
|
|
111
107
|
"*ohne* Paket. Hier liegt der nicht-Token-Wert: passende Skill-"
|
|
@@ -122,8 +118,7 @@ def render_intro(report: Dict[str, Any]) -> str:
|
|
|
122
118
|
f"- **{requests:,}** Requests, durchschnittlich "
|
|
123
119
|
f"**{avg_in:,}** Input-Tokens und **{avg_out:,}** Output-Tokens "
|
|
124
120
|
"pro Request\n"
|
|
125
|
-
f"- Modell-Tier: `{tier}
|
|
126
|
-
f"Preisstand `{sourced}` (Quelle: `internal/bench/pricing.yaml`)\n"
|
|
121
|
+
f"- Modell-Tier (Workload-Annahme): `{tier}`\n"
|
|
127
122
|
"- Wer einen anderen Workload fährt, rechnet selbst nach — die "
|
|
128
123
|
"Methodik ist offengelegt; nichts ist hardcodiert versteckt.\n"
|
|
129
124
|
)
|
|
@@ -135,8 +130,8 @@ def render_panel_a(report: Dict[str, Any]) -> str:
|
|
|
135
130
|
"Liest sich von oben nach unten. Positive Δ-Werte = das Paket "
|
|
136
131
|
"*kostet* Tokens (Regel-Load ist die ehrliche Up-Front-Steuer); "
|
|
137
132
|
"negative Δ-Werte = das Paket *spart* Tokens.\n",
|
|
138
|
-
"| Stufe | Was sie tut | Δ Tokens |
|
|
139
|
-
"
|
|
133
|
+
"| Stufe | Was sie tut | Δ Tokens | Kumulativ | Quelle |",
|
|
134
|
+
"|---|---|---:|---:|---|",
|
|
140
135
|
]
|
|
141
136
|
for rung in report.get("cost_ladder", []):
|
|
142
137
|
if rung["id"] == "baseline":
|
|
@@ -145,7 +140,6 @@ def render_panel_a(report: Dict[str, Any]) -> str:
|
|
|
145
140
|
label_cell = rung["label"]
|
|
146
141
|
what = rung.get("what_it_does", "")
|
|
147
142
|
token_delta = int(rung.get("token_delta", 0))
|
|
148
|
-
eur_delta = float(rung.get("eur_delta", 0.0))
|
|
149
143
|
cum = float(rung.get("cumulative_pct", 0.0))
|
|
150
144
|
conf = confidence_badge(rung.get("confidence", "pending"))
|
|
151
145
|
source = rung.get("source_report", "")
|
|
@@ -154,17 +148,16 @@ def render_panel_a(report: Dict[str, Any]) -> str:
|
|
|
154
148
|
what = f"{what} ⚠️ erst teurer"
|
|
155
149
|
lines.append(
|
|
156
150
|
f"| {label_cell} | {what} | "
|
|
157
|
-
f"{fmt_signed_int(token_delta)} |
|
|
151
|
+
f"{fmt_signed_int(token_delta)} | "
|
|
158
152
|
f"{fmt_pct(cum)} | `{source}` · {conf} |"
|
|
159
153
|
)
|
|
160
154
|
if rung.get("footnote"):
|
|
161
155
|
lines.append(
|
|
162
|
-
f"| | _Fußnote:_ {rung['footnote']} | | | |
|
|
156
|
+
f"| | _Fußnote:_ {rung['footnote']} | | | |"
|
|
163
157
|
)
|
|
164
158
|
|
|
165
159
|
totals = report.get("totals", {})
|
|
166
160
|
cum_tokens = int(totals.get("cumulative_token_delta", 0))
|
|
167
|
-
cum_eur = float(totals.get("cumulative_eur_delta", 0.0))
|
|
168
161
|
cum_pct = float(totals.get("cumulative_pct", 0.0))
|
|
169
162
|
verdict = totals.get("net_verdict", "—")
|
|
170
163
|
verdict_label = {
|
|
@@ -177,8 +170,6 @@ def render_panel_a(report: Dict[str, Any]) -> str:
|
|
|
177
170
|
"",
|
|
178
171
|
f"{verdict_label} — "
|
|
179
172
|
f"**{fmt_signed_int(cum_tokens)} Tokens / Request**, "
|
|
180
|
-
f"**{fmt_eur(cum_eur)}** auf "
|
|
181
|
-
f"{report.get('reference_scale', {}).get('requests', 1000):,} Requests, "
|
|
182
173
|
f"kumulativ **{fmt_pct(cum_pct)}** vs. Baseline.\n",
|
|
183
174
|
]
|
|
184
175
|
)
|
|
@@ -250,10 +241,10 @@ def render_glossary() -> str:
|
|
|
250
241
|
"nutzt. Spart Output-Tokens — wenn der Korpus es belohnt.\n"
|
|
251
242
|
"- **Ohne Paket / Mit Paket** — *without the package* / *with "
|
|
252
243
|
"the package* — die zwei Arme des A/B-Vergleichs.\n"
|
|
253
|
-
"-
|
|
254
|
-
"
|
|
255
|
-
"
|
|
256
|
-
"
|
|
244
|
+
"- **Δ Tokens** — Input-Token-Differenz pro Request gegenüber der "
|
|
245
|
+
"Baseline. Bewusst die einzige Kosten-Einheit: ein €-Vergleich "
|
|
246
|
+
"würde Per-Request-API-Preise unterstellen, die Abo-Nutzer nicht "
|
|
247
|
+
"zahlen.\n"
|
|
257
248
|
)
|
|
258
249
|
|
|
259
250
|
|
|
@@ -15,8 +15,8 @@
|
|
|
15
15
|
"description": {
|
|
16
16
|
"type": "string",
|
|
17
17
|
"minLength": 1,
|
|
18
|
-
"maxLength":
|
|
19
|
-
"description": "Trigger phrase; ≤ 200 chars recommended,
|
|
18
|
+
"maxLength": 220,
|
|
19
|
+
"description": "Trigger phrase; ≤ 200 chars recommended, 220 is the ceiling (lean-initial-context: descriptions load eagerly via progressive disclosure). Over-cap is a soft warning, not a hard fail — a warning window so authors adapt."
|
|
20
20
|
},
|
|
21
21
|
"source": {
|
|
22
22
|
"type": "string",
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Trigger-coverage suite (roadmap Phase 2.1 / 2.2).
|
|
3
|
+
|
|
4
|
+
The deterministic *must-load* floor for the lean-initial-context migration.
|
|
5
|
+
Before any auto-tier rule body is demoted to a router-resolved pointer
|
|
6
|
+
(Phase 3), this suite proves the router still fires that rule on
|
|
7
|
+
representative task phrasings — so a needed rule can never silently fail
|
|
8
|
+
to surface.
|
|
9
|
+
|
|
10
|
+
Cases live in `tests/eval/trigger-coverage.yaml` and have the shape:
|
|
11
|
+
|
|
12
|
+
- id: secrets-edit
|
|
13
|
+
prompt: "add a webhook secret to the billing service auth flow"
|
|
14
|
+
expect: [security-sensitive-stop] # MUST be in the fired set
|
|
15
|
+
|
|
16
|
+
Matching is deterministic against `dist/router.json` (NOT the semantic
|
|
17
|
+
production router — this is a reproducible floor that catches a removed
|
|
18
|
+
trigger in CI):
|
|
19
|
+
|
|
20
|
+
- kernel rules always fire (always-on layer).
|
|
21
|
+
- a tier rule fires iff any of its triggers matches the prompt:
|
|
22
|
+
- `keyword` → case-insensitive substring.
|
|
23
|
+
- `intent` → every alpha word (len>2) of the intent phrase appears as a
|
|
24
|
+
token in the prompt (so "structural decision" fires on a prompt that
|
|
25
|
+
contains both "structural" and "decision").
|
|
26
|
+
|
|
27
|
+
A case fails when an expected rule is NOT in the fired set. Exit 1 on any
|
|
28
|
+
miss → the merge that would have shrunk the rule is blocked (2.2).
|
|
29
|
+
|
|
30
|
+
Usage:
|
|
31
|
+
python3 scripts/trigger_coverage.py # run, human report
|
|
32
|
+
python3 scripts/trigger_coverage.py --json
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
from __future__ import annotations
|
|
36
|
+
|
|
37
|
+
import argparse
|
|
38
|
+
import json
|
|
39
|
+
import re
|
|
40
|
+
import sys
|
|
41
|
+
from pathlib import Path
|
|
42
|
+
|
|
43
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
44
|
+
ROUTER = REPO_ROOT / "dist" / "router.json"
|
|
45
|
+
CORPUS = REPO_ROOT / "tests" / "eval" / "trigger-coverage.yaml"
|
|
46
|
+
|
|
47
|
+
try:
|
|
48
|
+
import yaml
|
|
49
|
+
except ImportError: # pragma: no cover
|
|
50
|
+
sys.stderr.write("error: PyYAML required (pip install pyyaml)\n")
|
|
51
|
+
sys.exit(2)
|
|
52
|
+
|
|
53
|
+
_WORD = re.compile(r"[a-z][a-z0-9_]+")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _tokens(text: str) -> set[str]:
|
|
57
|
+
return {w for w in _WORD.findall(text.lower()) if len(w) > 2}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def load_router() -> dict:
    """Read the compiled router file at ``ROUTER`` and return the parsed JSON."""
    raw = ROUTER.read_text(encoding="utf-8")
    return json.loads(raw)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def fired_rules(prompt: str, router: dict) -> set[str]:
    """Return every rule id the deterministic matcher surfaces for ``prompt``.

    Kernel ids are always included. A ``tier_1`` / ``tier_2`` entry is added
    as soon as one of its triggers matches:

    * ``keyword`` -- the keyword is a case-insensitive substring of the prompt;
    * ``intent``  -- the intent yields at least one token and all of its
      tokens occur among the prompt's tokens.

    A trigger carrying a ``keyword`` key is judged on the keyword alone;
    triggers with neither key never match.

    Args:
        prompt: Task phrasing to route.
        router: Parsed router manifest.

    Returns:
        Set of fired rule ids.
    """
    lowered = prompt.lower()
    prompt_words = _tokens(prompt)

    def _matches(trigger) -> bool:
        if "keyword" in trigger:
            return trigger["keyword"].lower() in lowered
        if "intent" in trigger:
            needed = _tokens(trigger["intent"])
            return bool(needed) and needed <= prompt_words
        return False

    surfaced: set[str] = set(router.get("kernel", []))
    for tier_name in ("tier_1", "tier_2"):
        for rule in router.get(tier_name, []):
            # any() stops at the first matching trigger, like the early break.
            if any(_matches(trigger) for trigger in rule.get("triggers", [])):
                surfaced.add(rule["id"])
    return surfaced
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def run(corpus: list[dict], router: dict) -> tuple[list[dict], int]:
    """Evaluate every corpus case against the router.

    Args:
        corpus: Cases with ``id``, ``prompt`` and an optional ``expect`` list.
        router: Parsed router manifest.

    Returns:
        ``(results, misses)``. Each result holds ``id``, ``ok``, ``missing``
        (expected rules that did not fire) and ``expect``; ``misses`` is the
        number of cases with at least one missing rule.
    """
    results = []
    for case in corpus:
        surfaced = fired_rules(case["prompt"], router)
        wanted = case.get("expect", [])
        absent = [rule for rule in wanted if rule not in surfaced]
        results.append(
            {"id": case["id"], "ok": not absent, "missing": absent, "expect": wanted}
        )
    misses = sum(1 for outcome in results if not outcome["ok"])
    return results, misses
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for the trigger-coverage suite.

    Loads the case corpus and the compiled router, evaluates every case and
    prints either a JSON document (``--json``) or a per-case report.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` when every case passes, ``1`` when at least one expected rule
        did not fire, ``2`` when the router or the corpus file is missing.
    """
    ap = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    ap.add_argument("--json", action="store_true")
    args = ap.parse_args(argv)

    if not ROUTER.is_file():
        sys.stderr.write(f"error: {ROUTER} missing — run compile_router first\n")
        return 2
    # Same treatment for the corpus: report a missing file on the exit-2
    # error path instead of letting read_text() raise a traceback.
    if not CORPUS.is_file():
        sys.stderr.write(f"error: {CORPUS} missing — no cases to check\n")
        return 2

    # An empty YAML document loads as None; treat it as "no cases".
    corpus = yaml.safe_load(CORPUS.read_text(encoding="utf-8")) or []
    router = load_router()
    results, misses = run(corpus, router)

    if args.json:
        print(json.dumps({"cases": len(results), "misses": misses,
                          "results": results}, indent=2, sort_keys=True))
    else:
        for r in results:
            mark = "✅" if r["ok"] else "❌"
            detail = "" if r["ok"] else f"  MISSING: {', '.join(r['missing'])}"
            print(f"  {mark}  {r['id']}{detail}")
        print()
        if misses:
            print(f"❌  trigger-coverage: {misses}/{len(results)} case(s) failed — "
                  "a required rule does not fire. Blocking.")
        else:
            print(f"✅  trigger-coverage: {len(results)}/{len(results)} pass")
    return 1 if misses else 0
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
if __name__ == "__main__":
|
|
129
|
+
sys.exit(main())
|