@event4u/agent-config 4.8.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/implement-ticket.md +5 -4
- package/.agent-src/rules/language-and-tone.md +4 -10
- package/.agent-src/skills/command-routing/SKILL.md +5 -4
- package/.claude-plugin/marketplace.json +1 -1
- package/CHANGELOG.md +86 -0
- package/CONTRIBUTING.md +19 -0
- package/README.md +11 -0
- package/dist/cli/registry.js +0 -2
- package/dist/cli/registry.js.map +1 -1
- package/dist/discovery/deprecation-report.md +1 -1
- package/dist/discovery/discovery-manifest.json +5 -5
- package/dist/discovery/discovery-manifest.json.sha256 +1 -1
- package/dist/discovery/discovery-manifest.summary.md +1 -1
- package/dist/discovery/orphan-report.md +1 -1
- package/dist/discovery/packs.json +2 -2
- package/dist/discovery/trust-report.md +1 -1
- package/dist/discovery/workspaces.json +2 -2
- package/dist/mcp/registry-manifest.json +2 -2
- package/dist/router.json +1 -1671
- package/docs/benchmark.md +20 -8
- package/docs/benchmarks.md +11 -0
- package/docs/contracts/benchmark-corpus-spec.md +31 -3
- package/docs/contracts/command-surface-tiers.md +1 -1
- package/docs/contracts/hook-architecture-v1.md +33 -0
- package/docs/contracts/migrate-command.md +197 -0
- package/docs/contracts/settings-api.md +2 -1
- package/docs/contracts/value-dashboard-spec.md +374 -0
- package/docs/contracts/value-report-schema.md +150 -0
- package/docs/decisions/ADR-031-validation-severity-tiers-and-projection-roundtrip.md +97 -0
- package/docs/decisions/INDEX.md +1 -0
- package/docs/guidelines/agent-infra/installed-tools-manifest.md +6 -3
- package/docs/guidelines/agent-infra/language-and-tone-examples.md +35 -0
- package/docs/migration/v1-to-v2.md +40 -27
- package/docs/value.md +84 -0
- package/package.json +8 -8
- package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
- package/scripts/_cli/cmd_migrate.py +264 -102
- package/scripts/_cli/cmd_settings_migrate.py +2 -1
- package/scripts/_dispatch.bash +147 -49
- package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
- package/scripts/_lib/install_regenerator.py +129 -0
- package/scripts/_lib/value_ladder.py +599 -0
- package/scripts/_lib/value_report.py +441 -0
- package/scripts/bench_rtk_savings.py +320 -0
- package/scripts/compile_router.py +19 -5
- package/scripts/expected_perms.json +1 -1
- package/scripts/first_run_gate_hook.py +178 -0
- package/scripts/hook_manifest.yaml +16 -7
- package/scripts/hooks/dispatch_hook.py +27 -0
- package/scripts/hooks/dispatch_issues.py +136 -0
- package/scripts/hooks_doctor.py +40 -1
- package/scripts/install.py +25 -21
- package/scripts/inventory_abstraction_budget.py +616 -0
- package/scripts/lint_agents_layout.py +5 -4
- package/scripts/lint_bench_corpus.py +86 -4
- package/scripts/lint_global_paths.py +4 -3
- package/scripts/lint_marketplace_install_completeness.py +188 -0
- package/scripts/lint_value_dashboard.py +218 -0
- package/scripts/render_benchmark_md.py +6 -2
- package/scripts/render_value_md.py +355 -0
- package/scripts/repro/repro_marketplace_install_gap.sh +161 -0
- package/scripts/roadmap_progress_hook.py +23 -0
- package/scripts/router_telemetry.py +470 -0
- package/scripts/validate_frontmatter.py +23 -9
- package/scripts/_cli/cmd_migrate_to_global.py +0 -415
|
@@ -23,6 +23,7 @@ Flags:
|
|
|
23
23
|
"""
|
|
24
24
|
from __future__ import annotations
|
|
25
25
|
|
|
26
|
+
import json
|
|
26
27
|
import re
|
|
27
28
|
import sys
|
|
28
29
|
from pathlib import Path
|
|
@@ -38,6 +39,8 @@ REQUIRE_FULL = "--require-full" in sys.argv
|
|
|
38
39
|
|
|
39
40
|
REPO = Path(__file__).resolve().parents[1]
|
|
40
41
|
CORPUS_DIR = REPO / "tests" / "eval"
|
|
42
|
+
ROUTER_COVERAGE_DIR = REPO / "internal" / "bench" / "corpora" / "router-coverage"
|
|
43
|
+
ROUTER_JSON = REPO / "dist" / "router.json"
|
|
41
44
|
|
|
42
45
|
# Live skill directories live under every artefact root post-monorepo
|
|
43
46
|
# Phase 4 (legacy + packages/*/.agent-src.uncondensed/skills/).
|
|
@@ -46,7 +49,7 @@ from _lib.agent_src import artefact_roots # noqa: E402
|
|
|
46
49
|
|
|
47
50
|
SKILLS_DIRS = [root / "skills" for root in artefact_roots() if (root / "skills").is_dir()]
|
|
48
51
|
|
|
49
|
-
VALID_CATEGORIES = frozenset({"canonical", "ambiguous", "destructive", "long-context"})
|
|
52
|
+
VALID_CATEGORIES = frozenset({"canonical", "ambiguous", "destructive", "long-context", "router-coverage"})
|
|
50
53
|
# Non-dev corpus (pre-spec) uses legacy categories — accept them so the
|
|
51
54
|
# new linter does not break that file. Migration is a follow-up.
|
|
52
55
|
LEGACY_CATEGORIES = frozenset({"content", "consulting", "finance", "ops", "safety"})
|
|
@@ -66,7 +69,40 @@ def live_skills() -> set[str]:
|
|
|
66
69
|
return slugs
|
|
67
70
|
|
|
68
71
|
|
|
69
|
-
def
|
|
72
|
+
def live_rule_ids() -> set[str] | None:
    """Return every rule id declared in dist/router.json.

    Ids are collected from the ``kernel``, ``tier_1`` and ``tier_2``
    sections. Within any section an entry may be a bare id string or an
    object carrying an ``id`` key; entries of any other shape are ignored.

    Returns:
        The set of rule ids, or ``None`` (not an empty set) when the router
        file is missing, unreadable, not valid JSON, or not a JSON object.
        ``None`` signals "cannot validate rule ids — skip the
        unknown-trigger checks" rather than "every referenced id is
        unknown". A missing router is expected on a fresh clone before
        ``task sync``; returning an empty set there would falsely flag
        every intended / opaque trigger as ``unknown_intended_trigger``.
    """
    label = ROUTER_JSON.relative_to(REPO)

    def _skip(state: str, hint: str = "") -> None:
        # One place for the warning so every bail-out path reads the same.
        sys.stderr.write(
            f"warning: {label} {state} — skipping "
            f"trigger rule-id validation{hint}\n"
        )

    try:
        raw = ROUTER_JSON.read_text(encoding="utf-8")
    except FileNotFoundError:
        _skip("missing", " (run `task sync` to generate it)")
        return None
    except (OSError, UnicodeDecodeError):
        # Present but unreadable (permissions, a directory, bad bytes):
        # the linter cannot validate against it, so degrade the same way.
        _skip("unparseable")
        return None

    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        _skip("unparseable")
        return None
    if not isinstance(data, dict):
        # Valid JSON with the wrong top-level shape is just as unusable.
        _skip("unparseable")
        return None

    ids: set[str] = set()
    for section in ("kernel", "tier_1", "tier_2"):
        entries = data.get(section)
        if not isinstance(entries, list):
            continue
        for entry in entries:
            rule_id = entry.get("id") if isinstance(entry, dict) else entry
            if isinstance(rule_id, str) and rule_id:
                ids.add(rule_id)
    return ids
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def lint_corpus(path: Path, skills: set[str], rule_ids: set[str] | None = None) -> list[str]:
|
|
70
106
|
errors: list[str] = []
|
|
71
107
|
try:
|
|
72
108
|
data = yaml.safe_load(path.read_text(encoding="utf-8"))
|
|
@@ -121,7 +157,12 @@ def lint_corpus(path: Path, skills: set[str]) -> list[str]:
|
|
|
121
157
|
errors.append(f"{loc}: empty_prompt")
|
|
122
158
|
|
|
123
159
|
expected = p.get("expected_skills") or []
|
|
124
|
-
if not isinstance(expected, list)
|
|
160
|
+
if not isinstance(expected, list):
|
|
161
|
+
errors.append(f"{loc}: bad_expected_shape")
|
|
162
|
+
elif not expected and cat != "router-coverage":
|
|
163
|
+
# router-coverage corpora can have empty expected_skills —
|
|
164
|
+
# the focus is rule-trigger activation, not skill selection.
|
|
165
|
+
# The intended_triggers field is the load-bearing assertion.
|
|
125
166
|
errors.append(f"{loc}: empty_expected")
|
|
126
167
|
else:
|
|
127
168
|
for slug in expected:
|
|
@@ -133,6 +174,40 @@ def lint_corpus(path: Path, skills: set[str]) -> list[str]:
|
|
|
133
174
|
if not isinstance(carve, list) or not carve:
|
|
134
175
|
errors.append(f"{loc}: missing_carve_out")
|
|
135
176
|
|
|
177
|
+
# router-coverage invariants (Council R3 honesty floor).
|
|
178
|
+
# A task's trigger prediction lives in two buckets:
|
|
179
|
+
# intended_triggers — deterministically replayable (keyword /
|
|
180
|
+
# phrase / command / path with supplied
|
|
181
|
+
# open_files or command context).
|
|
182
|
+
# replay_opaque_triggers — fires at runtime only via an `intent`
|
|
183
|
+
# trigger (or a router coverage gap) the
|
|
184
|
+
# static replay cannot verify. Declared so
|
|
185
|
+
# the telemetry reports it separately, not
|
|
186
|
+
# as false `missed_intended` drift.
|
|
187
|
+
# router-coverage requires at least one bucket non-empty.
|
|
188
|
+
intended = p.get("intended_triggers")
|
|
189
|
+
opaque = p.get("replay_opaque_triggers")
|
|
190
|
+
intended_list = intended if isinstance(intended, list) else []
|
|
191
|
+
opaque_list = opaque if isinstance(opaque, list) else []
|
|
192
|
+
|
|
193
|
+
if intended is not None and not isinstance(intended, list):
|
|
194
|
+
errors.append(f"{loc}: bad_intended_triggers_shape")
|
|
195
|
+
if opaque is not None and not isinstance(opaque, list):
|
|
196
|
+
errors.append(f"{loc}: bad_replay_opaque_triggers_shape")
|
|
197
|
+
|
|
198
|
+
if cat == "router-coverage" and not intended_list and not opaque_list:
|
|
199
|
+
errors.append(f"{loc}: missing_intended_triggers")
|
|
200
|
+
|
|
201
|
+
# A rule belongs to exactly one bucket — both is a contradiction.
|
|
202
|
+
for rid in sorted(set(intended_list) & set(opaque_list)):
|
|
203
|
+
errors.append(f"{loc}: trigger_in_both_buckets: {rid}")
|
|
204
|
+
|
|
205
|
+
# Every referenced id (either bucket) must be a real router rule id.
|
|
206
|
+
if rule_ids is not None:
|
|
207
|
+
for rid in intended_list + opaque_list:
|
|
208
|
+
if rid not in rule_ids:
|
|
209
|
+
errors.append(f"{loc}: unknown_intended_trigger: {rid}")
|
|
210
|
+
|
|
136
211
|
if REQUIRE_FULL and not is_legacy:
|
|
137
212
|
for bucket, want in FULL_COUNTS.items():
|
|
138
213
|
have = bucket_counts.get(bucket, 0)
|
|
@@ -147,14 +222,21 @@ def main() -> int:
|
|
|
147
222
|
sys.stderr.write(f"error: corpus dir missing: {CORPUS_DIR}\n")
|
|
148
223
|
return 2
|
|
149
224
|
corpora = sorted(CORPUS_DIR.glob("corpus-*.yaml"))
|
|
225
|
+
# Phase 2 of road-to-corpus-expansion-evidence-based-cuts adds a second
|
|
226
|
+
# corpus tree under internal/bench/corpora/router-coverage/. Linter scans
|
|
227
|
+
# both with the same invariants — router-coverage corpora additionally
|
|
228
|
+
# require `intended_triggers` per prompt.
|
|
229
|
+
if ROUTER_COVERAGE_DIR.is_dir():
|
|
230
|
+
corpora.extend(sorted(ROUTER_COVERAGE_DIR.glob("*.yaml")))
|
|
150
231
|
if not corpora:
|
|
151
232
|
sys.stderr.write("error: no corpora found\n")
|
|
152
233
|
return 2
|
|
153
234
|
|
|
154
235
|
skills = live_skills()
|
|
236
|
+
rule_ids = live_rule_ids()
|
|
155
237
|
all_errors: list[str] = []
|
|
156
238
|
for path in corpora:
|
|
157
|
-
errs = lint_corpus(path, skills)
|
|
239
|
+
errs = lint_corpus(path, skills, rule_ids)
|
|
158
240
|
if errs:
|
|
159
241
|
all_errors.extend(errs)
|
|
160
242
|
elif not QUIET:
|
|
@@ -3,9 +3,10 @@
|
|
|
3
3
|
|
|
4
4
|
Phase 5.0 / amendment A7 of road-to-global-only-install. Runs BEFORE
|
|
5
5
|
any legacy snapshot write so a perms leak cannot be created by the
|
|
6
|
-
migration itself
|
|
7
|
-
|
|
8
|
-
|
|
6
|
+
migration itself. Historically invoked by `agent-config migrate-to-global`;
|
|
7
|
+
that command was collapsed into `agent-config migrate` (see
|
|
8
|
+
`docs/contracts/migrate-command.md`). The audit now runs standalone via
|
|
9
|
+
`agent-config doctor` or directly through this script.
|
|
9
10
|
|
|
10
11
|
Policy source: scripts/expected_perms.json (parameterised so the policy
|
|
11
12
|
can evolve without code changes).
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Lint that every command in `hooks/hooks.json` resolves to a real
|
|
3
|
+
dispatcher subcommand in `scripts/_dispatch.bash`.
|
|
4
|
+
|
|
5
|
+
Phase 6 of `road-to-hooks-actually-fire-in-consumers`.
|
|
6
|
+
|
|
7
|
+
The linter checks **plugin-side completeness** — the package ships a
|
|
8
|
+
valid `hooks.json` whose every command line points at a subcommand
|
|
9
|
+
the dispatcher knows about. It does NOT check consumer-side
|
|
10
|
+
scaffolding (that's the runtime `dispatch-issues.jsonl` log's job
|
|
11
|
+
from Phase 1).
|
|
12
|
+
|
|
13
|
+
This distinction is load-bearing — see Council R3 finding #1:
|
|
14
|
+
"A valid plugin against an unscaffolded consumer is a PASS;
|
|
15
|
+
the linter must not produce a false-positive on that state."
|
|
16
|
+
|
|
17
|
+
Exit codes:
|
|
18
|
+
0 — every command resolves; clean.
|
|
19
|
+
1 — at least one command references an unknown subcommand.
|
|
20
|
+
2 — schema / file error.
|
|
21
|
+
"""
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import argparse
|
|
25
|
+
import json
|
|
26
|
+
import re
|
|
27
|
+
import sys
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
HOOKS_JSON = REPO_ROOT / "hooks" / "hooks.json"
DISPATCH_BASH = REPO_ROOT / "scripts" / "_dispatch.bash"

# Prefix shared by every diagnostic this linter writes.
_TOOL = "lint-marketplace-install"

# Exit status reserved for schema / file errors (status 1 means "a hook
# references an unknown subcommand", so the two must not be conflated).
_EXIT_SCHEMA_ERROR = 2


def _schema_error(message: str) -> SystemExit:
    """Write *message* to stderr and return a ``SystemExit`` with status 2.

    ``raise SystemExit("text")`` would print the text but exit with
    status 1, which is indistinguishable from a lint failure. Callers
    ``raise`` the returned exception.
    """
    sys.stderr.write(f"{_TOOL}: {message}\n")
    return SystemExit(_EXIT_SCHEMA_ERROR)


def _normalise_subcommand(subcommand: str) -> str:
    """Return *subcommand* with ``:`` and ``-`` replaced by ``_``."""
    return subcommand.replace(":", "_").replace("-", "_")


# Map agent-config-cli subcommand → dispatcher function name. The
# subcommand is what appears after `./agent-config <subcommand>` in
# the hooks.json command line; the function is what's defined in
# _dispatch.bash. The user-facing subcommand uses colons; the
# function uses underscores (e.g. `dispatch:hook` → `cmd_dispatch_hook`).
def subcommand_to_function(subcommand: str) -> str:
    """Return the dispatcher function name for a user-facing subcommand."""
    return f"cmd_{_normalise_subcommand(subcommand)}"


def load_hook_commands(hooks_path: Path) -> list[tuple[str, str]]:
    """Return ``[(event_name, command_line)]`` for every hook entry.

    Groups and entries that do not have the expected shape are skipped;
    only non-blank string ``command`` values are returned.

    Raises:
        SystemExit: with status 2 when the file cannot be read, is not
            valid JSON, or its top level / ``hooks`` value is not an object.
    """
    try:
        data = json.loads(hooks_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        raise _schema_error(f"cannot read {hooks_path}: {exc}") from exc

    if not isinstance(data, dict):
        raise _schema_error(f"{hooks_path} top level is not an object")
    hooks = data.get("hooks") or {}
    if not isinstance(hooks, dict):
        raise _schema_error(f"{hooks_path} `hooks` is not an object")

    out: list[tuple[str, str]] = []
    for event, groups in hooks.items():
        if not isinstance(groups, list):
            continue
        for group in groups:
            if not isinstance(group, dict):
                continue
            entries = group.get("hooks")
            if not isinstance(entries, list):
                continue
            for entry in entries:
                if not isinstance(entry, dict):
                    continue
                cmd = entry.get("command")
                if isinstance(cmd, str) and cmd.strip():
                    out.append((str(event), cmd))
    return out


# Pattern: `"$CLAUDE_PROJECT_DIR"/agent-config <subcommand> [args...]`.
# Accepts both quoted and bare CLAUDE_PROJECT_DIR.
_CMD_RE = re.compile(
    r'(?:"?\$\{?CLAUDE_PROJECT_DIR\}?"?/)?agent-config\s+([a-zA-Z0-9:_-]+)'
)

# Bash function definitions for dispatcher subcommands. Both legal forms
# are recognised: `cmd_x() {` / `cmd_x () {` and `function cmd_x {` /
# `function cmd_x() {`. A bare call such as `cmd_x "$@"` does not match.
_FUNC_DEF_RE = re.compile(
    r"^[ \t]*(?:function[ \t]+cmd_([A-Za-z0-9_]+)(?![A-Za-z0-9_])"
    r"|cmd_([A-Za-z0-9_]+)[ \t]*\([ \t]*\))",
    re.MULTILINE,
)


def extract_subcommand(command_line: str) -> str | None:
    """Pull the agent-config subcommand out of a hooks.json command line.

    Returns ``None`` when the line does not invoke ``agent-config``.
    """
    found = _CMD_RE.search(command_line)
    return found.group(1) if found else None


def load_dispatcher_subcommands(dispatch_path: Path) -> set[str]:
    """Return the set of subcommand identifiers the dispatcher knows.

    Reads `cmd_<name>` function definitions from _dispatch.bash. Mapping
    underscores back to colons / hyphens is ambiguous, so for each
    function the set holds the underscore form (`dispatch_hook`) plus the
    variant with the first underscore turned into a colon
    (`dispatch:hook`). Other spellings are matched by normalising the
    hook side instead (see `lint`).

    Raises:
        SystemExit: with status 2 when the file cannot be read.
    """
    try:
        text = dispatch_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        raise _schema_error(f"cannot read {dispatch_path}: {exc}") from exc

    out: set[str] = set()
    for match in _FUNC_DEF_RE.finditer(text):
        ident = match.group(1) or match.group(2)
        out.add(ident)
        head, sep, tail = ident.partition("_")
        if sep:
            out.add(f"{head}:{tail}")
    return out


def lint(hooks_path: Path = HOOKS_JSON, dispatch_path: Path = DISPATCH_BASH) -> int:
    """Check that every hook command resolves to a dispatcher subcommand.

    A subcommand resolves when its normalised form (``:`` and ``-``
    replaced by ``_``, the same mapping `subcommand_to_function` applies)
    names a `cmd_<name>` function in the dispatcher script.

    Returns:
        0 when every command resolves, 1 when at least one command is
        unknown or does not invoke ``agent-config`` at all, 2 when either
        input file is missing.

    Raises:
        SystemExit: with status 2 when an input file exists but cannot be
            read or has the wrong schema.
    """
    for required in (hooks_path, dispatch_path):
        if not required.is_file():
            sys.stderr.write(f"{_TOOL}: {required} not found\n")
            return _EXIT_SCHEMA_ERROR

    commands = load_hook_commands(hooks_path)
    known = load_dispatcher_subcommands(dispatch_path)

    issues: list[str] = []
    checked = 0
    for event, cmd in commands:
        sub = extract_subcommand(cmd)
        if sub is None:
            issues.append(
                f"  {event}: command does not reference `agent-config <subcommand>`: "
                f"{cmd!r}"
            )
            continue
        checked += 1
        # `known` always holds the underscore form of every function, so
        # normalising the hook side accepts `a:b`, `a-b`, `a:b-c`, ...
        if _normalise_subcommand(sub) not in known:
            issues.append(
                f"  {event}: unknown_dispatcher_subcommand: {sub!r} "
                f"(not in scripts/_dispatch.bash)"
            )

    if issues:
        try:
            relative = hooks_path.resolve().relative_to(REPO_ROOT)
        except ValueError:
            # Path outside the repo (e.g. passed via --hooks-json).
            relative = hooks_path
        sys.stderr.write(f"{_TOOL}: {len(issues)} issue(s) in {relative}:\n")
        for line in issues:
            sys.stderr.write(line + "\n")
        return 1

    print(
        f"✅ lint-marketplace-install: {checked} hook command(s) checked, "
        f"all resolve to known dispatcher subcommands."
    )
    return 0


def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the command-line arguments in *argv* (without the program name)."""
    # __doc__ is None under `python -OO`; fall back to no description.
    summary = (__doc__ or "").strip().splitlines()
    p = argparse.ArgumentParser(description=summary[0] if summary else None)
    p.add_argument(
        "--hooks-json",
        type=Path,
        default=HOOKS_JSON,
        help="Path to hooks/hooks.json (default: %(default)s)",
    )
    p.add_argument(
        "--dispatch-bash",
        type=Path,
        default=DISPATCH_BASH,
        help="Path to scripts/_dispatch.bash (default: %(default)s)",
    )
    return p.parse_args(argv)


def main(argv: list[str] | None = None) -> int:
    """CLI entry point; returns the process exit status."""
    args = parse_args(argv if argv is not None else sys.argv[1:])
    return lint(args.hooks_json, args.dispatch_bash)


if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Lint `docs/value.md` for structural invariants.
|
|
3
|
+
|
|
4
|
+
Phase 5 Step 3 of `agents/roadmaps/road-to-readable-value-dashboard.md`.
|
|
5
|
+
|
|
6
|
+
Invariants enforced (any violation → exit 1):
|
|
7
|
+
|
|
8
|
+
1. Required sections present (intro / Reference scale / Panel A / Panel B
|
|
9
|
+
/ Glossar / NETTO line).
|
|
10
|
+
2. Every cost-ladder rung row cites a `source_report` (or `n/a` for the
|
|
11
|
+
baseline rung) — no rung sneaks in without traceability.
|
|
12
|
+
3. No `measured` rung renders a `pending` source — internal consistency
|
|
13
|
+
of confidence ↔ source state.
|
|
14
|
+
4. No negative-saving label: the literal string "Ersparnis" must not
|
|
15
|
+
appear in a row where the displayed Δ-token value is positive (the
|
|
16
|
+
load + terse rungs are *costs*, not savings; mislabelling either is
|
|
17
|
+
a credibility failure the page explicitly forbids).
|
|
18
|
+
5. The `latest.json` exists and its `cost_ladder` rung ids match the
|
|
19
|
+
five canonical rungs — the renderer cannot silently drop a rung.
|
|
20
|
+
|
|
21
|
+
The linter loads `internal/bench/reports/value/latest.json` directly
|
|
22
|
+
(not just the rendered `.md`) for items (3) and (5) — the rendered
|
|
23
|
+
text alone is too lossy.
|
|
24
|
+
|
|
25
|
+
Output: one violation per line in non-quiet mode; one-line summary in
|
|
26
|
+
quiet mode. Exit 0 on clean, 1 on any violation.
|
|
27
|
+
"""
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import argparse
|
|
31
|
+
import json
|
|
32
|
+
import re
|
|
33
|
+
import sys
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
from typing import Any, Dict, List
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
DASHBOARD = REPO_ROOT / "docs" / "value.md"
LATEST = REPO_ROOT / "internal" / "bench" / "reports" / "value" / "latest.json"

# Literal markers that must each appear somewhere in the dashboard text.
REQUIRED_SECTIONS = (
    "# Value Dashboard",
    "## Reference scale",
    "## Panel A",
    "## Panel B",
    "## Glossar",
    "**NETTO",
)

# Expected `cost_ladder` rung ids, in order.
CANONICAL_RUNG_IDS = ("baseline", "load", "condense", "rtk", "terse")

# A table cell holding a signed integer such as "+4 843" or "-186".
# `\s` (not a literal space) so no-break / thin spaces used as thousands
# separators are accepted as well.
_SIGNED_CELL_RE = re.compile(r"\|\s*([+-][0-9\s]*[0-9])\s*\|")


def _log(msg: str, quiet: bool, *, err: bool = False) -> None:
    """Print *msg*: always to stderr when *err*, else to stdout unless *quiet*."""
    if err:
        print(msg, file=sys.stderr)
    elif not quiet:
        print(msg)


def _ladder(report: Dict[str, Any]) -> List[Any]:
    """Return the report's ``cost_ladder`` list (empty if absent or not a list)."""
    ladder = report.get("cost_ladder")
    return ladder if isinstance(ladder, list) else []


def _display(path: Path, repo_root: Path) -> Path:
    """Return *path* relative to *repo_root* when possible, else unchanged."""
    try:
        return path.relative_to(repo_root)
    except ValueError:
        return path


def check_required_sections(text: str) -> List[str]:
    """Return one violation per `REQUIRED_SECTIONS` marker missing from *text*."""
    return [
        f"missing required section: '{section}'"
        for section in REQUIRED_SECTIONS
        if section not in text
    ]


def check_source_citations(report: Dict[str, Any]) -> List[str]:
    """Return a violation for every rung lacking a usable ``source_report``.

    A missing or falsy value is reported as "no source_report field"; a
    value that is not a string or is whitespace-only as "empty". Ladder
    entries that are not objects are reported too.
    """
    violations = []
    for position, rung in enumerate(_ladder(report)):
        if not isinstance(rung, dict):
            violations.append(f"cost_ladder entry #{position} is not an object")
            continue
        source = rung.get("source_report")
        if not source:
            violations.append(
                f"rung '{rung.get('id')}' has no source_report field"
            )
        elif not isinstance(source, str) or not source.strip():
            violations.append(
                f"rung '{rung.get('id')}' has empty source_report"
            )
    return violations


def check_confidence_vs_source(
    report: Dict[str, Any], repo_root: Path | None = None
) -> List[str]:
    """A `measured` rung's source_report must exist on disk.

    Args:
        report: the parsed value report.
        repo_root: directory ``source_report`` paths are resolved against;
            defaults to `REPO_ROOT`.
    """
    root = REPO_ROOT if repo_root is None else repo_root
    violations = []
    for rung in _ladder(report):
        if not isinstance(rung, dict) or rung.get("confidence") != "measured":
            continue
        source = rung.get("source_report") or ""
        if not isinstance(source, str):
            continue  # wrong type is reported by check_source_citations
        if source in ("", "n/a"):
            continue  # baseline rung
        if not (root / source).exists():
            violations.append(
                f"rung '{rung.get('id')}' is 'measured' but its "
                f"source_report does not exist: {source}"
            )
    return violations


def check_no_negative_savings(text: str) -> List[str]:
    """A rung whose Δ-token value is positive must not be labelled a saving.

    Heuristic: scan the table rows of the "## Panel A" section (up to the
    next "## " heading) and flag any row that contains the German word
    "Ersparnis" and whose first signed-integer cell starts with "+".
    """
    violations = []
    in_panel_a = False
    for line in text.splitlines():
        if line.startswith("## Panel A"):
            in_panel_a = True
            continue
        if not in_panel_a:
            continue
        if line.startswith("## "):
            break  # next section: Panel A is over
        if not line.startswith("|") or "Ersparnis" not in line:
            continue
        # The format renders deltas as "+4 843" / "-186".
        cell = _SIGNED_CELL_RE.search(line)
        if cell and cell.group(1).startswith("+"):
            token_value = cell.group(1)
            violations.append(
                "row labelled 'Ersparnis' has a positive Δ-token value: "
                f"{token_value!r} — positive deltas are costs, not savings."
            )
    return violations


def check_canonical_rung_set(report: Dict[str, Any]) -> List[str]:
    """Return a violation unless the rung ids equal `CANONICAL_RUNG_IDS` in order."""
    ids = [
        rung.get("id") if isinstance(rung, dict) else None
        for rung in _ladder(report)
    ]
    if ids != list(CANONICAL_RUNG_IDS):
        return [
            f"cost_ladder rung ids must be {CANONICAL_RUNG_IDS}, "
            f"got {tuple(ids)}"
        ]
    return []


def _fail(violations: List[str], quiet: bool) -> int:
    """Report *violations* on stderr and return exit status 1.

    Non-quiet mode prints one line per violation; quiet mode prints a
    single summary line, as the module docstring specifies.
    """
    if quiet:
        _log(
            f"FAIL: lint_value_dashboard: {len(violations)} violation(s) "
            "(re-run without --quiet for details)",
            quiet,
            err=True,
        )
    else:
        for violation in violations:
            _log(f"FAIL: {violation}", quiet, err=True)
    return 1


def lint(
    quiet: bool = False,
    *,
    dashboard: Path | None = None,
    latest: Path | None = None,
    repo_root: Path | None = None,
) -> int:
    """Run every dashboard check and return the process exit status.

    Args:
        quiet: collapse the per-violation lines into one summary line.
        dashboard: rendered dashboard to check; defaults to `DASHBOARD`.
        latest: value report JSON; defaults to `LATEST`.
        repo_root: base for ``source_report`` paths and for shortening
            paths in messages; defaults to `REPO_ROOT`.

    Returns:
        0 when clean (including the placeholder case where the report
        JSON does not exist yet), 1 on any violation or unreadable input.
    """
    # Resolved at call time so the module-level defaults stay patchable.
    dashboard = DASHBOARD if dashboard is None else dashboard
    latest = LATEST if latest is None else latest
    repo_root = REPO_ROOT if repo_root is None else repo_root

    if not dashboard.exists():
        _log(
            f"FAIL: dashboard not found: {_display(dashboard, repo_root)}",
            quiet,
            err=True,
        )
        return 1
    try:
        # Explicit UTF-8: the page contains non-ASCII text ("Δ", "—").
        text = dashboard.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        _log(f"FAIL: cannot read {dashboard.name}: {exc}", quiet, err=True)
        return 1

    violations: List[str] = []
    violations.extend(check_required_sections(text))
    violations.extend(check_no_negative_savings(text))

    if not latest.exists():
        # No JSON to deep-check — that's a placeholder dashboard.
        # Required-sections check still applies; we degrade gracefully.
        if violations:
            return _fail(violations, quiet)
        # The success summary is a single line and is printed in both modes.
        _log(
            "lint_value_dashboard: dashboard is a placeholder "
            "(no value-v1.json yet) — structural checks pass.",
            quiet=False,
        )
        return 0

    try:
        report = json.loads(latest.read_text(encoding="utf-8"))
    except json.JSONDecodeError as exc:
        _log(f"FAIL: {latest.name} is not valid JSON: {exc}", quiet, err=True)
        return 1
    except (OSError, UnicodeDecodeError) as exc:
        _log(f"FAIL: cannot read {latest.name}: {exc}", quiet, err=True)
        return 1
    if not isinstance(report, dict):
        _log(f"FAIL: {latest.name} top level is not an object", quiet, err=True)
        return 1

    violations.extend(check_source_citations(report))
    violations.extend(check_confidence_vs_source(report, repo_root))
    violations.extend(check_canonical_rung_set(report))
    if violations:
        return _fail(violations, quiet)

    behaviour = report.get("behaviour")
    behaviour_count = len(behaviour) if isinstance(behaviour, (list, dict)) else 0
    _log(
        (
            "lint_value_dashboard: OK — "
            f"{len(_ladder(report))} rungs, "
            f"{behaviour_count} behaviour metrics, all "
            "sections present, all sources cited."
        ),
        quiet=False,
    )
    return 0


def parse_args(argv: List[str]) -> argparse.Namespace:
    """Parse the command-line arguments in *argv* (without the program name)."""
    parser = argparse.ArgumentParser(
        description="Lint docs/value.md for structural invariants."
    )
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="Print a one-line summary instead of one line per violation.",
    )
    return parser.parse_args(argv)


def main(argv: List[str] | None = None) -> int:
    """CLI entry point; returns the process exit status."""
    args = parse_args(argv if argv is not None else sys.argv[1:])
    return lint(quiet=args.quiet)


if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -103,10 +103,14 @@ def render_headline(track_a: dict, track_b: dict) -> str:
|
|
|
103
103
|
lines = [
|
|
104
104
|
"## Headline",
|
|
105
105
|
"",
|
|
106
|
+
"> **Track A confirms surface availability** — a precondition, not an impact metric. "
|
|
107
|
+
"For the impact view (cost-ladder + behaviour with vs. without), see "
|
|
108
|
+
"[`docs/value.md`](value.md).",
|
|
109
|
+
"",
|
|
106
110
|
"| Metric | with | without | delta |",
|
|
107
111
|
"|---|---|---|---|",
|
|
108
|
-
f"| Track A
|
|
109
|
-
f"{fmt_pct((a_with_acc or 0) - (a_wo_acc or 0))} |",
|
|
112
|
+
f"| Track A surface-availability | {fmt_pct(a_with_acc)} | {fmt_pct(a_wo_acc)} | "
|
|
113
|
+
f"{fmt_pct((a_with_acc or 0) - (a_wo_acc or 0))} _(structural — files present)_ |",
|
|
110
114
|
f"| Track B completion-rate | {fmt_pct(b_with_comp)} | {fmt_pct(b_wo_comp)} | "
|
|
111
115
|
f"{fmt_pct((b_with_comp or 0) - (b_wo_comp or 0))} |",
|
|
112
116
|
f"| Track B mean wall-time | {fmt_num(b_results.get('mean_wall_time'))}s "
|