@event4u/agent-config 2.19.0 → 2.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/agent-status.md +29 -0
- package/.agent-src/commands/onboard.md +221 -81
- package/.agent-src/packs/README.md +49 -0
- package/.agent-src/packs/agency-delivery.yml +63 -0
- package/.agent-src/packs/content-engine.yml +53 -0
- package/.agent-src/packs/founder-mvp.yml +51 -0
- package/.agent-src/presets/README.md +26 -0
- package/.agent-src/presets/balanced.yml +34 -0
- package/.agent-src/presets/fast.yml +31 -0
- package/.agent-src/presets/strict.yml +38 -0
- package/.agent-src/profiles/README.md +29 -0
- package/.agent-src/profiles/agency.yml +27 -0
- package/.agent-src/profiles/content_creator.yml +25 -0
- package/.agent-src/profiles/developer.yml +26 -0
- package/.agent-src/profiles/finance.yml +24 -0
- package/.agent-src/profiles/founder.yml +25 -0
- package/.agent-src/profiles/ops.yml +25 -0
- package/.agent-src/rules/no-cheap-questions.md +25 -17
- package/.agent-src/skills/adr-create/SKILL.md +78 -68
- package/.agent-src/skills/subagent-orchestration/SKILL.md +33 -0
- package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
- package/.agent-src/templates/skill-archive-note.md +101 -0
- package/.claude-plugin/marketplace.json +1 -1
- package/CHANGELOG.md +52 -30
- package/README.md +68 -72
- package/config/agent-settings.template.yml +22 -0
- package/docs/adrs/caveman/0001-default-off-until-bench.md +93 -0
- package/docs/adrs/caveman/README.md +9 -0
- package/docs/adrs/cost/0001-hard-stop-hook.md +114 -0
- package/docs/adrs/cost/README.md +9 -0
- package/docs/adrs/memory/0001-consumer-side-snapshot.md +111 -0
- package/docs/adrs/memory/README.md +9 -0
- package/docs/adrs/router/0001-three-tier-routing.md +119 -0
- package/docs/adrs/router/README.md +9 -0
- package/docs/adrs/schema/0001-json-schema-frontmatter.md +102 -0
- package/docs/adrs/schema/README.md +9 -0
- package/docs/adrs/smoke/0001-per-tier-smoke-scripts.md +99 -0
- package/docs/adrs/smoke/README.md +9 -0
- package/docs/architecture/current-onboard-baseline.md +126 -0
- package/docs/architecture/current-safety-behavior.md +137 -0
- package/docs/archive/CHANGELOG-pre-2.16.0.md +48 -0
- package/docs/contracts/adr-layout.md +108 -0
- package/docs/contracts/benchmark-corpus-spec.md +97 -0
- package/docs/contracts/benchmark-report-schema.md +111 -0
- package/docs/contracts/command-clusters.md +1 -0
- package/docs/contracts/command-taxonomy.md +137 -0
- package/docs/contracts/compression-default-kill-criterion.md +69 -0
- package/docs/contracts/config-presets.md +144 -0
- package/docs/contracts/cost-dashboard.md +143 -0
- package/docs/contracts/cost-enforcement.md +134 -0
- package/docs/contracts/file-ownership-matrix.json +0 -7
- package/docs/contracts/mcp-tool-inventory.md +53 -0
- package/docs/contracts/measurement-baseline.md +102 -0
- package/docs/contracts/namespace.md +125 -0
- package/docs/contracts/profile-system.md +142 -0
- package/docs/contracts/safety-model.md +129 -0
- package/docs/contracts/smoke-contracts.md +144 -0
- package/docs/contracts/workflow-packs.md +121 -0
- package/docs/decisions/ADR-010-profile-pack-preset-boundary.md +132 -0
- package/docs/decisions/INDEX.md +1 -0
- package/docs/featured-commands.md +27 -0
- package/docs/parity/bench-ruflo.json +58 -0
- package/docs/parity/bench.json +41 -0
- package/docs/parity/ruflo.md +46 -0
- package/docs/profiles.md +91 -0
- package/package.json +1 -1
- package/scripts/_cli/cmd_explain.py +250 -0
- package/scripts/_lib/bench_cost.py +138 -0
- package/scripts/_lib/bench_quality.py +118 -0
- package/scripts/_lib/bench_report.py +150 -0
- package/scripts/agent-config +13 -0
- package/scripts/audit_adr_coverage.py +175 -0
- package/scripts/audit_mcp_tools.py +146 -0
- package/scripts/bench_baseline_ready.py +108 -0
- package/scripts/bench_drift_check.py +151 -0
- package/scripts/bench_per_tool.py +216 -0
- package/scripts/bench_run.py +155 -0
- package/scripts/config/__init__.py +9 -0
- package/scripts/config/presets.py +206 -0
- package/scripts/config/profiles.py +173 -0
- package/scripts/cost/budget.mjs +73 -12
- package/scripts/cost/preflight.mjs +89 -0
- package/scripts/lint_archived_skills.py +143 -0
- package/scripts/lint_bench_corpus.py +161 -0
- package/scripts/lint_namespace.py +135 -0
- package/scripts/skill_overlap.py +204 -0
- package/scripts/skill_usage_collect.py +191 -0
- package/scripts/skill_usage_report.py +162 -0
- package/scripts/smoke/kernel.sh +101 -0
- package/scripts/smoke/router.sh +129 -0
- package/scripts/smoke/schema.sh +71 -0
- package/scripts/smoke/skills.sh +101 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# Report emitter for `scripts/bench_run.py` — step-4 Phase 2 Step 4.
|
|
2
|
+
#
|
|
3
|
+
# Serializes the unified report dict to JSON + Markdown per
|
|
4
|
+
# docs/contracts/benchmark-report-schema.md. Filename format:
|
|
5
|
+
# `bench/reports/<UTC ISO-8601 timestamp with ':' replaced by '-'>-<corpus_id>.{json,md}`.
|
|
6
|
+
"""Report emitter for the bench runner."""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def utc_now_filename_stamp() -> str:
    """Return the current UTC time as a filename-safe, sortable stamp.

    The layout is ISO-8601-like, but the time fields are separated by ``-``
    instead of ``:`` so the value contains no colon.
    """
    now = datetime.now(tz=timezone.utc)
    return f"{now:%Y-%m-%dT%H-%M-%SZ}"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def utc_now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    iso_layout = "%Y-%m-%dT%H:%M:%SZ"
    current = datetime.now(tz=timezone.utc)
    return current.strftime(iso_layout)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def report_paths(reports_dir: Path, corpus_id: str, stamp: str) -> tuple[Path, Path]:
    """Build the JSON and Markdown report paths for one run.

    Both files live in *reports_dir* and share the stem
    ``<stamp>-<corpus_id>``; the JSON path is returned first.
    """
    stem = "-".join((stamp, corpus_id))
    json_path = reports_dir / (stem + ".json")
    markdown_path = reports_dir / (stem + ".md")
    return json_path, markdown_path
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def write_json(path: Path, report: dict[str, Any]) -> None:
    """Write *report* to *path* as 2-space-indented JSON with a trailing newline.

    Missing parent directories are created before the report is serialized.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report, indent=2)
    path.write_text(f"{serialized}\n", encoding="utf-8")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _selection_section(selection: dict[str, Any]) -> str:
|
|
35
|
+
lines = [
|
|
36
|
+
"## Selection accuracy",
|
|
37
|
+
"",
|
|
38
|
+
f"- top-K = **{selection['top_k']}** · "
|
|
39
|
+
f"hit **{selection['prompts_hit']} / {selection['prompts_total']}** · "
|
|
40
|
+
f"accuracy **{selection['selection_accuracy']:.2%}** · "
|
|
41
|
+
f"target **{selection['target']:.2%}** · "
|
|
42
|
+
f"verdict **{'PASS' if selection['passed'] else 'FAIL'}**",
|
|
43
|
+
"",
|
|
44
|
+
"| id | hit | expected | top-K ranked |",
|
|
45
|
+
"|---|---|---|---|",
|
|
46
|
+
]
|
|
47
|
+
for r in selection.get("per_prompt", []):
|
|
48
|
+
mark = "✅" if r["hit"] else "❌"
|
|
49
|
+
expected = ", ".join(r.get("expected_skills") or []) or "—"
|
|
50
|
+
ranked = ", ".join(r.get("top_k_ranked") or []) or "—"
|
|
51
|
+
lines.append(f"| `{r['id']}` | {mark} | {expected} | {ranked} |")
|
|
52
|
+
return "\n".join(lines)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _cost_section(cost: dict[str, Any]) -> str:
|
|
56
|
+
if cost.get("source") == "unavailable":
|
|
57
|
+
return (
|
|
58
|
+
"## Cost capture\n\n"
|
|
59
|
+
f"- **source:** `unavailable` ({cost.get('reason', 'unknown')})\n"
|
|
60
|
+
f"- **scanned:** `{cost.get('scanned_path', '—')}`\n"
|
|
61
|
+
f"- **pricing sourced on:** {cost.get('pricing_sourced_on') or '—'}\n\n"
|
|
62
|
+
"_No session jsonl available. Run `node scripts/cost/track.mjs` "
|
|
63
|
+
"from a real Claude Code session to populate agents/cost-tracking/sessions.jsonl._\n"
|
|
64
|
+
)
|
|
65
|
+
totals = cost["totals"]
|
|
66
|
+
lines = [
|
|
67
|
+
"## Cost capture",
|
|
68
|
+
"",
|
|
69
|
+
f"- **source:** `{cost['source']}` · sessions scanned: **{cost['sessions_scanned']}**",
|
|
70
|
+
f"- **pricing sourced on:** {cost.get('pricing_sourced_on') or '—'}",
|
|
71
|
+
f"- **total cost:** **${totals['total_cost_usd']:.6f}**",
|
|
72
|
+
"",
|
|
73
|
+
"| tier | messages | cost (USD) |",
|
|
74
|
+
"|---|---:|---:|",
|
|
75
|
+
]
|
|
76
|
+
for tier, slot in cost["per_tier"].items():
|
|
77
|
+
if slot["messages"] == 0 and slot["cost_usd"] == 0.0:
|
|
78
|
+
continue
|
|
79
|
+
lines.append(f"| {tier} | {slot['messages']} | ${slot['cost_usd']:.6f} |")
|
|
80
|
+
lines += [
|
|
81
|
+
"",
|
|
82
|
+
"| metric | value |",
|
|
83
|
+
"|---|---:|",
|
|
84
|
+
f"| input_tokens | {totals['input_tokens']} |",
|
|
85
|
+
f"| output_tokens | {totals['output_tokens']} |",
|
|
86
|
+
f"| cache_read_input_tokens | {totals['cache_read_input_tokens']} |",
|
|
87
|
+
f"| cache_creation_input_tokens | {totals['cache_creation_input_tokens']} |",
|
|
88
|
+
]
|
|
89
|
+
return "\n".join(lines)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _quality_section(quality: dict[str, Any]) -> str:
|
|
93
|
+
if quality["source"] == "not_collected":
|
|
94
|
+
return (
|
|
95
|
+
"## Quality probe\n\n"
|
|
96
|
+
f"- **source:** `not_collected` · assertions declared: "
|
|
97
|
+
f"**{quality['prompts_with_assertion']}**\n"
|
|
98
|
+
"- _Pass `--agent-output <path-to-outputs.json>` (map of `id -> str`) "
|
|
99
|
+
"to score the rubrics. Schema invariant: missing output keeps "
|
|
100
|
+
"`verdict.overall` at `partial`._\n"
|
|
101
|
+
)
|
|
102
|
+
lines = [
|
|
103
|
+
"## Quality probe",
|
|
104
|
+
"",
|
|
105
|
+
f"- **source:** `{quality['source']}` · "
|
|
106
|
+
f"passing **{quality['prompts_passing']} / {quality['prompts_with_assertion']}** · "
|
|
107
|
+
f"score **{quality['quality_score']:.2%}**",
|
|
108
|
+
"",
|
|
109
|
+
"| id | kind | passed | assertion |",
|
|
110
|
+
"|---|---|---|---|",
|
|
111
|
+
]
|
|
112
|
+
for r in quality.get("per_prompt", []):
|
|
113
|
+
mark = "✅" if r["passed"] is True else ("❌" if r["passed"] is False else "—")
|
|
114
|
+
lines.append(f"| `{r['id']}` | {r['assertion_kind']} | {mark} | `{r['assertion']}` |")
|
|
115
|
+
return "\n".join(lines)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def render_markdown(report: dict[str, Any]) -> str:
    """Render the full Markdown benchmark report.

    The document is, in order: headline, selection section, cost section,
    quality section, and notes, separated by blank lines.
    """
    corpus, sel, cost, qual, verdict = (
        report[key]
        for key in ("corpus", "selection", "cost", "quality", "verdict")
    )

    # The headline shows the session count unless cost capture was unavailable,
    # in which case the source label itself is shown.
    if cost["source"] != "unavailable":
        cost_origin = "sessions=" + str(cost["sessions_scanned"])
    else:
        cost_origin = cost["source"]

    headline_lines = [
        f"# Benchmark Report — `{corpus['id']}` · {report['generated_at']}",
        "",
        "## Headline",
        "",
        f"- **selection** {sel['selection_accuracy']:.2%} (target {sel['target']:.2%}) → **{verdict['selection']}**",
        f"- **cost** ${cost['totals']['total_cost_usd']:.6f} ({cost_origin})",
        f"- **quality** {qual['quality_score']:.2%} → **{verdict['quality']}**",
        f"- **overall** → **{verdict['overall']}**",
    ]
    headline = "\n".join(headline_lines) + "\n"

    notes_lines = [
        "## Notes",
        "",
        f"- corpus path: `{corpus['path']}` · prompts: **{corpus['prompt_count']}**",
        "- pricing: `bench/pricing.yaml`",
        f"- baseline collector: `{report['runner']['baseline_collector']}`",
    ]
    notes = "\n".join(notes_lines) + "\n"

    sections = [
        headline,
        _selection_section(sel),
        _cost_section(cost),
        _quality_section(qual),
        notes,
    ]
    return "\n\n".join(sections) + "\n"
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def write_markdown(path: Path, report: dict[str, Any]) -> None:
    """Render *report* to Markdown and write it to *path* as UTF-8.

    Missing parent directories are created before rendering.
    """
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    document = render_markdown(report)
    path.write_text(document, encoding="utf-8")
|
package/scripts/agent-config
CHANGED
|
@@ -97,6 +97,10 @@ Tier 1 — power-user (release shape, audit, migration):
|
|
|
97
97
|
Lists missing, modified, and foreign files.
|
|
98
98
|
Exits 1 on drift, 2 on missing lockfile.
|
|
99
99
|
Flags: --json | --project=<path>
|
|
100
|
+
explain Read-only decision-chain trace.
|
|
101
|
+
Usage: explain config | explain rule <name>
|
|
102
|
+
| explain route "<text>"
|
|
103
|
+
Flags: --json | --project=<path>
|
|
100
104
|
migrate One-shot migration off legacy composer / npm install paths
|
|
101
105
|
Flags: --dry-run (detect only)
|
|
102
106
|
first-run Guided first-run setup — cost profile, settings, tooling
|
|
@@ -749,6 +753,14 @@ cmd_versions() {
|
|
|
749
753
|
exec env PYTHONPATH="$PACKAGE_ROOT" python3 -m scripts._cli.cmd_versions "$@"
|
|
750
754
|
}
|
|
751
755
|
|
|
756
|
+
# `agent-config explain <config|rule|route>` — print the decision chain
|
|
757
|
+
# behind a configuration or routing outcome. Read-only diagnostic; never
|
|
758
|
+
# edits state. See scripts/_cli/cmd_explain.py.
|
|
759
|
+
cmd_explain() {
  # Helper defined elsewhere in this script; presumably aborts when python3 is missing.
  require_python3
  # Replace this shell process with the Python CLI module, forwarding all
  # arguments unchanged; PYTHONPATH lets `scripts._cli` resolve from the package root.
  exec env PYTHONPATH="$PACKAGE_ROOT" python3 -m scripts._cli.cmd_explain "$@"
}
|
|
763
|
+
|
|
752
764
|
main() {
|
|
753
765
|
local cmd="${1-}"
|
|
754
766
|
[[ $# -gt 0 ]] && shift || true
|
|
@@ -801,6 +813,7 @@ main() {
|
|
|
801
813
|
prune) cmd_prune "$@" ;;
|
|
802
814
|
doctor) cmd_doctor "$@" ;;
|
|
803
815
|
versions) cmd_versions "$@" ;;
|
|
816
|
+
explain) cmd_explain "$@" ;;
|
|
804
817
|
help|--help|-h|"")
|
|
805
818
|
# Optional `--tier=0|1|all` filter (default 0).
|
|
806
819
|
local tier_arg="0"
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Audit per-area ADR coverage against docs/contracts/ and the canonical
|
|
3
|
+
AREAS inventory. Contract: docs/contracts/adr-layout.md.
|
|
4
|
+
|
|
5
|
+
Modes:
|
|
6
|
+
--report (default) one-shot inventory: which areas exist, ADR count
|
|
7
|
+
per area, contracts missing a bootstrap ADR.
|
|
8
|
+
--check exit 1 on hard failures (number gaps, missing area README,
|
|
9
|
+
broken supersedes); exit 0 with warnings on missing
|
|
10
|
+
bootstrap ADRs and dangling references.
|
|
11
|
+
--regen-area-readme <area>
|
|
12
|
+
rewrite docs/adrs/<area>/README.md from the area's ADR
|
|
13
|
+
frontmatter. Idempotent.
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
import argparse, re, sys
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
ROOT = Path(__file__).resolve().parent.parent
|
|
20
|
+
ADR_ROOT = ROOT / "docs" / "adrs"
|
|
21
|
+
CONTRACT_ROOT = ROOT / "docs" / "contracts"
|
|
22
|
+
|
|
23
|
+
# Canonical area inventory. To add an area: add it here, then run
|
|
24
|
+
# `python3 scripts/audit_adr_coverage.py --check` in the same PR.
|
|
25
|
+
AREAS: dict[str, dict[str, str]] = {
|
|
26
|
+
"cost": {"contract": "cost-enforcement.md",
|
|
27
|
+
"scope": "Budget ladder, hard-stop hook, cost reporting and dashboards."},
|
|
28
|
+
"caveman": {"contract": "compression-default-kill-criterion.md",
|
|
29
|
+
"scope": "Caveman-speak compression, decompression, reversibility guards."},
|
|
30
|
+
"schema": {"contract": "agents/docs/frontmatter-contract.md",
|
|
31
|
+
"scope": "Frontmatter schemas, v2 rigor, lint behaviour for skills / rules / commands."},
|
|
32
|
+
"router": {"contract": "rule-router.md",
|
|
33
|
+
"scope": "router.json shape, tier semantics, dispatch precedence."},
|
|
34
|
+
"smoke": {"contract": "smoke-contracts.md",
|
|
35
|
+
"scope": "Per-tier smoke contracts, baseline locks, regression gates."},
|
|
36
|
+
"memory": {"contract": "agent-memory-contract.md",
|
|
37
|
+
"scope": "Memory MCP, propose / promote / poison flow, runtime-trust scoring."},
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
NAMED = re.compile(r"^(\d{4})-([a-z0-9-]+)\.md$")
|
|
41
|
+
FM = re.compile(r"^---\n(.*?)\n---", re.DOTALL)
|
|
42
|
+
FIELD = re.compile(r"^([a-z_]+):\s*(.+?)\s*$", re.MULTILINE)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def parse_fm(text: str) -> dict[str, str]:
    """Extract flat ``key: value`` pairs from the frontmatter found in *text*.

    Returns an empty dict when the ``FM`` pattern does not match. Values have
    surrounding spaces and quote characters stripped.
    """
    block = FM.search(text)
    if block is None:
        return {}
    fields: dict[str, str] = {}
    for key, raw_value in FIELD.findall(block.group(1)):
        fields[key] = raw_value.strip(" \"'")
    return fields
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def scan_area(area: str) -> tuple[list[dict], list[str]]:
    """Collect the ADR files of one area.

    Returns ``(adrs, errors)``. Each ADR dict carries ``num``, ``slug`` and
    ``path`` from the filename plus whatever frontmatter fields were parsed.
    Files are visited in sorted filename order. Errors are reported for
    filenames that do not match ``NNNN-<slug>.md`` and for the first position
    at which the numbering is not 1, 2, 3, ... A missing area directory
    yields two empty lists.
    """
    problems: list[str] = []
    directory = ADR_ROOT / area
    if not directory.exists():
        return [], problems

    found: list[dict] = []
    for md_file in sorted(directory.glob("*.md")):
        filename = md_file.name
        if filename == "README.md":
            continue
        named = NAMED.match(filename)
        if named is None:
            problems.append(f"{area}/{filename}: filename does not match NNNN-<slug>.md")
            continue
        number, slug = named.group(1), named.group(2)
        frontmatter = parse_fm(md_file.read_text(encoding="utf-8"))
        found.append({"num": number, "slug": slug, "path": filename, **frontmatter})

    # Gap check: only the first out-of-sequence position is reported.
    numbers = [int(entry["num"]) for entry in found]
    first_gap = next(
        ((pos, num) for pos, num in enumerate(numbers, start=1) if num != pos),
        None,
    )
    if first_gap is not None:
        pos, num = first_gap
        problems.append(f"{area}/: number gap at position {pos} (got {num:04d})")
    return found, problems
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _contract_path(meta: dict[str, str]) -> Path:
    """Resolve ``meta["contract"]`` to a path.

    A reference containing ``/`` is taken relative to the repository root;
    a bare filename is looked up under ``docs/contracts/``.
    """
    reference = meta["contract"]
    if "/" in reference:
        return ROOT / reference
    return CONTRACT_ROOT / reference
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def render_area_readme(area: str, meta: dict[str, str], adrs: list[dict]) -> str:
    """Build the Markdown text of ``docs/adrs/<area>/README.md``.

    The README contains the area heading, its scope as a block quote, a
    contract line (a link when the contract file exists, otherwise a
    "not yet published" note) and a table of the area's ADRs.
    """
    out = [f"# ADRs — `{area}`", "", f"> {meta['scope']}", ""]

    contract_path = _contract_path(meta)
    published = contract_path.exists()
    reference = meta["contract"]
    if published:
        repo_rel = contract_path.relative_to(ROOT)
    elif "/" in reference:
        repo_rel = Path(reference)
    else:
        repo_rel = Path(f"docs/contracts/{reference}")

    # The README sits in docs/adrs/<area>/, so the link climbs three
    # directories to the repository root before following the repo-relative path.
    link_target = Path("..", "..", "..") / repo_rel
    if published:
        out.append(f"Contract: [`{repo_rel}`]({link_target}).")
    else:
        out.append(f"Contract: _not yet published_ (`{repo_rel}`).")

    out.extend([
        "",
        "| # | Title | Status | Date | Supersedes |",
        "|---|---|---|---|---|",
    ])
    for adr in adrs:
        # Title comes from the ``decision`` field, falling back to the slug.
        heading = adr.get("decision", adr["slug"]).replace("-", " ").title()
        cells = [
            f"[{adr['num']}]({adr['path']})",
            heading,
            adr.get("status", "—"),
            adr.get("date", "—"),
            adr.get("supersedes", "—"),
        ]
        out.append("| " + " | ".join(f"{cell}" for cell in cells) + " |")
    if not adrs:
        out.append("| _none yet_ | — | — | — | — |")
    return "\n".join(out) + "\n"
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def cmd_report(args) -> int:
    """Print a Markdown coverage table with one row per canonical area.

    Each row shows the contract reference, ADR count, README presence and
    whether the area has at least one ADR. Always returns 0.
    """
    for header_line in (
        "## ADR coverage report",
        "",
        "| Area | Contract | ADRs | README | Status |",
        "|---|---|---:|:---:|---|",
    ):
        print(header_line)

    areas_without_adr = 0
    for area, meta in AREAS.items():
        adrs, _errors = scan_area(area)
        readme_cell = "✅" if (ADR_ROOT / area / "README.md").exists() else "—"
        contract_name = meta["contract"]
        has_contract = _contract_path(meta).exists()
        if adrs:
            status = "ok"
        else:
            status = "missing bootstrap"
            areas_without_adr += 1
        if has_contract:
            contract_cell = contract_name
        else:
            contract_cell = f"_{contract_name}_ (no contract)"
        print(f"| `{area}` | {contract_cell} | {len(adrs)} | {readme_cell} | {status} |")

    print()
    print(f"BASELINE: {len(AREAS)} canonical areas · {areas_without_adr} missing bootstrap ADR(s)")
    return 0
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def cmd_check(args) -> int:
    """Validate every canonical area and report problems on stderr.

    Hard failures are the errors returned by ``scan_area`` and an area that
    has ADRs but no ``README.md``. An area with no ADRs only produces a
    warning. Returns 1 if any hard failure was found, else 0.
    """
    hard_failures = 0
    warnings = 0
    for area, meta in AREAS.items():
        adrs, scan_errors = scan_area(area)
        for problem in scan_errors:
            print(f"❌ {problem}", file=sys.stderr)
            hard_failures += 1
        readme_present = (ADR_ROOT / area / "README.md").exists() if adrs else True
        if not readme_present:
            print(f"❌ {area}/: README.md missing", file=sys.stderr)
            hard_failures += 1
        if not adrs:
            print(f"⚠️ {area}/: no bootstrap ADR yet (contract: {meta['contract']})", file=sys.stderr)
            warnings += 1
    print(f"BASELINE: {hard_failures} hard fail(s) · {warnings} warn(s)")
    if hard_failures:
        return 1
    return 0
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def cmd_regen_area_readme(args) -> int:
    """Rewrite ``docs/adrs/<area>/README.md`` for ``args.regen_area_readme``.

    Returns 1 without writing when the area is not in ``AREAS``. Scan errors
    are echoed to stderr but do not stop the README from being written;
    in that case the function still returns 0.
    """
    area = args.regen_area_readme
    meta = AREAS.get(area)
    if area not in AREAS:
        print(f"❌ unknown area '{area}' — add to AREAS inventory first", file=sys.stderr)
        return 1

    adrs, scan_errors = scan_area(area)
    for problem in scan_errors:
        print(f"❌ {problem}", file=sys.stderr)

    readme_path = ADR_ROOT / area / "README.md"
    readme_path.parent.mkdir(parents=True, exist_ok=True)
    content = render_area_readme(area, meta, adrs)
    readme_path.write_text(content, encoding="utf-8")
    print(f"wrote {readme_path.relative_to(ROOT)}")
    return 0
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def main() -> int:
    """CLI entry point: run the check, the README regeneration, or the report.

    With no mode flag (or with ``--report``) the coverage report is printed.
    Returns the exit code of the selected sub-command.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # Keep the hand-aligned "Modes:" table of the module docstring intact
        # instead of letting argparse reflow it into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    grp = ap.add_mutually_exclusive_group()
    # The module docstring advertises --report as the default mode, so accept
    # it explicitly rather than rejecting it as an unknown argument.
    grp.add_argument("--report", action="store_true",
                     help="Print the coverage report (default).")
    grp.add_argument("--check", action="store_true",
                     help="Validate all areas; exit 1 on hard failures.")
    grp.add_argument("--regen-area-readme", metavar="AREA",
                     help="Rewrite docs/adrs/AREA/README.md.")
    args = ap.parse_args()
    if args.check:
        return cmd_check(args)
    if args.regen_area_readme:
        return cmd_regen_area_readme(args)
    return cmd_report(args)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
if __name__ == "__main__":
|
|
175
|
+
sys.exit(main())
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""MCP-tool inventory generator. Reads the source-of-truth catalog at
|
|
3
|
+
`scripts/mcp_server/consumer_tool_catalog.json` and the handler
|
|
4
|
+
registry at `scripts/mcp_server/tools.py`, emits
|
|
5
|
+
`docs/contracts/mcp-tool-inventory.md` with every tool cited by
|
|
6
|
+
`<file>:<line>`. README's MCP-tool count line links here; the bare
|
|
7
|
+
claim is banned.
|
|
8
|
+
|
|
9
|
+
Contract: step-11 Phase 5 Step 3
|
|
10
|
+
(agents/roadmaps/step-11-ruflo-parity.md).
|
|
11
|
+
|
|
12
|
+
Modes:
|
|
13
|
+
--check exit non-zero if the generated inventory drifts from
|
|
14
|
+
the on-disk file (CI gate).
|
|
15
|
+
--write regenerate the inventory file in-place (default).
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
import argparse, json, re, sys
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
ROOT = Path(__file__).resolve().parent.parent
|
|
22
|
+
CATALOG = ROOT / "scripts/mcp_server/consumer_tool_catalog.json"
|
|
23
|
+
TOOLS_PY = ROOT / "scripts/mcp_server/tools.py"
|
|
24
|
+
OUT = ROOT / "docs/contracts/mcp-tool-inventory.md"
|
|
25
|
+
|
|
26
|
+
# Match `"<name>": BuiltinTool(` in the ALLOWLIST dict.
|
|
27
|
+
HANDLER_RE = re.compile(r'^\s*"([a-z_]+)"\s*:\s*BuiltinTool\(')
|
|
28
|
+
# Match `"name": "<name>",` in the catalog json (for catalog citations).
|
|
29
|
+
CATALOG_NAME_RE = re.compile(r'^\s*"name"\s*:\s*"([a-z_]+)"\s*,?\s*$')
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _index_handlers() -> dict[str, int]:
    """Map each tool name registered in ``TOOLS_PY`` to its 1-based line number.

    A line counts when it matches ``HANDLER_RE``; if a name appears more than
    once, the last matching line wins.
    """
    source_lines = TOOLS_PY.read_text(encoding="utf-8").splitlines()
    matched = (
        (lineno, HANDLER_RE.match(text))
        for lineno, text in enumerate(source_lines, start=1)
    )
    return {hit.group(1): lineno for lineno, hit in matched if hit}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _index_catalog_lines() -> dict[str, int]:
    """Map each ``"name"`` entry in ``CATALOG`` to its 1-based line number.

    A line counts when it matches ``CATALOG_NAME_RE``; if a name appears more
    than once, the first matching line is kept.
    """
    first_seen: dict[str, int] = {}
    catalog_lines = CATALOG.read_text(encoding="utf-8").splitlines()
    for lineno, text in enumerate(catalog_lines, start=1):
        hit = CATALOG_NAME_RE.match(text)
        if hit is None:
            continue
        # setdefault leaves an already-recorded (earlier) line untouched.
        first_seen.setdefault(hit.group(1), lineno)
    return first_seen
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _render(catalog: dict, handlers: dict[str, int], cat_lines: dict[str, int]) -> str:
|
|
51
|
+
tools = catalog["tools"]
|
|
52
|
+
total = len(tools)
|
|
53
|
+
by_transport: dict[str, int] = {}
|
|
54
|
+
by_side_effect: dict[str, int] = {}
|
|
55
|
+
for t in tools:
|
|
56
|
+
for tr in t["implemented_on"]:
|
|
57
|
+
by_transport[tr] = by_transport.get(tr, 0) + 1
|
|
58
|
+
by_side_effect[t["side_effect"]] = by_side_effect.get(t["side_effect"], 0) + 1
|
|
59
|
+
stub_count = sum(1 for t in tools if not t["implemented_on"])
|
|
60
|
+
transport_summary = ", ".join(f"{k}={v}" for k, v in sorted(by_transport.items())) or "none"
|
|
61
|
+
side_effect_summary = ", ".join(f"{k}={v}" for k, v in sorted(by_side_effect.items()))
|
|
62
|
+
|
|
63
|
+
lines: list[str] = []
|
|
64
|
+
lines.append("---")
|
|
65
|
+
lines.append("stability: beta")
|
|
66
|
+
lines.append("keep-beta-until: 2026-08-14")
|
|
67
|
+
lines.append("---")
|
|
68
|
+
lines.append("")
|
|
69
|
+
lines.append("# MCP tool inventory")
|
|
70
|
+
lines.append("")
|
|
71
|
+
lines.append("> Generated by [`scripts/audit_mcp_tools.py`](../../scripts/audit_mcp_tools.py)")
|
|
72
|
+
lines.append("> from the source-of-truth catalog")
|
|
73
|
+
lines.append("> [`scripts/mcp_server/consumer_tool_catalog.json`](../../scripts/mcp_server/consumer_tool_catalog.json).")
|
|
74
|
+
lines.append("> Do **not** hand-edit; rerun `python3 scripts/audit_mcp_tools.py --write`.")
|
|
75
|
+
lines.append(">")
|
|
76
|
+
lines.append("> Step-11 Phase 5 Step 3 (`step-11-ruflo-parity.md`).")
|
|
77
|
+
lines.append("")
|
|
78
|
+
lines.append("## Summary")
|
|
79
|
+
lines.append("")
|
|
80
|
+
lines.append(f"- **Total tools:** {total}")
|
|
81
|
+
lines.append(f"- **By transport:** {transport_summary}")
|
|
82
|
+
lines.append(f"- **By side-effect:** {side_effect_summary}")
|
|
83
|
+
lines.append(f"- **Discovery-only stubs (no implementation):** {stub_count}")
|
|
84
|
+
lines.append("")
|
|
85
|
+
lines.append("## Tools")
|
|
86
|
+
lines.append("")
|
|
87
|
+
lines.append("| Tool | Side-effect | Transports | Catalog | Handler |")
|
|
88
|
+
lines.append("|---|---|---|---|---|")
|
|
89
|
+
for t in tools:
|
|
90
|
+
name = t["name"]
|
|
91
|
+
side = t["side_effect"]
|
|
92
|
+
transports = ", ".join(t["implemented_on"]) if t["implemented_on"] else "_(stub)_"
|
|
93
|
+
cat_line = cat_lines.get(name)
|
|
94
|
+
cat_cite = (
|
|
95
|
+
f"[`consumer_tool_catalog.json:{cat_line}`](../../scripts/mcp_server/consumer_tool_catalog.json#L{cat_line})"
|
|
96
|
+
if cat_line else "_missing_"
|
|
97
|
+
)
|
|
98
|
+
h_line = handlers.get(name)
|
|
99
|
+
h_cite = (
|
|
100
|
+
f"[`tools.py:{h_line}`](../../scripts/mcp_server/tools.py#L{h_line})"
|
|
101
|
+
if h_line else "_stub-only_"
|
|
102
|
+
)
|
|
103
|
+
lines.append(f"| `{name}` | `{side}` | {transports} | {cat_cite} | {h_cite} |")
|
|
104
|
+
lines.append("")
|
|
105
|
+
lines.append("## Glossary")
|
|
106
|
+
lines.append("")
|
|
107
|
+
lines.append("- **Side-effect** — `ro` (read-only) · `fs-write` (filesystem write) · `shell` (spawns processes).")
|
|
108
|
+
lines.append("- **Transports** — `stdio` (`scripts/mcp_server/`) · `worker` (`workers/mcp/`). A tool may live on both.")
|
|
109
|
+
lines.append("- **Stub** — catalog-listed for discovery; returns the `not_implemented` envelope from")
|
|
110
|
+
lines.append(" [`mcp-tool-stub-envelope.md`](mcp-tool-stub-envelope.md) until promoted.")
|
|
111
|
+
lines.append("")
|
|
112
|
+
return "\n".join(lines) + "\n"
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def main() -> int:
    """CLI entry point: regenerate the inventory file or check it for drift.

    With ``--check`` the freshly rendered inventory is compared against the
    file on disk (a missing file counts as empty); any difference prints a
    hint to stderr and returns 1. Otherwise the file is (re)written.
    Returns 0 on success.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # Keep the hand-aligned "Modes:" table of the module docstring intact
        # instead of letting argparse reflow it into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    g = ap.add_mutually_exclusive_group()
    g.add_argument("--check", action="store_true", help="Drift gate: exit 1 if file is stale.")
    g.add_argument("--write", action="store_true", help="Regenerate the inventory file.")
    ap.add_argument("--quiet", action="store_true", help="Suppress the success message.")
    args = ap.parse_args()

    catalog = json.loads(CATALOG.read_text(encoding="utf-8"))
    handlers = _index_handlers()
    cat_lines = _index_catalog_lines()
    rendered = _render(catalog, handlers, cat_lines)

    if args.check:
        on_disk = OUT.read_text(encoding="utf-8") if OUT.exists() else ""
        if on_disk != rendered:
            print(f"❌ {OUT.relative_to(ROOT)} drifted from generator.", file=sys.stderr)
            print(" Run: python3 scripts/audit_mcp_tools.py --write", file=sys.stderr)
            return 1
        if not args.quiet:
            print(f"BASELINE: {OUT.relative_to(ROOT)} is in sync · {len(catalog['tools'])} tool(s)")
        return 0

    # Writing is the default: --write is accepted but never inspected.
    OUT.parent.mkdir(parents=True, exist_ok=True)
    OUT.write_text(rendered, encoding="utf-8")
    if not args.quiet:
        print(f"✅ wrote {OUT.relative_to(ROOT)} · {len(catalog['tools'])} tool(s)")
    return 0
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
if __name__ == "__main__":
|
|
146
|
+
sys.exit(main())
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Baseline-closure check — step-4 Phase 3 Step 4.
|
|
3
|
+
|
|
4
|
+
Returns exit 0 iff the 60-day clock has elapsed since
|
|
5
|
+
`bench/baseline-start.txt` AND `bench/reports/` contains at least
|
|
6
|
+
`--min-reports` complete runs for the named corpus (default 30).
|
|
7
|
+
|
|
8
|
+
Read by P2 enforcement roadmaps as their precondition (G1 gate in
|
|
9
|
+
step-99). This is the single arbiter of "are we allowed to flip
|
|
10
|
+
defaults yet" — no other timer is authoritative.
|
|
11
|
+
|
|
12
|
+
Exit codes:
|
|
13
|
+
0 — baseline ready (clock elapsed AND report count met)
|
|
14
|
+
1 — argument / file error
|
|
15
|
+
2 — baseline not ready (clock OR reports insufficient)
|
|
16
|
+
|
|
17
|
+
CLI:
|
|
18
|
+
python3 scripts/bench_baseline_ready.py
|
|
19
|
+
python3 scripts/bench_baseline_ready.py --corpus dev --min-days 60 --min-reports 30
|
|
20
|
+
python3 scripts/bench_baseline_ready.py --json
|
|
21
|
+
"""
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import argparse
|
|
25
|
+
import json
|
|
26
|
+
import sys
|
|
27
|
+
from datetime import date, datetime, timezone
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
|
|
30
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _read_baseline_start(path: Path) -> date | None:
|
|
34
|
+
if not path.exists():
|
|
35
|
+
return None
|
|
36
|
+
for line in path.read_text(encoding="utf-8").splitlines():
|
|
37
|
+
stripped = line.strip()
|
|
38
|
+
if not stripped or stripped.startswith("#"):
|
|
39
|
+
continue
|
|
40
|
+
try:
|
|
41
|
+
return datetime.strptime(stripped, "%Y-%m-%d").date()
|
|
42
|
+
except ValueError:
|
|
43
|
+
continue
|
|
44
|
+
return None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def main(argv: list[str] | None = None) -> int:
    """Decide whether the benchmark baseline window has closed.

    The baseline is ready when at least ``--min-days`` days have passed since
    the date in the baseline file AND the reports directory holds at least
    ``--min-reports`` files named ``*-<corpus>.json``.

    Returns 0 when ready, 1 when the baseline file is missing or has no
    parseable date, and 2 when either condition is not yet met.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--corpus", default="dev")
    parser.add_argument("--reports-dir", default="bench/reports")
    parser.add_argument("--baseline-file", default="bench/baseline-start.txt")
    parser.add_argument("--min-days", type=int, default=60)
    parser.add_argument("--min-reports", type=int, default=30)
    parser.add_argument("--json", action="store_true")
    args = parser.parse_args(argv)

    baseline_path = REPO_ROOT / args.baseline_file
    start = _read_baseline_start(baseline_path)
    if start is None:
        msg = f"baseline-start file missing or unreadable: {baseline_path}"
        if not args.json:
            print(f" ❌ {msg}", file=sys.stderr)
        else:
            print(json.dumps({"status": "error", "reason": msg}))
        return 1

    # Elapsed time is measured in whole UTC calendar days.
    today = datetime.now(timezone.utc).date()
    days_elapsed = (today - start).days
    days_ok = days_elapsed >= args.min_days

    reports_dir = REPO_ROOT / args.reports_dir
    report_count = 0
    if reports_dir.exists():
        report_count = len(list(reports_dir.glob(f"*-{args.corpus}.json")))
    reports_ok = report_count >= args.min_reports

    ready = days_ok and reports_ok
    if args.json:
        payload = {
            "status": "ready" if ready else "warmup",
            "corpus": args.corpus,
            "baseline_start": start.isoformat(),
            "today": today.isoformat(),
            "days_elapsed": days_elapsed,
            "min_days": args.min_days,
            "days_ok": days_ok,
            "report_count": report_count,
            "min_reports": args.min_reports,
            "reports_ok": reports_ok,
        }
        print(json.dumps(payload, indent=2))
    else:
        emoji, verdict = ("✅", "READY") if ready else ("⏳", "WARMUP")
        summary = " · ".join([
            f" {emoji} bench-baseline",
            f"corpus={args.corpus}",
            f"{verdict}",
            f"days={days_elapsed}/{args.min_days}",
            f"reports={report_count}/{args.min_reports}",
        ])
        print(summary)
    if ready:
        return 0
    return 2
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
if __name__ == "__main__":
|
|
108
|
+
sys.exit(main())
|