@event4u/agent-config 2.19.0 → 2.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/.agent-src/commands/agent-status.md +29 -0
  2. package/.agent-src/commands/onboard.md +221 -81
  3. package/.agent-src/packs/README.md +49 -0
  4. package/.agent-src/packs/agency-delivery.yml +63 -0
  5. package/.agent-src/packs/content-engine.yml +53 -0
  6. package/.agent-src/packs/founder-mvp.yml +51 -0
  7. package/.agent-src/presets/README.md +26 -0
  8. package/.agent-src/presets/balanced.yml +34 -0
  9. package/.agent-src/presets/fast.yml +31 -0
  10. package/.agent-src/presets/strict.yml +38 -0
  11. package/.agent-src/profiles/README.md +29 -0
  12. package/.agent-src/profiles/agency.yml +27 -0
  13. package/.agent-src/profiles/content_creator.yml +25 -0
  14. package/.agent-src/profiles/developer.yml +26 -0
  15. package/.agent-src/profiles/finance.yml +24 -0
  16. package/.agent-src/profiles/founder.yml +25 -0
  17. package/.agent-src/profiles/ops.yml +25 -0
  18. package/.agent-src/rules/no-cheap-questions.md +25 -17
  19. package/.agent-src/skills/adr-create/SKILL.md +78 -68
  20. package/.agent-src/skills/subagent-orchestration/SKILL.md +33 -0
  21. package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
  22. package/.agent-src/templates/skill-archive-note.md +101 -0
  23. package/.claude-plugin/marketplace.json +1 -1
  24. package/CHANGELOG.md +73 -70
  25. package/README.md +68 -72
  26. package/config/agent-settings.template.yml +22 -0
  27. package/docs/adrs/caveman/0001-default-off-until-bench.md +93 -0
  28. package/docs/adrs/caveman/README.md +9 -0
  29. package/docs/adrs/cost/0001-hard-stop-hook.md +114 -0
  30. package/docs/adrs/cost/README.md +9 -0
  31. package/docs/adrs/memory/0001-consumer-side-snapshot.md +111 -0
  32. package/docs/adrs/memory/README.md +9 -0
  33. package/docs/adrs/router/0001-three-tier-routing.md +119 -0
  34. package/docs/adrs/router/README.md +9 -0
  35. package/docs/adrs/schema/0001-json-schema-frontmatter.md +102 -0
  36. package/docs/adrs/schema/README.md +9 -0
  37. package/docs/adrs/smoke/0001-per-tier-smoke-scripts.md +99 -0
  38. package/docs/adrs/smoke/README.md +9 -0
  39. package/docs/architecture/current-onboard-baseline.md +126 -0
  40. package/docs/architecture/current-safety-behavior.md +137 -0
  41. package/docs/archive/CHANGELOG-pre-2.16.0.md +48 -0
  42. package/docs/archive/CHANGELOG-pre-2.17.0.md +63 -0
  43. package/docs/contracts/adr-layout.md +108 -0
  44. package/docs/contracts/benchmark-corpus-spec.md +97 -0
  45. package/docs/contracts/benchmark-report-schema.md +111 -0
  46. package/docs/contracts/command-clusters.md +1 -0
  47. package/docs/contracts/command-taxonomy.md +137 -0
  48. package/docs/contracts/compression-default-kill-criterion.md +69 -0
  49. package/docs/contracts/config-presets.md +144 -0
  50. package/docs/contracts/cost-dashboard.md +143 -0
  51. package/docs/contracts/cost-enforcement.md +134 -0
  52. package/docs/contracts/file-ownership-matrix.json +0 -7
  53. package/docs/contracts/mcp-tool-inventory.md +53 -0
  54. package/docs/contracts/measurement-baseline.md +102 -0
  55. package/docs/contracts/namespace.md +125 -0
  56. package/docs/contracts/profile-system.md +142 -0
  57. package/docs/contracts/safety-model.md +129 -0
  58. package/docs/contracts/smoke-contracts.md +144 -0
  59. package/docs/contracts/workflow-packs.md +121 -0
  60. package/docs/decisions/ADR-010-profile-pack-preset-boundary.md +132 -0
  61. package/docs/decisions/INDEX.md +1 -0
  62. package/docs/featured-commands.md +27 -0
  63. package/docs/parity/bench-ruflo.json +58 -0
  64. package/docs/parity/bench.json +41 -0
  65. package/docs/parity/ruflo.md +46 -0
  66. package/docs/profiles.md +91 -0
  67. package/package.json +1 -1
  68. package/scripts/_cli/cmd_explain.py +250 -0
  69. package/scripts/_lib/bench_cost.py +138 -0
  70. package/scripts/_lib/bench_quality.py +118 -0
  71. package/scripts/_lib/bench_report.py +150 -0
  72. package/scripts/agent-config +13 -0
  73. package/scripts/audit_adr_coverage.py +175 -0
  74. package/scripts/audit_mcp_tools.py +146 -0
  75. package/scripts/bench_baseline_ready.py +108 -0
  76. package/scripts/bench_drift_check.py +151 -0
  77. package/scripts/bench_per_tool.py +216 -0
  78. package/scripts/bench_run.py +155 -0
  79. package/scripts/config/__init__.py +9 -0
  80. package/scripts/config/presets.py +206 -0
  81. package/scripts/config/profiles.py +173 -0
  82. package/scripts/cost/budget.mjs +73 -12
  83. package/scripts/cost/preflight.mjs +89 -0
  84. package/scripts/lint_archived_skills.py +143 -0
  85. package/scripts/lint_bench_corpus.py +161 -0
  86. package/scripts/lint_namespace.py +135 -0
  87. package/scripts/lint_roadmap_complexity.py +3 -2
  88. package/scripts/skill_overlap.py +204 -0
  89. package/scripts/skill_usage_collect.py +191 -0
  90. package/scripts/skill_usage_report.py +162 -0
  91. package/scripts/smoke/kernel.sh +101 -0
  92. package/scripts/smoke/router.sh +129 -0
  93. package/scripts/smoke/schema.sh +71 -0
  94. package/scripts/smoke/skills.sh +101 -0
@@ -0,0 +1,118 @@
1
+ # Quality probe for `scripts/bench_run.py` — step-4 Phase 2 Step 3.
2
+ #
3
+ # Each prompt declares `rubric.must_include` / `must_not_include` or a
4
+ # `quality_assertion` regex (per docs/contracts/benchmark-corpus-spec.md).
5
+ # When an agent-output file is passed via --agent-output, we score the
6
+ # assertions against actual output. Without it, we emit `not_collected`
7
+ # per docs/contracts/benchmark-report-schema.md § quality invariants.
8
+ """Quality probe helper for the bench runner."""
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import re
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+
17
+ def _eval_rubric(rubric: dict[str, Any], output: str) -> tuple[bool, str]:
18
+ """Apply rubric.must_include / must_not_include / length_words to output."""
19
+ for phrase in rubric.get("must_include") or []:
20
+ if phrase not in output:
21
+ return False, f"missing: {phrase!r}"
22
+ for phrase in rubric.get("must_not_include") or []:
23
+ if phrase in output:
24
+ return False, f"forbidden: {phrase!r}"
25
+ bounds = rubric.get("length_words") or {}
26
+ if bounds:
27
+ words = len(output.split())
28
+ lo, hi = bounds.get("min", 0), bounds.get("max", 0)
29
+ if lo and words < lo:
30
+ return False, f"length<{lo}: {words}"
31
+ if hi and words > hi:
32
+ return False, f"length>{hi}: {words}"
33
+ return True, "ok"
34
+
35
+
36
+ def _eval_regex(pattern: str, output: str) -> tuple[bool, str]:
37
+ try:
38
+ compiled = re.compile(pattern, re.MULTILINE)
39
+ except re.error as exc:
40
+ return False, f"bad_regex: {exc}"
41
+ return (bool(compiled.search(output)), "ok" if compiled.search(output) else "no_match")
42
+
43
+
44
+ def _format_rubric(rubric: dict[str, Any]) -> str:
45
+ parts = []
46
+ if rubric.get("must_include"):
47
+ parts.append(f"must_include={rubric['must_include']}")
48
+ if rubric.get("must_not_include"):
49
+ parts.append(f"must_not_include={rubric['must_not_include']}")
50
+ if rubric.get("length_words"):
51
+ parts.append(f"length_words={rubric['length_words']}")
52
+ return " ".join(parts) or "<empty>"
53
+
54
+
55
def score_corpus(
    prompts: list[dict[str, Any]],
    agent_output_path: Path | None,
) -> dict[str, Any]:
    """Build the ``quality`` block of the benchmark report.

    A prompt counts as "declaring an assertion" when it has a truthy
    ``quality_assertion`` or a rubric with a truthy ``must_include``,
    ``must_not_include`` or ``length_words`` entry.

    When ``agent_output_path`` is ``None`` or not an existing file, the
    block is marked ``not_collected``: nothing is scored and every
    per-prompt ``passed`` field is the string ``"not_collected"``.
    Otherwise the file is parsed as JSON and looked up by prompt id; a
    prompt whose id is absent is scored against the empty string. A
    ``quality_assertion`` regex takes precedence over the rubric.
    """
    rubric_fields = ("must_include", "must_not_include", "length_words")
    declared = []
    for prompt in prompts:
        prompt_rubric = prompt.get("rubric") or {}
        has_rubric_rule = any(prompt_rubric.get(field) for field in rubric_fields)
        if has_rubric_rule or prompt.get("quality_assertion"):
            declared.append(prompt)
    total_declared = len(declared)

    have_outputs = agent_output_path is not None and agent_output_path.is_file()
    if not have_outputs:
        placeholders = []
        for prompt in declared:
            placeholders.append({
                "id": prompt["id"],
                "assertion": prompt.get("quality_assertion") or _format_rubric(prompt.get("rubric") or {}),
                "assertion_kind": "quality_assertion" if prompt.get("quality_assertion") else "rubric",
                "passed": "not_collected",
            })
        return {
            "source": "not_collected",
            "prompts_with_assertion": total_declared,
            "prompts_passing": 0,
            "quality_score": 0.0,
            "per_prompt": placeholders,
        }

    collected = json.loads(agent_output_path.read_text(encoding="utf-8"))
    rows: list[dict[str, Any]] = []
    for prompt in declared:
        prompt_id = prompt["id"]
        text = str(collected.get(prompt_id, ""))
        prompt_rubric = prompt.get("rubric") or {}
        pattern = prompt.get("quality_assertion")
        if pattern:
            verdict, _reason = _eval_regex(pattern, text)
            kind, assertion = "quality_assertion", pattern
        else:
            verdict, _reason = _eval_rubric(prompt_rubric, text)
            kind, assertion = "rubric", _format_rubric(prompt_rubric)
        rows.append({
            "id": prompt_id,
            "assertion": assertion,
            "assertion_kind": kind,
            "passed": verdict,
        })

    passing = sum(1 for row in rows if row["passed"])
    if total_declared:
        score = round(passing / total_declared, 4)
    else:
        score = 0.0
    return {
        "source": str(agent_output_path),
        "prompts_with_assertion": total_declared,
        "prompts_passing": passing,
        "quality_score": score,
        "per_prompt": rows,
    }
@@ -0,0 +1,150 @@
1
+ # Report emitter for `scripts/bench_run.py` — step-4 Phase 2 Step 4.
2
+ #
3
+ # Serializes the unified report dict to JSON + Markdown per
4
+ # docs/contracts/benchmark-report-schema.md. Filename format:
5
+ # `bench/reports/<UTC ISO-8601 with ':' replaced by '-'>-<corpus_id>.{json,md}`.
6
+ """Report emitter for the bench runner."""
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+
15
def utc_now_filename_stamp() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH-MM-SSZ``.

    The time fields are hyphen-separated (no ':'), which keeps the stamp
    usable in filenames, and the fixed-width layout sorts lexicographically
    in chronological order.
    """
    now = datetime.now(tz=timezone.utc)
    return f"{now:%Y-%m-%dT%H-%M-%SZ}"
18
+
19
+
20
def utc_now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    now = datetime.now(tz=timezone.utc)
    return f"{now:%Y-%m-%dT%H:%M:%SZ}"
22
+
23
+
24
def report_paths(reports_dir: Path, corpus_id: str, stamp: str) -> tuple[Path, Path]:
    """Return the ``(json_path, markdown_path)`` pair for one report.

    Both files live in ``reports_dir`` and share the ``<stamp>-<corpus_id>``
    stem, differing only in extension.
    """
    json_name, md_name = (f"{stamp}-{corpus_id}.{ext}" for ext in ("json", "md"))
    return reports_dir / json_name, reports_dir / md_name
27
+
28
+
29
def write_json(path: Path, report: dict[str, Any]) -> None:
    """Write ``report`` to ``path`` as 2-space-indented JSON plus a final newline.

    Missing parent directories are created first; an existing file is
    overwritten. The file is encoded as UTF-8.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report, indent=2)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(serialized + "\n")
32
+
33
+
34
+ def _selection_section(selection: dict[str, Any]) -> str:
35
+ lines = [
36
+ "## Selection accuracy",
37
+ "",
38
+ f"- top-K = **{selection['top_k']}** · "
39
+ f"hit **{selection['prompts_hit']} / {selection['prompts_total']}** · "
40
+ f"accuracy **{selection['selection_accuracy']:.2%}** · "
41
+ f"target **{selection['target']:.2%}** · "
42
+ f"verdict **{'PASS' if selection['passed'] else 'FAIL'}**",
43
+ "",
44
+ "| id | hit | expected | top-K ranked |",
45
+ "|---|---|---|---|",
46
+ ]
47
+ for r in selection.get("per_prompt", []):
48
+ mark = "✅" if r["hit"] else "❌"
49
+ expected = ", ".join(r.get("expected_skills") or []) or "—"
50
+ ranked = ", ".join(r.get("top_k_ranked") or []) or "—"
51
+ lines.append(f"| `{r['id']}` | {mark} | {expected} | {ranked} |")
52
+ return "\n".join(lines)
53
+
54
+
55
+ def _cost_section(cost: dict[str, Any]) -> str:
56
+ if cost.get("source") == "unavailable":
57
+ return (
58
+ "## Cost capture\n\n"
59
+ f"- **source:** `unavailable` ({cost.get('reason', 'unknown')})\n"
60
+ f"- **scanned:** `{cost.get('scanned_path', '—')}`\n"
61
+ f"- **pricing sourced on:** {cost.get('pricing_sourced_on') or '—'}\n\n"
62
+ "_No session jsonl available. Run `node scripts/cost/track.mjs` "
63
+ "from a real Claude Code session to populate agents/cost-tracking/sessions.jsonl._\n"
64
+ )
65
+ totals = cost["totals"]
66
+ lines = [
67
+ "## Cost capture",
68
+ "",
69
+ f"- **source:** `{cost['source']}` · sessions scanned: **{cost['sessions_scanned']}**",
70
+ f"- **pricing sourced on:** {cost.get('pricing_sourced_on') or '—'}",
71
+ f"- **total cost:** **${totals['total_cost_usd']:.6f}**",
72
+ "",
73
+ "| tier | messages | cost (USD) |",
74
+ "|---|---:|---:|",
75
+ ]
76
+ for tier, slot in cost["per_tier"].items():
77
+ if slot["messages"] == 0 and slot["cost_usd"] == 0.0:
78
+ continue
79
+ lines.append(f"| {tier} | {slot['messages']} | ${slot['cost_usd']:.6f} |")
80
+ lines += [
81
+ "",
82
+ "| metric | value |",
83
+ "|---|---:|",
84
+ f"| input_tokens | {totals['input_tokens']} |",
85
+ f"| output_tokens | {totals['output_tokens']} |",
86
+ f"| cache_read_input_tokens | {totals['cache_read_input_tokens']} |",
87
+ f"| cache_creation_input_tokens | {totals['cache_creation_input_tokens']} |",
88
+ ]
89
+ return "\n".join(lines)
90
+
91
+
92
+ def _quality_section(quality: dict[str, Any]) -> str:
93
+ if quality["source"] == "not_collected":
94
+ return (
95
+ "## Quality probe\n\n"
96
+ f"- **source:** `not_collected` · assertions declared: "
97
+ f"**{quality['prompts_with_assertion']}**\n"
98
+ "- _Pass `--agent-output <path-to-outputs.json>` (map of `id -> str`) "
99
+ "to score the rubrics. Schema invariant: missing output keeps "
100
+ "`verdict.overall` at `partial`._\n"
101
+ )
102
+ lines = [
103
+ "## Quality probe",
104
+ "",
105
+ f"- **source:** `{quality['source']}` · "
106
+ f"passing **{quality['prompts_passing']} / {quality['prompts_with_assertion']}** · "
107
+ f"score **{quality['quality_score']:.2%}**",
108
+ "",
109
+ "| id | kind | passed | assertion |",
110
+ "|---|---|---|---|",
111
+ ]
112
+ for r in quality.get("per_prompt", []):
113
+ mark = "✅" if r["passed"] is True else ("❌" if r["passed"] is False else "—")
114
+ lines.append(f"| `{r['id']}` | {r['assertion_kind']} | {mark} | `{r['assertion']}` |")
115
+ return "\n".join(lines)
116
+
117
+
118
def render_markdown(report: dict[str, Any]) -> str:
    """Render the full Markdown benchmark report.

    The document consists of a headline block (selection / cost / quality /
    overall verdicts), the selection, cost and quality sections, and a
    notes block, separated by blank lines and terminated by a newline.

    ``report`` must carry the ``corpus``, ``selection``, ``cost``,
    ``quality``, ``verdict``, ``generated_at`` and ``runner`` keys read
    below; a missing key raises ``KeyError``.
    """
    corpus = report["corpus"]
    sel = report["selection"]
    cost = report["cost"]
    qual = report["quality"]
    verdict = report["verdict"]

    if cost["source"] == "unavailable":
        # The cost section treats `totals` as optional for an unavailable
        # source, so the headline must not require it either; fall back to
        # zero instead of raising KeyError.
        total_cost = (cost.get("totals") or {}).get("total_cost_usd", 0.0)
        cost_origin = cost["source"]
    else:
        total_cost = cost["totals"]["total_cost_usd"]
        cost_origin = "sessions=" + str(cost["sessions_scanned"])

    headline = (
        f"# Benchmark Report — `{corpus['id']}` · {report['generated_at']}\n\n"
        "## Headline\n\n"
        f"- **selection** {sel['selection_accuracy']:.2%} (target {sel['target']:.2%}) → **{verdict['selection']}**\n"
        f"- **cost** ${total_cost:.6f} "
        f"({cost_origin})\n"
        f"- **quality** {qual['quality_score']:.2%} → **{verdict['quality']}**\n"
        f"- **overall** → **{verdict['overall']}**\n"
    )
    notes = (
        "## Notes\n\n"
        f"- corpus path: `{corpus['path']}` · prompts: **{corpus['prompt_count']}**\n"
        "- pricing: `bench/pricing.yaml`\n"
        f"- baseline collector: `{report['runner']['baseline_collector']}`\n"
    )
    return "\n\n".join([
        headline,
        _selection_section(sel),
        _cost_section(cost),
        _quality_section(qual),
        notes,
    ]) + "\n"
146
+
147
+
148
def write_markdown(path: Path, report: dict[str, Any]) -> None:
    """Render ``report`` as Markdown and write it to ``path`` (UTF-8).

    Missing parent directories are created before rendering; an existing
    file is overwritten.
    """
    destination_dir = path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    document = render_markdown(report)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(document)
@@ -97,6 +97,10 @@ Tier 1 — power-user (release shape, audit, migration):
97
97
  Lists missing, modified, and foreign files.
98
98
  Exits 1 on drift, 2 on missing lockfile.
99
99
  Flags: --json | --project=<path>
100
+ explain Read-only decision-chain trace.
101
+ Usage: explain config | explain rule <name>
102
+ | explain route "<text>"
103
+ Flags: --json | --project=<path>
100
104
  migrate One-shot migration off legacy composer / npm install paths
101
105
  Flags: --dry-run (detect only)
102
106
  first-run Guided first-run setup — cost profile, settings, tooling
@@ -749,6 +753,14 @@ cmd_versions() {
749
753
  exec env PYTHONPATH="$PACKAGE_ROOT" python3 -m scripts._cli.cmd_versions "$@"
750
754
  }
751
755
 
756
+ # `agent-config explain <config|rule|route>` — print the decision chain
757
+ # behind a configuration or routing outcome. Read-only diagnostic; never
758
+ # edits state. See scripts/_cli/cmd_explain.py.
759
+ cmd_explain() {
760
+ require_python3
761
+ exec env PYTHONPATH="$PACKAGE_ROOT" python3 -m scripts._cli.cmd_explain "$@"
762
+ }
763
+
752
764
  main() {
753
765
  local cmd="${1-}"
754
766
  [[ $# -gt 0 ]] && shift || true
@@ -801,6 +813,7 @@ main() {
801
813
  prune) cmd_prune "$@" ;;
802
814
  doctor) cmd_doctor "$@" ;;
803
815
  versions) cmd_versions "$@" ;;
816
+ explain) cmd_explain "$@" ;;
804
817
  help|--help|-h|"")
805
818
  # Optional `--tier=0|1|all` filter (default 0).
806
819
  local tier_arg="0"
@@ -0,0 +1,175 @@
1
+ #!/usr/bin/env python3
2
+ """Audit per-area ADR coverage against docs/contracts/ and the canonical
3
+ AREAS inventory. Contract: docs/contracts/adr-layout.md.
4
+
5
+ Modes:
6
+ --report (default) one-shot inventory: which areas exist, ADR count
7
+ per area, contracts missing a bootstrap ADR.
8
+ --check exit 1 on hard failures (number gaps, missing area README,
9
+ broken supersedes); exit 0 with warnings on missing
10
+ bootstrap ADRs and dangling references.
11
+ --regen-area-readme <area>
12
+ rewrite docs/adrs/<area>/README.md from the area's ADR
13
+ frontmatter. Idempotent.
14
+ """
15
+ from __future__ import annotations
16
+ import argparse, re, sys
17
+ from pathlib import Path
18
+
19
+ ROOT = Path(__file__).resolve().parent.parent
20
+ ADR_ROOT = ROOT / "docs" / "adrs"
21
+ CONTRACT_ROOT = ROOT / "docs" / "contracts"
22
+
23
+ # Canonical area inventory. To add an area: add it here, then run
24
+ # `python3 scripts/audit_adr_coverage.py --check` in the same PR.
25
+ AREAS: dict[str, dict[str, str]] = {
26
+ "cost": {"contract": "cost-enforcement.md",
27
+ "scope": "Budget ladder, hard-stop hook, cost reporting and dashboards."},
28
+ "caveman": {"contract": "compression-default-kill-criterion.md",
29
+ "scope": "Caveman-speak compression, decompression, reversibility guards."},
30
+ "schema": {"contract": "agents/docs/frontmatter-contract.md",
31
+ "scope": "Frontmatter schemas, v2 rigor, lint behaviour for skills / rules / commands."},
32
+ "router": {"contract": "rule-router.md",
33
+ "scope": "router.json shape, tier semantics, dispatch precedence."},
34
+ "smoke": {"contract": "smoke-contracts.md",
35
+ "scope": "Per-tier smoke contracts, baseline locks, regression gates."},
36
+ "memory": {"contract": "agent-memory-contract.md",
37
+ "scope": "Memory MCP, propose / promote / poison flow, runtime-trust scoring."},
38
+ }
39
+
40
NAMED = re.compile(r"^(\d{4})-([a-z0-9-]+)\.md$")
FM = re.compile(r"^---\n(.*?)\n---", re.DOTALL)
# Only horizontal whitespace may separate the colon from the value: `\s`
# also matches "\n", which let an empty `key:` line swallow the following
# line as its value (and drop that following key entirely).
FIELD = re.compile(r"^([a-z_]+):[ \t]*(.+?)\s*$", re.MULTILINE)


def parse_fm(text: str) -> dict[str, str]:
    """Extract flat ``key: value`` pairs from a leading ``---`` frontmatter block.

    Only single-line scalar fields whose key consists of lowercase letters
    and underscores are recognised; surrounding spaces and quote characters
    are stripped from the value. Keys with an empty value are skipped.
    Returns an empty dict when ``text`` does not start with a frontmatter
    block.
    """
    m = FM.search(text)
    if not m:
        return {}
    return {k: v.strip(" \"'") for k, v in FIELD.findall(m.group(1))}
50
+
51
+
52
def scan_area(area: str) -> tuple[list[dict], list[str]]:
    """Collect the ADR records of one area together with naming errors.

    Every ``*.md`` file under the area directory except ``README.md`` is
    visited in sorted filename order. Files not named ``NNNN-<slug>.md``
    are reported as errors and skipped; the rest yield a record holding
    ``num``, ``slug``, ``path`` and the parsed frontmatter fields. ADR
    numbers are expected to run 1, 2, 3, ... with no gaps; only the first
    deviation is reported.

    Returns ``(adrs, errors)``; both are empty when the directory is absent.
    """
    directory = ADR_ROOT / area
    problems: list[str] = []
    records: list[dict] = []
    if not directory.exists():
        return records, problems

    for md_file in sorted(directory.glob("*.md")):
        filename = md_file.name
        if filename == "README.md":
            continue
        matched = NAMED.match(filename)
        if matched is None:
            problems.append(f"{area}/{filename}: filename does not match NNNN-<slug>.md")
            continue
        frontmatter = parse_fm(md_file.read_text(encoding="utf-8"))
        record = {"num": matched.group(1), "slug": matched.group(2), "path": filename}
        record.update(frontmatter)
        records.append(record)

    # Gap check: the n-th ADR (1-based) must carry number n.
    numbers = [int(record["num"]) for record in records]
    for position, found in enumerate(numbers, start=1):
        if found == position:
            continue
        problems.append(f"{area}/: number gap at position {position} (got {found:04d})")
        break
    return records, problems
76
+
77
+
78
def _contract_path(meta: dict[str, str]) -> Path:
    """Resolve the ``contract`` entry of an area to a filesystem path.

    A reference containing ``/`` is taken as repo-relative; a bare
    filename is looked up under ``docs/contracts/``.
    """
    reference = meta["contract"]
    if "/" in reference:
        return ROOT / reference
    return CONTRACT_ROOT / reference
83
+
84
+
85
def render_area_readme(area: str, meta: dict[str, str], adrs: list[dict]) -> str:
    """Render ``docs/adrs/<area>/README.md`` for one area.

    The README holds the area heading, its scope blurb, a link to the
    governing contract (or a "not yet published" note when the contract
    file does not exist) and a table of the area's ADRs. An area without
    ADRs gets a single placeholder row.

    Paths are always written with forward slashes so the generated
    Markdown is identical on every platform.
    """
    lines = [f"# ADRs — `{area}`", "",
             f"> {meta['scope']}", ""]
    contract_path = _contract_path(meta)
    published = contract_path.exists()
    if published:
        repo_rel = contract_path.relative_to(ROOT)
    else:
        reference = meta["contract"]
        repo_rel = Path(reference if "/" in reference else f"docs/contracts/{reference}")
    shown = repo_rel.as_posix()
    if published:
        # The README lives in docs/adrs/<area>/, so the repo root is three
        # levels up: <area> -> adrs -> docs -> root.
        link_target = (Path("..") / ".." / ".." / repo_rel).as_posix()
        lines.append(f"Contract: [`{shown}`]({link_target}).")
    else:
        lines.append(f"Contract: _not yet published_ (`{shown}`).")
    lines += ["",
              "| # | Title | Status | Date | Supersedes |",
              "|---|---|---|---|---|"]
    for a in adrs:
        # Prefer the frontmatter `decision` field; fall back to the slug.
        title = a.get("decision", a["slug"]).replace("-", " ").title()
        lines.append(f"| [{a['num']}]({a['path']}) | {title} | "
                     f"{a.get('status','—')} | {a.get('date','—')} | "
                     f"{a.get('supersedes','—')} |")
    if not adrs:
        lines.append("| _none yet_ | — | — | — | — |")
    return "\n".join(lines) + "\n"
108
+
109
+
110
def cmd_report(args) -> int:
    """Print the per-area ADR coverage table to stdout and return 0.

    For each canonical area the table shows the contract (flagged when the
    contract file is missing), the ADR count, whether an area README
    exists, and whether a bootstrap ADR is present. ``args`` is unused.
    """
    for header_line in (
        "## ADR coverage report",
        "",
        "| Area | Contract | ADRs | README | Status |",
        "|---|---|---:|:---:|---|",
    ):
        print(header_line)

    areas_without_adr = 0
    for area, meta in AREAS.items():
        adrs, _errors = scan_area(area)
        has_readme = (ADR_ROOT / area / "README.md").exists()
        has_contract = _contract_path(meta).exists()
        if adrs:
            status = "ok"
        else:
            status = "missing bootstrap"
            areas_without_adr += 1
        if has_contract:
            contract_cell = meta["contract"]
        else:
            contract_cell = f"_{meta['contract']}_ (no contract)"
        readme_cell = "✅" if has_readme else "—"
        print(f"| `{area}` | {contract_cell} | {len(adrs)} | {readme_cell} | {status} |")

    print()
    print(f"BASELINE: {len(AREAS)} canonical areas · {areas_without_adr} missing bootstrap ADR(s)")
    return 0
128
+
129
+
130
def cmd_check(args) -> int:
    """Run the gate over every canonical area; return 1 on any hard failure.

    Hard failures (reported on stderr): every error returned by
    ``scan_area`` and an area that has ADRs but no ``README.md``. An area
    with no ADR at all only produces a warning. A one-line summary is
    printed to stdout. ``args`` is unused.
    """
    hard_failures = 0
    warnings = 0
    for area, meta in AREAS.items():
        adrs, scan_errors = scan_area(area)
        for message in scan_errors:
            print(f"❌ {message}", file=sys.stderr)
        hard_failures += len(scan_errors)

        if not adrs:
            print(f"⚠️ {area}/: no bootstrap ADR yet (contract: {meta['contract']})", file=sys.stderr)
            warnings += 1
            continue

        readme_present = (ADR_ROOT / area / "README.md").exists()
        if not readme_present:
            print(f"❌ {area}/: README.md missing", file=sys.stderr)
            hard_failures += 1

    print(f"BASELINE: {hard_failures} hard fail(s) · {warnings} warn(s)")
    if hard_failures:
        return 1
    return 0
144
+
145
+
146
def cmd_regen_area_readme(args) -> int:
    """Rewrite ``docs/adrs/<area>/README.md`` from the area's ADR files.

    The area name comes from ``args.regen_area_readme``. An area missing
    from the ``AREAS`` inventory is rejected with exit code 1. Scan errors
    are echoed to stderr but do not stop the README from being written.
    Returns 0 after writing.
    """
    area = args.regen_area_readme
    meta = AREAS.get(area)
    if meta is None:
        print(f"❌ unknown area '{area}' — add to AREAS inventory first", file=sys.stderr)
        return 1

    adrs, scan_errors = scan_area(area)
    for message in scan_errors:
        print(f"❌ {message}", file=sys.stderr)

    readme_path = ADR_ROOT / area / "README.md"
    readme_path.parent.mkdir(parents=True, exist_ok=True)
    content = render_area_readme(area, meta, adrs)
    readme_path.write_text(content, encoding="utf-8")
    print(f"wrote {readme_path.relative_to(ROOT)}")
    return 0
159
+
160
+
161
def main() -> int:
    """Parse the command line and dispatch to the selected mode.

    ``--report`` (the default when no mode flag is given), ``--check`` and
    ``--regen-area-readme AREA`` are mutually exclusive. Returns the exit
    code of the chosen sub-command.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring is a hand-formatted mode table; keep its
        # line breaks instead of letting argparse reflow it.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    grp = ap.add_mutually_exclusive_group()
    # Documented as the default mode; register it so passing it explicitly
    # is accepted rather than rejected as an unknown argument.
    grp.add_argument("--report", action="store_true",
                     help="Print the coverage inventory (default).")
    grp.add_argument("--check", action="store_true")
    grp.add_argument("--regen-area-readme", metavar="AREA")
    args = ap.parse_args()
    if args.check:
        return cmd_check(args)
    if args.regen_area_readme is not None:
        return cmd_regen_area_readme(args)
    return cmd_report(args)
172
+
173
+
174
+ if __name__ == "__main__":
175
+ sys.exit(main())
@@ -0,0 +1,146 @@
1
+ #!/usr/bin/env python3
2
+ """MCP-tool inventory generator. Reads the source-of-truth catalog at
3
+ `scripts/mcp_server/consumer_tool_catalog.json` and the handler
4
+ registry at `scripts/mcp_server/tools.py`, emits
5
+ `docs/contracts/mcp-tool-inventory.md` with every tool cited by
6
+ `<file>:<line>`. README's MCP-tool count line links here; the bare
7
+ claim is banned.
8
+
9
+ Contract: step-11 Phase 5 Step 3
10
+ (agents/roadmaps/step-11-ruflo-parity.md).
11
+
12
+ Modes:
13
+ --check exit non-zero if the generated inventory drifts from
14
+ the on-disk file (CI gate).
15
+ --write regenerate the inventory file in-place (default).
16
+ """
17
+ from __future__ import annotations
18
+ import argparse, json, re, sys
19
+ from pathlib import Path
20
+
21
+ ROOT = Path(__file__).resolve().parent.parent
22
+ CATALOG = ROOT / "scripts/mcp_server/consumer_tool_catalog.json"
23
+ TOOLS_PY = ROOT / "scripts/mcp_server/tools.py"
24
+ OUT = ROOT / "docs/contracts/mcp-tool-inventory.md"
25
+
26
+ # Match `"<name>": BuiltinTool(` in the ALLOWLIST dict.
27
+ HANDLER_RE = re.compile(r'^\s*"([a-z_]+)"\s*:\s*BuiltinTool\(')
28
+ # Match `"name": "<name>",` in the catalog json (for catalog citations).
29
+ CATALOG_NAME_RE = re.compile(r'^\s*"name"\s*:\s*"([a-z_]+)"\s*,?\s*$')
30
+
31
+
32
def _index_handlers() -> dict[str, int]:
    """Map each handler name in ``tools.py`` to its 1-based line number.

    A line counts as a registration when it matches ``HANDLER_RE``
    (``"<name>": BuiltinTool(``). If a name appears more than once, the
    last occurrence wins.
    """
    source_lines = TOOLS_PY.read_text(encoding="utf-8").splitlines()
    located = (
        (HANDLER_RE.match(text), lineno)
        for lineno, text in enumerate(source_lines, start=1)
    )
    return {hit.group(1): lineno for hit, lineno in located if hit}
39
+
40
+
41
def _index_catalog_lines() -> dict[str, int]:
    """Map each tool name in the catalog JSON to its 1-based line number.

    Lines are matched textually against ``CATALOG_NAME_RE``
    (``"name": "<name>"``). If a name appears more than once, the first
    occurrence is kept.
    """
    first_seen: dict[str, int] = {}
    catalog_lines = CATALOG.read_text(encoding="utf-8").splitlines()
    for lineno, text in enumerate(catalog_lines, start=1):
        hit = CATALOG_NAME_RE.match(text)
        if hit:
            # setdefault keeps the earliest line for a repeated name.
            first_seen.setdefault(hit.group(1), lineno)
    return first_seen
48
+
49
+
50
+ def _render(catalog: dict, handlers: dict[str, int], cat_lines: dict[str, int]) -> str:
51
+ tools = catalog["tools"]
52
+ total = len(tools)
53
+ by_transport: dict[str, int] = {}
54
+ by_side_effect: dict[str, int] = {}
55
+ for t in tools:
56
+ for tr in t["implemented_on"]:
57
+ by_transport[tr] = by_transport.get(tr, 0) + 1
58
+ by_side_effect[t["side_effect"]] = by_side_effect.get(t["side_effect"], 0) + 1
59
+ stub_count = sum(1 for t in tools if not t["implemented_on"])
60
+ transport_summary = ", ".join(f"{k}={v}" for k, v in sorted(by_transport.items())) or "none"
61
+ side_effect_summary = ", ".join(f"{k}={v}" for k, v in sorted(by_side_effect.items()))
62
+
63
+ lines: list[str] = []
64
+ lines.append("---")
65
+ lines.append("stability: beta")
66
+ lines.append("keep-beta-until: 2026-08-14")
67
+ lines.append("---")
68
+ lines.append("")
69
+ lines.append("# MCP tool inventory")
70
+ lines.append("")
71
+ lines.append("> Generated by [`scripts/audit_mcp_tools.py`](../../scripts/audit_mcp_tools.py)")
72
+ lines.append("> from the source-of-truth catalog")
73
+ lines.append("> [`scripts/mcp_server/consumer_tool_catalog.json`](../../scripts/mcp_server/consumer_tool_catalog.json).")
74
+ lines.append("> Do **not** hand-edit; rerun `python3 scripts/audit_mcp_tools.py --write`.")
75
+ lines.append(">")
76
+ lines.append("> Step-11 Phase 5 Step 3 (`step-11-ruflo-parity.md`).")
77
+ lines.append("")
78
+ lines.append("## Summary")
79
+ lines.append("")
80
+ lines.append(f"- **Total tools:** {total}")
81
+ lines.append(f"- **By transport:** {transport_summary}")
82
+ lines.append(f"- **By side-effect:** {side_effect_summary}")
83
+ lines.append(f"- **Discovery-only stubs (no implementation):** {stub_count}")
84
+ lines.append("")
85
+ lines.append("## Tools")
86
+ lines.append("")
87
+ lines.append("| Tool | Side-effect | Transports | Catalog | Handler |")
88
+ lines.append("|---|---|---|---|---|")
89
+ for t in tools:
90
+ name = t["name"]
91
+ side = t["side_effect"]
92
+ transports = ", ".join(t["implemented_on"]) if t["implemented_on"] else "_(stub)_"
93
+ cat_line = cat_lines.get(name)
94
+ cat_cite = (
95
+ f"[`consumer_tool_catalog.json:{cat_line}`](../../scripts/mcp_server/consumer_tool_catalog.json#L{cat_line})"
96
+ if cat_line else "_missing_"
97
+ )
98
+ h_line = handlers.get(name)
99
+ h_cite = (
100
+ f"[`tools.py:{h_line}`](../../scripts/mcp_server/tools.py#L{h_line})"
101
+ if h_line else "_stub-only_"
102
+ )
103
+ lines.append(f"| `{name}` | `{side}` | {transports} | {cat_cite} | {h_cite} |")
104
+ lines.append("")
105
+ lines.append("## Glossary")
106
+ lines.append("")
107
+ lines.append("- **Side-effect** — `ro` (read-only) · `fs-write` (filesystem write) · `shell` (spawns processes).")
108
+ lines.append("- **Transports** — `stdio` (`scripts/mcp_server/`) · `worker` (`workers/mcp/`). A tool may live on both.")
109
+ lines.append("- **Stub** — catalog-listed for discovery; returns the `not_implemented` envelope from")
110
+ lines.append(" [`mcp-tool-stub-envelope.md`](mcp-tool-stub-envelope.md) until promoted.")
111
+ lines.append("")
112
+ return "\n".join(lines) + "\n"
113
+
114
+
115
def main() -> int:
    """Regenerate the MCP tool inventory, or verify it with ``--check``.

    The inventory is always rendered from the catalog and the handler
    index. With ``--check`` the rendering is compared to the file on disk
    (a missing file counts as empty): exit code 1 and a hint on stderr on
    drift, 0 when in sync. Otherwise the file is (re)written and 0 is
    returned. ``--quiet`` suppresses the success message in both modes.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument("--check", action="store_true", help="Drift gate: exit 1 if file is stale.")
    mode.add_argument("--write", action="store_true", help="Regenerate the inventory file.")
    parser.add_argument("--quiet", action="store_true")
    args = parser.parse_args()

    catalog = json.loads(CATALOG.read_text(encoding="utf-8"))
    handler_index = _index_handlers()
    catalog_index = _index_catalog_lines()
    rendered = _render(catalog, handler_index, catalog_index)

    if not args.check:
        OUT.parent.mkdir(parents=True, exist_ok=True)
        OUT.write_text(rendered, encoding="utf-8")
        if not args.quiet:
            print(f"✅ wrote {OUT.relative_to(ROOT)} · {len(catalog['tools'])} tool(s)")
        return 0

    if OUT.exists():
        current = OUT.read_text(encoding="utf-8")
    else:
        current = ""
    if current != rendered:
        print(f"❌ {OUT.relative_to(ROOT)} drifted from generator.", file=sys.stderr)
        print(" Run: python3 scripts/audit_mcp_tools.py --write", file=sys.stderr)
        return 1
    if not args.quiet:
        print(f"BASELINE: {OUT.relative_to(ROOT)} is in sync · {len(catalog['tools'])} tool(s)")
    return 0
143
+
144
+
145
+ if __name__ == "__main__":
146
+ sys.exit(main())