npm - @event4u/agent-config - Versions diffs - 2.19.0 → 2.20.1 - Mend

@event4u/agent-config 2.19.0 → 2.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94) hide show

package/.agent-src/commands/agent-status.md +29 -0
package/.agent-src/commands/onboard.md +221 -81
package/.agent-src/packs/README.md +49 -0
package/.agent-src/packs/agency-delivery.yml +63 -0
package/.agent-src/packs/content-engine.yml +53 -0
package/.agent-src/packs/founder-mvp.yml +51 -0
package/.agent-src/presets/README.md +26 -0
package/.agent-src/presets/balanced.yml +34 -0
package/.agent-src/presets/fast.yml +31 -0
package/.agent-src/presets/strict.yml +38 -0
package/.agent-src/profiles/README.md +29 -0
package/.agent-src/profiles/agency.yml +27 -0
package/.agent-src/profiles/content_creator.yml +25 -0
package/.agent-src/profiles/developer.yml +26 -0
package/.agent-src/profiles/finance.yml +24 -0
package/.agent-src/profiles/founder.yml +25 -0
package/.agent-src/profiles/ops.yml +25 -0
package/.agent-src/rules/no-cheap-questions.md +25 -17
package/.agent-src/skills/adr-create/SKILL.md +78 -68
package/.agent-src/skills/subagent-orchestration/SKILL.md +33 -0
package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
package/.agent-src/templates/skill-archive-note.md +101 -0
package/.claude-plugin/marketplace.json +1 -1
package/CHANGELOG.md +73 -70
package/README.md +68 -72
package/config/agent-settings.template.yml +22 -0
package/docs/adrs/caveman/0001-default-off-until-bench.md +93 -0
package/docs/adrs/caveman/README.md +9 -0
package/docs/adrs/cost/0001-hard-stop-hook.md +114 -0
package/docs/adrs/cost/README.md +9 -0
package/docs/adrs/memory/0001-consumer-side-snapshot.md +111 -0
package/docs/adrs/memory/README.md +9 -0
package/docs/adrs/router/0001-three-tier-routing.md +119 -0
package/docs/adrs/router/README.md +9 -0
package/docs/adrs/schema/0001-json-schema-frontmatter.md +102 -0
package/docs/adrs/schema/README.md +9 -0
package/docs/adrs/smoke/0001-per-tier-smoke-scripts.md +99 -0
package/docs/adrs/smoke/README.md +9 -0
package/docs/architecture/current-onboard-baseline.md +126 -0
package/docs/architecture/current-safety-behavior.md +137 -0
package/docs/archive/CHANGELOG-pre-2.16.0.md +48 -0
package/docs/archive/CHANGELOG-pre-2.17.0.md +63 -0
package/docs/contracts/adr-layout.md +108 -0
package/docs/contracts/benchmark-corpus-spec.md +97 -0
package/docs/contracts/benchmark-report-schema.md +111 -0
package/docs/contracts/command-clusters.md +1 -0
package/docs/contracts/command-taxonomy.md +137 -0
package/docs/contracts/compression-default-kill-criterion.md +69 -0
package/docs/contracts/config-presets.md +144 -0
package/docs/contracts/cost-dashboard.md +143 -0
package/docs/contracts/cost-enforcement.md +134 -0
package/docs/contracts/file-ownership-matrix.json +0 -7
package/docs/contracts/mcp-tool-inventory.md +53 -0
package/docs/contracts/measurement-baseline.md +102 -0
package/docs/contracts/namespace.md +125 -0
package/docs/contracts/profile-system.md +142 -0
package/docs/contracts/safety-model.md +129 -0
package/docs/contracts/smoke-contracts.md +144 -0
package/docs/contracts/workflow-packs.md +121 -0
package/docs/decisions/ADR-010-profile-pack-preset-boundary.md +132 -0
package/docs/decisions/INDEX.md +1 -0
package/docs/featured-commands.md +27 -0
package/docs/parity/bench-ruflo.json +58 -0
package/docs/parity/bench.json +41 -0
package/docs/parity/ruflo.md +46 -0
package/docs/profiles.md +91 -0
package/package.json +1 -1
package/scripts/_cli/cmd_explain.py +250 -0
package/scripts/_lib/bench_cost.py +138 -0
package/scripts/_lib/bench_quality.py +118 -0
package/scripts/_lib/bench_report.py +150 -0
package/scripts/agent-config +13 -0
package/scripts/audit_adr_coverage.py +175 -0
package/scripts/audit_mcp_tools.py +146 -0
package/scripts/bench_baseline_ready.py +108 -0
package/scripts/bench_drift_check.py +151 -0
package/scripts/bench_per_tool.py +216 -0
package/scripts/bench_run.py +155 -0
package/scripts/config/__init__.py +9 -0
package/scripts/config/presets.py +206 -0
package/scripts/config/profiles.py +173 -0
package/scripts/cost/budget.mjs +73 -12
package/scripts/cost/preflight.mjs +89 -0
package/scripts/lint_archived_skills.py +143 -0
package/scripts/lint_bench_corpus.py +161 -0
package/scripts/lint_namespace.py +135 -0
package/scripts/lint_roadmap_complexity.py +3 -2
package/scripts/skill_overlap.py +204 -0
package/scripts/skill_usage_collect.py +191 -0
package/scripts/skill_usage_report.py +162 -0
package/scripts/smoke/kernel.sh +101 -0
package/scripts/smoke/router.sh +129 -0
package/scripts/smoke/schema.sh +71 -0
package/scripts/smoke/skills.sh +101 -0

package/scripts/_lib/bench_quality.py ADDED Viewed

@@ -0,0 +1,118 @@
+# Quality probe for `scripts/bench_run.py` — step-4 Phase 2 Step 3.
+#
+# Each prompt declares `rubric.must_include` / `must_not_include` or a
+# `quality_assertion` regex (per docs/contracts/benchmark-corpus-spec.md).
+# When an agent-output file is passed via --agent-output, we score the
+# assertions against actual output. Without it, we emit `not_collected`
+# per docs/contracts/benchmark-report-schema.md § quality invariants.
+"""Quality probe helper for the bench runner."""
+from __future__ import annotations
+import json
+import re
+from pathlib import Path
+from typing import Any
+def _eval_rubric(rubric: dict[str, Any], output: str) -> tuple[bool, str]:
+    """Check `output` against a rubric and report the first violation found.
+    Checks run in this order: every `must_include` phrase must be a substring
+    of `output`; no `must_not_include` phrase may be a substring; the
+    whitespace-split word count must respect the optional `length_words`
+    bounds (`min` / `max`; a missing or zero bound is not enforced).
+    Returns `(True, "ok")` when everything holds, else `(False, <reason>)`.
+    """
+    unset = object()
+    absent = next(
+        (needle for needle in rubric.get("must_include") or [] if needle not in output),
+        unset,
+    )
+    if absent is not unset:
+        return False, f"missing: {absent!r}"
+    present = next(
+        (needle for needle in rubric.get("must_not_include") or [] if needle in output),
+        unset,
+    )
+    if present is not unset:
+        return False, f"forbidden: {present!r}"
+    limits = rubric.get("length_words") or {}
+    if not limits:
+        return True, "ok"
+    word_count = len(output.split())
+    minimum = limits.get("min", 0)
+    maximum = limits.get("max", 0)
+    if minimum and word_count < minimum:
+        return False, f"length<{minimum}: {word_count}"
+    if maximum and word_count > maximum:
+        return False, f"length>{maximum}: {word_count}"
+    return True, "ok"
+def _eval_regex(pattern: str, output: str) -> tuple[bool, str]:
+    """Search `output` for `pattern`, compiled with `re.MULTILINE`.
+    Returns `(True, "ok")` on a match, `(False, "no_match")` when nothing
+    matches, and `(False, "bad_regex: <error>")` when the pattern does not
+    compile.
+    """
+    try:
+        compiled = re.compile(pattern, re.MULTILINE)
+    except re.error as exc:
+        return False, f"bad_regex: {exc}"
+    # Search once and branch on the result instead of scanning the output twice.
+    if compiled.search(output) is None:
+        return False, "no_match"
+    return True, "ok"
+def _format_rubric(rubric: dict[str, Any]) -> str:
+    """Render the truthy rubric fields as space-separated `key=value` pairs.
+    Only `must_include`, `must_not_include` and `length_words` are shown, in
+    that order. Returns `"<empty>"` when none of them is set.
+    """
+    shown_keys = ("must_include", "must_not_include", "length_words")
+    rendered = [f"{key}={rubric[key]}" for key in shown_keys if rubric.get(key)]
+    return " ".join(rendered) or "<empty>"
+def score_corpus(
+    prompts: list[dict[str, Any]],
+    agent_output_path: Path | None,
+) -> dict[str, Any]:
+    """Build the `quality` block of the benchmark report.
+    Only prompts that declare an assertion are scored: a truthy
+    `quality_assertion`, or a rubric with a truthy `must_include`,
+    `must_not_include` or `length_words`. A `quality_assertion` regex takes
+    precedence over the rubric when both are present.
+    When `agent_output_path` is None or is not a file, every declared prompt
+    is reported with `passed == "not_collected"` and the score is 0.0.
+    Otherwise the file is read as JSON and looked up by prompt id; a prompt
+    with no entry is scored against the empty string.
+    """
+    def _declares_assertion(prompt: dict[str, Any]) -> bool:
+        rubric = prompt.get("rubric") or {}
+        return bool(
+            rubric.get("must_include")
+            or rubric.get("must_not_include")
+            or rubric.get("length_words")
+            or prompt.get("quality_assertion")
+        )
+    declared = [prompt for prompt in prompts if _declares_assertion(prompt)]
+    total_declared = len(declared)
+    if agent_output_path is None or not agent_output_path.is_file():
+        placeholders: list[dict[str, Any]] = []
+        for prompt in declared:
+            regex = prompt.get("quality_assertion")
+            placeholders.append({
+                "id": prompt["id"],
+                "assertion": regex or _format_rubric(prompt.get("rubric") or {}),
+                "assertion_kind": "quality_assertion" if regex else "rubric",
+                "passed": "not_collected",
+            })
+        return {
+            "source": "not_collected",
+            "prompts_with_assertion": total_declared,
+            "prompts_passing": 0,
+            "quality_score": 0.0,
+            "per_prompt": placeholders,
+        }
+    outputs = json.loads(agent_output_path.read_text(encoding="utf-8"))
+    per_prompt: list[dict[str, Any]] = []
+    for prompt in declared:
+        prompt_id = prompt["id"]
+        text = str(outputs.get(prompt_id, ""))
+        regex = prompt.get("quality_assertion")
+        if regex:
+            passed, _reason = _eval_regex(regex, text)
+            kind, assertion = "quality_assertion", regex
+        else:
+            rubric = prompt.get("rubric") or {}
+            passed, _reason = _eval_rubric(rubric, text)
+            kind, assertion = "rubric", _format_rubric(rubric)
+        per_prompt.append({
+            "id": prompt_id,
+            "assertion": assertion,
+            "assertion_kind": kind,
+            "passed": passed,
+        })
+    passing = sum(1 for row in per_prompt if row["passed"])
+    if total_declared:
+        score = round(passing / total_declared, 4)
+    else:
+        score = 0.0
+    return {
+        "source": str(agent_output_path),
+        "prompts_with_assertion": total_declared,
+        "prompts_passing": passing,
+        "quality_score": score,
+        "per_prompt": per_prompt,
+    }

package/scripts/_lib/bench_report.py ADDED Viewed

@@ -0,0 +1,150 @@
+# Report emitter for `scripts/bench_run.py` — step-4 Phase 2 Step 4.
+#
+# Serializes the unified report dict to JSON + Markdown per
+# docs/contracts/benchmark-report-schema.md. Filename format:
+# `bench/reports/<UTC ISO-8601 with ':' -> '-'>-<corpus_id>.{json,md}`.
+"""Report emitter for the bench runner."""
+from __future__ import annotations
+import json
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+def utc_now_filename_stamp() -> str:
+    """Return the current UTC time as a filename-safe, sortable stamp.
+    Same layout as an ISO-8601 timestamp, with '-' in place of ':' in the
+    time part so the stamp stays portable across filesystems.
+    """
+    stamp_format = "%Y-%m-%dT%H-%M-%SZ"
+    now = datetime.now(timezone.utc)
+    return now.strftime(stamp_format)
+def utc_now_iso() -> str:
+    """Return the current UTC time as `YYYY-MM-DDTHH:MM:SSZ` (second precision)."""
+    iso_format = "%Y-%m-%dT%H:%M:%SZ"
+    now = datetime.now(timezone.utc)
+    return now.strftime(iso_format)
+def report_paths(reports_dir: Path, corpus_id: str, stamp: str) -> tuple[Path, Path]:
+    """Return the `(json_path, markdown_path)` pair for one report.
+    Both files share the stem `<stamp>-<corpus_id>` inside `reports_dir`.
+    """
+    stem = f"{stamp}-{corpus_id}"
+    json_path = reports_dir / f"{stem}.json"
+    markdown_path = reports_dir / f"{stem}.md"
+    return json_path, markdown_path
+def write_json(path: Path, report: dict[str, Any]) -> None:
+    """Write `report` to `path` as 2-space-indented JSON with a trailing newline.
+    Missing parent directories are created first.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    payload = json.dumps(report, indent=2)
+    path.write_text(f"{payload}\n", encoding="utf-8")
+def _selection_section(selection: dict[str, Any]) -> str:
+    """Render the "Selection accuracy" Markdown section.
+    Emits a one-line summary (top-K, hit count, accuracy, target, verdict)
+    followed by a table with one row per entry in `selection["per_prompt"]`.
+    Empty or missing skill lists are shown as an em dash.
+    """
+    summary = (
+        f"- top-K = **{selection['top_k']}** · "
+        f"hit **{selection['prompts_hit']} / {selection['prompts_total']}** · "
+        f"accuracy **{selection['selection_accuracy']:.2%}** · "
+        f"target **{selection['target']:.2%}** · "
+        f"verdict **{'PASS' if selection['passed'] else 'FAIL'}**"
+    )
+    def _row(entry: dict[str, Any]) -> str:
+        if entry["hit"]:
+            mark = "✅"
+        else:
+            mark = "❌"
+        expected = ", ".join(entry.get("expected_skills") or []) or "—"
+        ranked = ", ".join(entry.get("top_k_ranked") or []) or "—"
+        return f"| `{entry['id']}` | {mark} | {expected} | {ranked} |"
+    header = [
+        "## Selection accuracy",
+        "",
+        summary,
+        "",
+        "| id | hit | expected | top-K ranked |",
+        "|---|---|---|---|",
+    ]
+    rows = [_row(entry) for entry in selection.get("per_prompt", [])]
+    return "\n".join(header + rows)
+def _cost_section(cost: dict[str, Any]) -> str:
+    """Render the "Cost capture" Markdown section.
+    When `cost["source"]` is `"unavailable"`, a short notice with the reason
+    and scanned path is returned. Otherwise the section lists the source,
+    the total cost, a per-tier table (tiers with zero messages and zero cost
+    are left out) and a token-count table.
+    """
+    pricing_date = cost.get('pricing_sourced_on') or '—'
+    if cost.get("source") == "unavailable":
+        notice = [
+            "## Cost capture\n\n",
+            f"- **source:** `unavailable` ({cost.get('reason', 'unknown')})\n",
+            f"- **scanned:** `{cost.get('scanned_path', '—')}`\n",
+            f"- **pricing sourced on:** {pricing_date}\n\n",
+            "_No session jsonl available. Run `node scripts/cost/track.mjs` ",
+            "from a real Claude Code session to populate agents/cost-tracking/sessions.jsonl._\n",
+        ]
+        return "".join(notice)
+    totals = cost["totals"]
+    header = [
+        "## Cost capture",
+        "",
+        f"- **source:** `{cost['source']}` · sessions scanned: **{cost['sessions_scanned']}**",
+        f"- **pricing sourced on:** {pricing_date}",
+        f"- **total cost:** **${totals['total_cost_usd']:.6f}**",
+        "",
+        "| tier | messages | cost (USD) |",
+        "|---|---:|---:|",
+    ]
+    # Keep a tier as soon as either its message count or its cost is non-zero.
+    tier_rows = [
+        f"| {tier} | {slot['messages']} | ${slot['cost_usd']:.6f} |"
+        for tier, slot in cost["per_tier"].items()
+        if slot["messages"] != 0 or slot["cost_usd"] != 0.0
+    ]
+    token_metrics = (
+        "input_tokens",
+        "output_tokens",
+        "cache_read_input_tokens",
+        "cache_creation_input_tokens",
+    )
+    metric_rows = ["", "| metric | value |", "|---|---:|"]
+    metric_rows.extend(f"| {metric} | {totals[metric]} |" for metric in token_metrics)
+    return "\n".join(header + tier_rows + metric_rows)
+def _quality_section(quality: dict[str, Any]) -> str:
+    """Render the "Quality probe" Markdown section.
+    When `quality["source"]` is `"not_collected"`, a short notice explains
+    how to supply agent output. Otherwise a summary line and a table with
+    one row per entry in `quality["per_prompt"]` are emitted. The `passed`
+    cell shows a check mark for True, a cross for False and an em dash for
+    anything else (for example the `"not_collected"` marker).
+    """
+    if quality["source"] == "not_collected":
+        return (
+            "## Quality probe\n\n"
+            f"- **source:** `not_collected` · assertions declared: "
+            f"**{quality['prompts_with_assertion']}**\n"
+            "- _Pass `--agent-output <path-to-outputs.json>` (map of `id -> str`) "
+            "to score the rubrics. Schema invariant: missing output keeps "
+            "`verdict.overall` at `partial`._\n"
+        )
+    lines = [
+        "## Quality probe",
+        "",
+        f"- **source:** `{quality['source']}` · "
+        f"passing **{quality['prompts_passing']} / {quality['prompts_with_assertion']}** · "
+        f"score **{quality['quality_score']:.2%}**",
+        "",
+        "| id | kind | passed | assertion |",
+        "|---|---|---|---|",
+    ]
+    for r in quality.get("per_prompt", []):
+        mark = "✅" if r["passed"] is True else ("❌" if r["passed"] is False else "—")
+        # Assertions are often regexes with alternation. An unescaped "|"
+        # ends a Markdown table cell even inside a code span, so escape it.
+        assertion = str(r["assertion"]).replace("|", "\\|")
+        lines.append(f"| `{r['id']}` | {r['assertion_kind']} | {mark} | `{assertion}` |")
+    return "\n".join(lines)
+def render_markdown(report: dict[str, Any]) -> str:
+    """Render the full benchmark report as Markdown.
+    Reads the `corpus`, `selection`, `cost`, `quality`, `verdict`,
+    `generated_at` and `runner` keys of `report`. The output is a headline
+    block, then the selection, cost and quality sections, then a notes
+    block, separated by blank lines and ending with a newline.
+    """
+    corpus = report["corpus"]
+    sel = report["selection"]
+    cost = report["cost"]
+    qual = report["quality"]
+    verdict = report["verdict"]
+    # The cost section tolerates an "unavailable" cost block that carries no
+    # detail, so the headline falls back to 0.0 rather than raising KeyError
+    # when `totals` is absent.
+    # NOTE(review): confirm whether the cost collector always emits `totals`.
+    total_cost = (cost.get("totals") or {}).get("total_cost_usd", 0.0)
+    if cost["source"] != "unavailable":
+        cost_detail = "sessions=" + str(cost["sessions_scanned"])
+    else:
+        cost_detail = cost["source"]
+    headline = (
+        f"# Benchmark Report — `{corpus['id']}` · {report['generated_at']}\n\n"
+        "## Headline\n\n"
+        f"- **selection** {sel['selection_accuracy']:.2%} (target {sel['target']:.2%}) → **{verdict['selection']}**\n"
+        f"- **cost** ${total_cost:.6f} "
+        f"({cost_detail})\n"
+        f"- **quality** {qual['quality_score']:.2%} → **{verdict['quality']}**\n"
+        f"- **overall** → **{verdict['overall']}**\n"
+    )
+    notes = (
+        "## Notes\n\n"
+        f"- corpus path: `{corpus['path']}` · prompts: **{corpus['prompt_count']}**\n"
+        "- pricing: `bench/pricing.yaml`\n"
+        f"- baseline collector: `{report['runner']['baseline_collector']}`\n"
+    )
+    return "\n\n".join([
+        headline,
+        _selection_section(sel),
+        _cost_section(cost),
+        _quality_section(qual),
+        notes,
+    ]) + "\n"
+def write_markdown(path: Path, report: dict[str, Any]) -> None:
+    """Render `report` as Markdown and write it to `path` as UTF-8.
+    Missing parent directories are created first.
+    """
+    target_dir = path.parent
+    target_dir.mkdir(parents=True, exist_ok=True)
+    document = render_markdown(report)
+    path.write_text(document, encoding="utf-8")

package/scripts/agent-config CHANGED Viewed

@@ -97,6 +97,10 @@ Tier 1 — power-user (release shape, audit, migration):
                              Lists missing, modified, and foreign files.
                              Exits 1 on drift, 2 on missing lockfile.
                              Flags: --json | --project=<path>
+  explain                    Read-only decision-chain trace.
+                             Usage: explain config | explain rule <name>
+                                  | explain route "<text>"
+                             Flags: --json | --project=<path>
   migrate                    One-shot migration off legacy composer / npm install paths
                              Flags: --dry-run (detect only)
   first-run                  Guided first-run setup — cost profile, settings, tooling
@@ -749,6 +753,14 @@ cmd_versions() {
   exec env PYTHONPATH="$PACKAGE_ROOT" python3 -m scripts._cli.cmd_versions "$@"
 }
+# `agent-config explain <config|rule|route>` — print the decision chain
+# behind a configuration or routing outcome. Read-only diagnostic; never
+# edits state. See scripts/_cli/cmd_explain.py.
+# All arguments are forwarded unchanged to the Python module.
+cmd_explain() {
+  # Helper defined elsewhere in this script; presumably aborts when python3
+  # is not available — confirm against its definition.
+  require_python3
+  # `exec` replaces this shell with python3, so the module's exit status
+  # becomes the CLI's exit status. PYTHONPATH points at $PACKAGE_ROOT so
+  # that `scripts._cli.cmd_explain` is importable.
+  exec env PYTHONPATH="$PACKAGE_ROOT" python3 -m scripts._cli.cmd_explain "$@"
+}
 main() {
   local cmd="${1-}"
   [[ $# -gt 0 ]] && shift || true
@@ -801,6 +813,7 @@ main() {
     prune)                   cmd_prune "$@" ;;
     doctor)                  cmd_doctor "$@" ;;
     versions)                cmd_versions "$@" ;;
+    explain)                 cmd_explain "$@" ;;
     help|--help|-h|"")
       # Optional `--tier=0|1|all` filter (default 0).
       local tier_arg="0"

package/scripts/audit_adr_coverage.py ADDED Viewed

@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+"""Audit per-area ADR coverage against docs/contracts/ and the canonical
+AREAS inventory. Contract: docs/contracts/adr-layout.md.
+Modes:
+  --report   (default) one-shot inventory: which areas exist, ADR count
+             per area, contracts missing a bootstrap ADR.
+  --check    exit 1 on hard failures (number gaps, missing area README,
+             broken supersedes); exit 0 with warnings on missing
+             bootstrap ADRs and dangling references.
+  --regen-area-readme <area>
+             rewrite docs/adrs/<area>/README.md from the area's ADR
+             frontmatter. Idempotent.
+"""
+from __future__ import annotations
+import argparse, re, sys
+from pathlib import Path
+# Two directories above this file (the parent of scripts/); all paths below
+# are resolved against it.
+ROOT = Path(__file__).resolve().parent.parent
+ADR_ROOT = ROOT / "docs" / "adrs"
+CONTRACT_ROOT = ROOT / "docs" / "contracts"
+# Canonical area inventory. To add an area: add it here, then run
+# `python3 scripts/audit_adr_coverage.py --check` in the same PR.
+# `contract` is either a bare filename (looked up under docs/contracts/) or
+# a path containing "/" (taken relative to ROOT); see _contract_path().
+AREAS: dict[str, dict[str, str]] = {
+    "cost":    {"contract": "cost-enforcement.md",
+                "scope":    "Budget ladder, hard-stop hook, cost reporting and dashboards."},
+    "caveman": {"contract": "compression-default-kill-criterion.md",
+                "scope":    "Caveman-speak compression, decompression, reversibility guards."},
+    "schema":  {"contract": "agents/docs/frontmatter-contract.md",
+                "scope":    "Frontmatter schemas, v2 rigor, lint behaviour for skills / rules / commands."},
+    "router":  {"contract": "rule-router.md",
+                "scope":    "router.json shape, tier semantics, dispatch precedence."},
+    "smoke":   {"contract": "smoke-contracts.md",
+                "scope":    "Per-tier smoke contracts, baseline locks, regression gates."},
+    "memory":  {"contract": "agent-memory-contract.md",
+                "scope":    "Memory MCP, propose / promote / poison flow, runtime-trust scoring."},
+}
+# ADR filename: four-digit number, a dash, a lowercase slug, then `.md`.
+NAMED = re.compile(r"^(\d{4})-([a-z0-9-]+)\.md$")
+# Frontmatter block: text between a leading `---` line and the next `---`.
+FM = re.compile(r"^---\n(.*?)\n---", re.DOTALL)
+# One `key: value` line inside the frontmatter; keys are lowercase letters
+# and underscores only.
+FIELD = re.compile(r"^([a-z_]+):\s*(.+?)\s*$", re.MULTILINE)
+def parse_fm(text: str) -> dict[str, str]:
+    """Extract flat `key: value` frontmatter fields from an ADR document.
+    Returns an empty dict when `text` has no leading `---` block. Values are
+    stripped of surrounding spaces and single or double quotes.
+    """
+    block = FM.search(text)
+    if block is None:
+        return {}
+    fields: dict[str, str] = {}
+    for key, raw_value in FIELD.findall(block.group(1)):
+        fields[key] = raw_value.strip(" \"'")
+    return fields
+def scan_area(area: str) -> tuple[list[dict], list[str]]:
+    """Collect the ADRs of one area and any filename or numbering errors.
+    Returns `(adrs, errors)`. `adrs` follows the sorted order of the `*.md`
+    files in `docs/adrs/<area>/` (README.md excluded); each entry holds
+    `num`, `slug` and `path` plus the file's frontmatter fields. Frontmatter
+    is merged last, so a frontmatter key with one of those names overrides
+    it. A missing area directory yields two empty lists. Only the first
+    numbering gap is reported.
+    """
+    errors: list[str] = []
+    directory = ADR_ROOT / area
+    if not directory.exists():
+        return [], errors
+    records: list[dict] = []
+    for md_file in sorted(directory.glob("*.md")):
+        filename = md_file.name
+        if filename == "README.md":
+            continue
+        named = NAMED.match(filename)
+        if named is None:
+            errors.append(f"{area}/{filename}: filename does not match NNNN-<slug>.md")
+            continue
+        number, slug = named.groups()
+        frontmatter = parse_fm(md_file.read_text(encoding="utf-8"))
+        records.append({"num": number, "slug": slug, "path": filename, **frontmatter})
+    # ADR numbers must run 1, 2, 3, ... with no holes.
+    numbers = [int(record["num"]) for record in records]
+    first_gap = next(
+        ((position, found) for position, found in enumerate(numbers, start=1) if found != position),
+        None,
+    )
+    if first_gap is not None:
+        position, found = first_gap
+        errors.append(f"{area}/: number gap at position {position} (got {found:04d})")
+    return records, errors
+def _contract_path(meta: dict[str, str]) -> Path:
+    """Resolve the `contract` entry of an area to a filesystem path.
+    A bare filename is looked up under docs/contracts/; a reference that
+    contains "/" is taken relative to the repository root.
+    """
+    reference = meta["contract"]
+    if "/" in reference:
+        return ROOT / reference
+    return CONTRACT_ROOT / reference
+def render_area_readme(area: str, meta: dict[str, str], adrs: list[dict]) -> str:
+    """Render the Markdown body of docs/adrs/<area>/README.md.
+    The output holds a heading, the area scope as a block quote, a link to
+    the area's contract (or a "not yet published" note when the contract
+    file does not exist) and a table with one row per ADR. Titles come from
+    the `decision` frontmatter field, falling back to the filename slug,
+    with dashes turned into spaces and title-cased.
+    """
+    lines = [f"# ADRs — `{area}`", "",
+             f"> {meta['scope']}", ""]
+    contract_path = _contract_path(meta)
+    contract_exists = contract_path.exists()
+    if contract_exists:
+        repo_rel = contract_path.relative_to(ROOT)
+    else:
+        reference = meta["contract"]
+        repo_rel = Path(reference if "/" in reference else f"docs/contracts/{reference}")
+    # Markdown links need forward slashes on every platform, so render the
+    # path with as_posix() rather than the OS-specific str().
+    repo_rel_text = repo_rel.as_posix()
+    # The README lives at docs/adrs/<area>/README.md, so the repository root
+    # is three directories above it (<area> -> adrs -> docs -> root).
+    link_target = f"../../../{repo_rel_text}"
+    if contract_exists:
+        lines.append(f"Contract: [`{repo_rel_text}`]({link_target}).")
+    else:
+        lines.append(f"Contract: _not yet published_ (`{repo_rel_text}`).")
+    lines += ["",
+              "| # | Title | Status | Date | Supersedes |",
+              "|---|---|---|---|---|"]
+    for a in adrs:
+        title = a.get("decision", a["slug"]).replace("-", " ").title()
+        lines.append(f"| [{a['num']}]({a['path']}) | {title} | "
+                     f"{a.get('status','—')} | {a.get('date','—')} | "
+                     f"{a.get('supersedes','—')} |")
+    if not adrs:
+        lines.append("| _none yet_ | — | — | — | — |")
+    return "\n".join(lines) + "\n"
+def cmd_report(args) -> int:
+    """Print a Markdown coverage table for every canonical area; return 0.
+    Each row shows the area's contract (flagged when the file is absent),
+    its ADR count, whether an area README exists, and "ok" or
+    "missing bootstrap". `args` is accepted for symmetry with the other
+    commands and is not read.
+    """
+    print("## ADR coverage report\n")
+    print("| Area | Contract | ADRs | README | Status |")
+    print("|---|---|---:|:---:|---|")
+    areas_without_adrs = 0
+    for area, meta in AREAS.items():
+        adrs, _scan_errors = scan_area(area)
+        if (ADR_ROOT / area / "README.md").exists():
+            readme_cell = "✅"
+        else:
+            readme_cell = "—"
+        if _contract_path(meta).exists():
+            contract_cell = meta["contract"]
+        else:
+            contract_cell = f"_{meta['contract']}_ (no contract)"
+        if adrs:
+            status = "ok"
+        else:
+            status = "missing bootstrap"
+            areas_without_adrs += 1
+        print(f"| `{area}` | {contract_cell} | {len(adrs)} | {readme_cell} | {status} |")
+    print()
+    print(f"BASELINE: {len(AREAS)} canonical areas · {areas_without_adrs} missing bootstrap ADR(s)")
+    return 0
+def cmd_check(args) -> int:
+    """Gate on ADR layout problems; return 1 on any hard failure, else 0.
+    Hard failures: every error reported by scan_area(), and an area that has
+    ADRs but no README.md. An area without any ADR only produces a warning.
+    Diagnostics go to stderr and the summary line to stdout. `args` is not
+    read.
+    """
+    hard_failures = 0
+    warnings = 0
+    for area, meta in AREAS.items():
+        adrs, scan_errors = scan_area(area)
+        for message in scan_errors:
+            print(f"❌ {message}", file=sys.stderr)
+        hard_failures += len(scan_errors)
+        if not adrs:
+            print(f"⚠️  {area}/: no bootstrap ADR yet (contract: {meta['contract']})", file=sys.stderr)
+            warnings += 1
+        elif not (ADR_ROOT / area / "README.md").exists():
+            print(f"❌ {area}/: README.md missing", file=sys.stderr)
+            hard_failures += 1
+    print(f"BASELINE: {hard_failures} hard fail(s) · {warnings} warn(s)")
+    if hard_failures:
+        return 1
+    return 0
+def cmd_regen_area_readme(args) -> int:
+    """Rewrite docs/adrs/<area>/README.md for `args.regen_area_readme`.
+    Returns 1 without writing when the area is not in AREAS. Scan errors
+    are printed to stderr but do not stop the rewrite; returns 0 after the
+    file is written.
+    """
+    area = args.regen_area_readme
+    meta = AREAS.get(area)
+    if meta is None:
+        print(f"❌ unknown area '{area}' — add to AREAS inventory first", file=sys.stderr)
+        return 1
+    adrs, scan_errors = scan_area(area)
+    for message in scan_errors:
+        print(f"❌ {message}", file=sys.stderr)
+    readme_path = ADR_ROOT / area / "README.md"
+    readme_path.parent.mkdir(parents=True, exist_ok=True)
+    content = render_area_readme(area, meta, adrs)
+    readme_path.write_text(content, encoding="utf-8")
+    print(f"wrote {readme_path.relative_to(ROOT)}")
+    return 0
+def main() -> int:
+    """Parse the command line and run the selected mode; return its exit code.
+    `--check` and `--regen-area-readme AREA` select their modes; with no
+    flag, or with the explicit `--report`, the coverage report is printed.
+    The three flags are mutually exclusive.
+    """
+    ap = argparse.ArgumentParser(
+        description=__doc__,
+        # Keep the line layout of the module docstring in `--help` output.
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    grp = ap.add_mutually_exclusive_group()
+    # The module docstring documents `--report` as the default mode; accept
+    # it explicitly so that spelling it out is not rejected by argparse.
+    grp.add_argument("--report", action="store_true")
+    grp.add_argument("--check", action="store_true")
+    grp.add_argument("--regen-area-readme", metavar="AREA")
+    args = ap.parse_args()
+    if args.check:
+        return cmd_check(args)
+    if args.regen_area_readme:
+        return cmd_regen_area_readme(args)
+    return cmd_report(args)
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())

package/scripts/audit_mcp_tools.py ADDED Viewed

@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+"""MCP-tool inventory generator. Reads the source-of-truth catalog at
+`scripts/mcp_server/consumer_tool_catalog.json` and the handler
+registry at `scripts/mcp_server/tools.py`, emits
+`docs/contracts/mcp-tool-inventory.md` with every tool cited by
+`<file>:<line>`. README's MCP-tool count line links here; the bare
+claim is banned.
+Contract: step-11 Phase 5 Step 3
+(agents/roadmaps/step-11-ruflo-parity.md).
+Modes:
+  --check    exit non-zero if the generated inventory drifts from
+             the on-disk file (CI gate).
+  --write    regenerate the inventory file in-place (default).
+"""
+from __future__ import annotations
+import argparse, json, re, sys
+from pathlib import Path
+# Two directories above this file (the parent of scripts/); all paths below
+# are resolved against it.
+ROOT = Path(__file__).resolve().parent.parent
+CATALOG = ROOT / "scripts/mcp_server/consumer_tool_catalog.json"
+TOOLS_PY = ROOT / "scripts/mcp_server/tools.py"
+OUT = ROOT / "docs/contracts/mcp-tool-inventory.md"
+# Match `"<name>": BuiltinTool(` in the ALLOWLIST dict.
+# Names are limited to lowercase letters and underscores.
+HANDLER_RE = re.compile(r'^\s*"([a-z_]+)"\s*:\s*BuiltinTool\(')
+# Match `"name": "<name>",` in the catalog json (for catalog citations).
+# The whole line must be the name entry; the trailing comma is optional.
+CATALOG_NAME_RE = re.compile(r'^\s*"name"\s*:\s*"([a-z_]+)"\s*,?\s*$')
+def _index_handlers() -> dict[str, int]:
+    """Map each handler name registered in tools.py to its 1-based line number.
+    A line counts when it matches HANDLER_RE. If a name is registered more
+    than once, the last occurrence wins.
+    """
+    source_lines = TOOLS_PY.read_text(encoding="utf-8").splitlines()
+    candidates = (
+        (HANDLER_RE.match(text), lineno)
+        for lineno, text in enumerate(source_lines, 1)
+    )
+    return {hit.group(1): lineno for hit, lineno in candidates if hit}
+def _index_catalog_lines() -> dict[str, int]:
+    """Map each tool name in the catalog JSON to its 1-based line number.
+    A line counts when it matches CATALOG_NAME_RE. If a name appears more
+    than once, the first occurrence wins.
+    """
+    first_seen: dict[str, int] = {}
+    catalog_text = CATALOG.read_text(encoding="utf-8")
+    for lineno, text in enumerate(catalog_text.splitlines(), 1):
+        hit = CATALOG_NAME_RE.match(text)
+        if hit:
+            # setdefault keeps the earliest line for a repeated name.
+            first_seen.setdefault(hit.group(1), lineno)
+    return first_seen
+def _render(catalog: dict, handlers: dict[str, int], cat_lines: dict[str, int]) -> str:
+    """Render the MCP tool inventory document as Markdown.
+    `catalog["tools"]` supplies one table row per tool, each with `name`,
+    `side_effect` and `implemented_on`. `handlers` and `cat_lines` map tool
+    names to line numbers used for the `<file>:<line>` citations; a tool
+    missing from either map is shown as `_stub-only_` or `_missing_`.
+    The summary counts tools per transport and per side-effect (sorted by
+    name) and counts tools with an empty `implemented_on` as stubs.
+    """
+    tools = catalog["tools"]
+    total = len(tools)
+    transports_seen = [transport for tool in tools for transport in tool["implemented_on"]]
+    side_effects_seen = [tool["side_effect"] for tool in tools]
+    stub_count = len([tool for tool in tools if not tool["implemented_on"]])
+    transport_summary = ", ".join(
+        f"{name}={transports_seen.count(name)}" for name in sorted(set(transports_seen))
+    ) or "none"
+    side_effect_summary = ", ".join(
+        f"{name}={side_effects_seen.count(name)}" for name in sorted(set(side_effects_seen))
+    )
+    preamble = [
+        "---",
+        "stability: beta",
+        "keep-beta-until: 2026-08-14",
+        "---",
+        "",
+        "# MCP tool inventory",
+        "",
+        "> Generated by [`scripts/audit_mcp_tools.py`](../../scripts/audit_mcp_tools.py)",
+        "> from the source-of-truth catalog",
+        "> [`scripts/mcp_server/consumer_tool_catalog.json`](../../scripts/mcp_server/consumer_tool_catalog.json).",
+        "> Do **not** hand-edit; rerun `python3 scripts/audit_mcp_tools.py --write`.",
+        ">",
+        "> Step-11 Phase 5 Step 3 (`step-11-ruflo-parity.md`).",
+        "",
+        "## Summary",
+        "",
+        f"- **Total tools:** {total}",
+        f"- **By transport:** {transport_summary}",
+        f"- **By side-effect:** {side_effect_summary}",
+        f"- **Discovery-only stubs (no implementation):** {stub_count}",
+        "",
+        "## Tools",
+        "",
+        "| Tool | Side-effect | Transports | Catalog | Handler |",
+        "|---|---|---|---|---|",
+    ]
+    rows: list[str] = []
+    for tool in tools:
+        name = tool["name"]
+        side = tool["side_effect"]
+        if tool["implemented_on"]:
+            transports = ", ".join(tool["implemented_on"])
+        else:
+            transports = "_(stub)_"
+        cat_line = cat_lines.get(name)
+        if cat_line:
+            cat_cite = f"[`consumer_tool_catalog.json:{cat_line}`](../../scripts/mcp_server/consumer_tool_catalog.json#L{cat_line})"
+        else:
+            cat_cite = "_missing_"
+        h_line = handlers.get(name)
+        if h_line:
+            h_cite = f"[`tools.py:{h_line}`](../../scripts/mcp_server/tools.py#L{h_line})"
+        else:
+            h_cite = "_stub-only_"
+        rows.append(f"| `{name}` | `{side}` | {transports} | {cat_cite} | {h_cite} |")
+    glossary = [
+        "",
+        "## Glossary",
+        "",
+        "- **Side-effect** — `ro` (read-only) · `fs-write` (filesystem write) · `shell` (spawns processes).",
+        "- **Transports** — `stdio` (`scripts/mcp_server/`) · `worker` (`workers/mcp/`). A tool may live on both.",
+        "- **Stub** — catalog-listed for discovery; returns the `not_implemented` envelope from",
+        "  [`mcp-tool-stub-envelope.md`](mcp-tool-stub-envelope.md) until promoted.",
+        "",
+    ]
+    return "\n".join(preamble + rows + glossary) + "\n"
+def main() -> int:
+    """Regenerate the inventory file, or check it for drift.
+    Without `--check` (with or without `--write`) the rendered inventory is
+    written to OUT and 0 is returned. With `--check` nothing is written: 1
+    is returned when the file on disk is missing or differs from the
+    rendered text, else 0. `--quiet` suppresses the success line only.
+    """
+    ap = argparse.ArgumentParser(description=__doc__)
+    g = ap.add_mutually_exclusive_group()
+    g.add_argument("--check", action="store_true", help="Drift gate: exit 1 if file is stale.")
+    g.add_argument("--write", action="store_true", help="Regenerate the inventory file.")
+    ap.add_argument("--quiet", action="store_true")
+    args = ap.parse_args()
+    catalog = json.loads(CATALOG.read_text(encoding="utf-8"))
+    rendered = _render(catalog, _index_handlers(), _index_catalog_lines())
+    out_label = OUT.relative_to(ROOT)
+    tool_count = len(catalog['tools'])
+    if not args.check:
+        OUT.parent.mkdir(parents=True, exist_ok=True)
+        OUT.write_text(rendered, encoding="utf-8")
+        if not args.quiet:
+            print(f"✅ wrote {out_label} · {tool_count} tool(s)")
+        return 0
+    # A missing inventory file is treated as empty, so it counts as drift.
+    on_disk = OUT.read_text(encoding="utf-8") if OUT.exists() else ""
+    if on_disk != rendered:
+        print(f"❌ {out_label} drifted from generator.", file=sys.stderr)
+        print("   Run: python3 scripts/audit_mcp_tools.py --write", file=sys.stderr)
+        return 1
+    if not args.quiet:
+        print(f"BASELINE: {out_label} is in sync · {tool_count} tool(s)")
+    return 0
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())