npm - @event4u/agent-config - Versions diffs - 5.4.1 → 5.6.0 - Mend

@event4u/agent-config 5.4.1 → 5.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (92) hide show

package/.agent-src/commands/image/analyse.md +51 -0
package/.agent-src/commands/image/create.md +53 -0
package/.agent-src/commands/image/verify.md +48 -0
package/.agent-src/commands/image.md +69 -0
package/.agent-src/commands/knowledge/cross-repo.md +71 -0
package/.agent-src/commands/knowledge.md +2 -0
package/.agent-src/commands/skill/preview.md +67 -0
package/.agent-src/commands/skill.md +48 -0
package/.agent-src/commands/skills/discover.md +76 -0
package/.agent-src/commands/skills.md +56 -0
package/.agent-src/commands/video/from-song.md +351 -0
package/.agent-src/commands/video.md +19 -9
package/.agent-src/contexts/authority/commit-mechanics.md +8 -0
package/.agent-src/rules/commit-policy.md +3 -8
package/.agent-src/rules/linked-projects-onboarding-gate.md +1 -1
package/.agent-src/rules/media-sync-ground-truth.md +58 -0
package/.agent-src/skills/image-analyser/SKILL.md +121 -0
package/.agent-src/skills/image-analyser/canon-spec.md +109 -0
package/.agent-src/skills/image-analyser/evals/triggers.json +16 -0
package/.agent-src/skills/image-creator/SKILL.md +117 -0
package/.agent-src/skills/image-creator/evals/triggers.json +16 -0
package/.agent-src/skills/song-to-script/SKILL.md +216 -0
package/.claude-plugin/marketplace.json +15 -2
package/CHANGELOG.md +84 -0
package/CONTRIBUTING.md +6 -0
package/README.md +3 -3
package/config/agent-settings.template.yml +18 -0
package/dist/cli/registry.js +1 -0
package/dist/cli/registry.js.map +1 -1
package/dist/discovery/deprecation-report.md +1 -1
package/dist/discovery/discovery-manifest.json +327 -20
package/dist/discovery/discovery-manifest.json.sha256 +1 -1
package/dist/discovery/discovery-manifest.summary.md +4 -4
package/dist/discovery/orphan-report.md +1 -1
package/dist/discovery/packs.json +24 -10
package/dist/discovery/trust-report.md +3 -3
package/dist/discovery/workspaces.json +20 -6
package/dist/mcp/registry-manifest.json +3 -3
package/dist/router.json +1 -1
package/dist/server/schemas/settings.js +4 -0
package/dist/server/schemas/settings.js.map +1 -1
package/docs/architecture.md +3 -3
package/docs/catalog.md +20 -6
package/docs/contracts/benchmark-report-schema.md +12 -10
package/docs/contracts/command-clusters.md +5 -1
package/docs/contracts/cross-repo-retrieval.md +64 -0
package/docs/contracts/rule-router.md +39 -0
package/docs/contracts/skill-discovery.md +80 -0
package/docs/contracts/skill-dry-run.md +47 -0
package/docs/contracts/value-dashboard-spec.md +7 -3
package/docs/contracts/value-report-schema.md +6 -1
package/docs/decisions/ADR-032-linked-projects-scope.md +7 -3
package/docs/getting-started.md +2 -2
package/docs/guides/cross-repo-linked-projects.md +7 -0
package/docs/guides/cross-repo-retrieval.md +61 -0
package/docs/guides/skill-discovery.md +71 -0
package/docs/guides/skill-preview.md +71 -0
package/docs/value.md +17 -17
package/package.json +1 -1
package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
package/scripts/_dispatch.bash +10 -0
package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
package/scripts/_lib/bench_report.py +13 -14
package/scripts/_lib/bench_telegraph_report.py +1 -2
package/scripts/_lib/token_count.py +95 -0
package/scripts/_lib/value_report.py +3 -3
package/scripts/ai-video/adapters/higgsfield.sh +163 -6
package/scripts/ai-video/adapters/openai-images.sh +92 -6
package/scripts/ai-video/lib/probe-audio.sh +181 -0
package/scripts/audit_auto_rules.py +22 -6
package/scripts/audit_command_surface.py +6 -1
package/scripts/audit_initial_context.py +210 -0
package/scripts/bench_ab_diff.py +4 -11
package/scripts/bench_run.py +2 -3
package/scripts/bench_runner.py +2 -2
package/scripts/condense.py +44 -3
package/scripts/cross_repo_retrieve.py +172 -0
package/scripts/inventory_meta_layers.py +288 -0
package/scripts/iron_law_sha.py +14 -5
package/scripts/linked_projects_list.py +91 -0
package/scripts/measure_rule_budget.py +15 -0
package/scripts/memory_lookup.py +53 -2
package/scripts/project_thin_rules.py +168 -0
package/scripts/render_value_md.py +14 -23
package/scripts/schemas/command.schema.json +1 -1
package/scripts/schemas/rule.schema.json +1 -1
package/scripts/schemas/skill.schema.json +2 -2
package/scripts/skill_discovery.py +254 -0
package/scripts/skill_linter.py +8 -4
package/scripts/skill_preview.py +179 -0
package/scripts/trigger_coverage.py +129 -0

package/scripts/ai-video/lib/probe-audio.sh ADDED Viewed

@@ -0,0 +1,181 @@
+#!/usr/bin/env bash
+# probe-audio.sh — turn a song file into a deterministic, network-free
+# JSON summary the `song-to-script` skill maps to scenes:
+#
+#   {"duration": <seconds>,
+#    "method": "silence" | "rms" | "interval",
+#    "warning": "<present only for the interval fallback>",
+#    "sections": [{"start":0.0,"end":12.5,"energy":0.41,"label":"intro"}, ...]}
+#
+# HONEST FRAMING (AI-council design review, 2026-05-30): this is energy /
+# silence segmentation, NOT beat detection or musical analysis. Modern
+# masters are brick-walled (near-constant RMS), so a real cut structure
+# is often absent. The probe therefore degrades through three methods and
+# always reports which one produced the anchors:
+#
+#   1. silence  — ffmpeg silencedetect found real quiet gaps → true cuts.
+#   2. rms      — no usable silence; greedy-merge per-window RMS energy.
+#   3. interval — track is structurally flat (brick-walled / sustained):
+#                 fall back to fixed-interval cuts and SET `warning` so the
+#                 caller (and the operator) knows timing is not musical.
+#
+# Sections are cut anchors, never a transcription. For beat-accurate cuts
+# the operator passes `--scene-durations` to /video:from-song instead.
+#
+# Usage:
+#   probe-audio.sh <song-file> [--window <seconds>] [--interval <seconds>]
+#                              [--silence-db <dB>] [--silence-min <seconds>]
+#
+#   --window       RMS analysis window (default 3)
+#   --interval     fixed-interval fallback section length (default 15)
+#   --silence-db   silencedetect noise floor (default -30)
+#   --silence-min  silencedetect minimum gap to count as a boundary (default 0.5)
+#
+# Exit codes:
+#   0  JSON written to stdout
+#   2  usage / file missing
+#   3  required tool missing (ffprobe / ffmpeg)
+#   4  no audio stream in the file
+set -euo pipefail
+die() { printf 'probe-audio: %s\n' "$2" >&2; exit "$1"; }
+[ "$#" -ge 1 ] || die 2 "usage: $0 <song-file> [--window <s>] [--interval <s>] [--silence-db <dB>] [--silence-min <s>]"
+song="$1"; shift || true
+window=3
+interval=15
+silence_db=-30
+silence_min=0.5
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    --window)      window="${2:-3}";       shift 2 ;;
+    --interval)    interval="${2:-15}";     shift 2 ;;
+    --silence-db)  silence_db="${2:--30}";  shift 2 ;;
+    --silence-min) silence_min="${2:-0.5}"; shift 2 ;;
+    *) die 2 "unknown arg: $1" ;;
+  esac
+done
+[ -f "${song}" ] || die 2 "file not found: ${song}"
+command -v ffprobe >/dev/null 2>&1 || die 3 "ffprobe not found"
+command -v ffmpeg  >/dev/null 2>&1 || die 3 "ffmpeg not found"
+# --- 1. duration + audio-stream check ----------------------------------
+duration="$(ffprobe -v error -select_streams a:0 \
+  -show_entries format=duration -of default=nk=1:nw=1 "${song}" 2>/dev/null || true)"
+[ -n "${duration}" ] || die 4 "no audio stream in: ${song}"
+# --- 2. per-window RMS energy via astats --------------------------------
+# Slice the track into <window>-second chunks; read mean RMS level (dB),
+# normalise to 0..1 where -60dB→0 and 0dB→1. These energies feed BOTH the
+# rms-merge method and the per-section labelling of every method.
+n_windows="$(awk -v d="${duration}" -v w="${window}" 'BEGIN{
+  n = int(d / w); if (n * w < d) n++; if (n < 1) n = 1; print n }')"
+# win_starts / win_energy are parallel lists; each entry is terminated by a
+# literal backslash-n escape that the later stages expand.
+win_starts=""; win_energy=""
+i=0
+while [ "${i}" -lt "${n_windows}" ]; do
+  start="$(awk -v i="${i}" -v w="${window}" 'BEGIN{printf "%.3f", i*w}')"
+  # No `reset=1` here: that option restarts the statistics after every
+  # audio frame, so the summary printed at end-of-stream would describe
+  # only the final frame of the window instead of the whole window.
+  # awk keeps the LAST "RMS level dB" line of the summary.
+  # `|| true`: a window ffmpeg cannot decode degrades to the -60 dB floor
+  # below instead of aborting the whole probe under `set -e` + pipefail.
+  rms_db="$(ffmpeg -hide_banner -nostats -ss "${start}" -t "${window}" -i "${song}" \
+    -af astats -f null - 2>&1 \
+    | awk -F': ' '/RMS level dB/ {v=$2} END{print v}' || true)"
+  # Digital silence is reported as -inf; missing output is treated the same.
+  case "${rms_db}" in ""|*inf*|*nan*) rms_db=-60 ;; esac
+  norm="$(awk -v x="${rms_db}" 'BEGIN{
+    v=(x+60)/60; if(v<0)v=0; if(v>1)v=1; printf "%.3f", v }')"
+  win_starts="${win_starts}${start}\n"
+  win_energy="${win_energy}${norm}\n"
+  i=$((i + 1))
+done
+# --- 3. silencedetect boundaries ----------------------------------------
+# Real quiet gaps split the track at musically-meaningful points far more
+# reliably than RMS deltas on a compressed master. Collect the midpoints
+# of detected silences as candidate section boundaries.
+sil_bounds="$(ffmpeg -hide_banner -nostats -i "${song}" \
+  -af "silencedetect=noise=${silence_db}dB:d=${silence_min}" -f null - 2>&1 \
+  | awk '
+      /silence_start/ { for(i=1;i<=NF;i++) if($i=="silence_start:") s=$(i+1) }
+      /silence_end/   { for(i=1;i<=NF;i++) if($i=="silence_end:")   { e=$(i+1); printf "%.3f\n", (s+e)/2 } }
+    ' 2>/dev/null || true)"
+# count_lines — number of non-empty lines on stdin. `wc -l` counts newline
+# characters and GNU sed does not append one to an unterminated last line,
+# so `printf '%s' … | sed '/^$/d' | wc -l` under-counts by one on Linux.
+count_lines() { awk 'NF { c++ } END { print c + 0 }'; }
+# --- 4. choose method + build section boundaries ------------------------
+# A method needs >= 3 sections (>= 2 internal boundaries) to count as
+# "structure found"; otherwise degrade to the next method.
+method=""
+boundaries=""   # internal cut points (excluding 0 and duration)
+# Drop candidates within 0.5s of either end BEFORE counting, so "silence"
+# is only chosen when at least two usable internal boundaries survive
+# (the rms path below already filters before it counts).
+sil_kept="$(printf '%s\n' "${sil_bounds}" \
+  | awk -v d="${duration}" 'NF && $1>0.5 && $1<d-0.5' | sort -n | uniq)"
+if [ "$(printf '%s\n' "${sil_kept}" | count_lines)" -ge 2 ]; then
+  method="silence"
+  boundaries="${sil_kept}"
+fi
+if [ -z "${method}" ]; then
+  # greedy-merge adjacent RMS windows; keep a boundary on energy delta > 0.12
+  rms_bounds="$(paste <(printf '%b' "${win_starts}") <(printf '%b' "${win_energy}") \
+    | awk -v d="${duration}" '
+        { st[NR]=$1; en[NR]=$2; cnt=NR }
+        END {
+          prev=en[1]
+          for(k=2;k<=cnt;k++){
+            if ((en[k]-prev>0.12)||(prev-en[k]>0.12)) { if(st[k]>0.5 && st[k]<d-0.5) print st[k] }
+            prev=en[k]
+          }
+        }')"
+  if [ "$(printf '%s\n' "${rms_bounds}" | count_lines)" -ge 2 ]; then
+    method="rms"
+    boundaries="$(printf '%s\n' "${rms_bounds}" | sed '/^$/d' | sort -n | uniq)"
+  fi
+fi
+warning=""
+if [ -z "${method}" ]; then
+  method="interval"
+  warning="track is structurally flat (no usable silence or energy structure); sections are fixed ${interval}s intervals, not musical cuts"
+  # A non-positive (or non-numeric) interval would never advance t and loop
+  # forever; emit no internal boundary in that case (one whole-track section).
+  boundaries="$(awk -v d="${duration}" -v iv="${interval}" 'BEGIN{
+    if (iv + 0 <= 0) exit
+    for(t=iv; t<d-0.5; t+=iv) printf "%.3f\n", t }')"
+fi
+# --- 5. assemble sections, label, emit JSON -----------------------------
+# Boundaries → [0, b1, b2, ..., duration] section edges. Energy per section
+# = mean of the RMS windows whose start falls inside it.
+printf '%s' "${boundaries}" \
+  | sed '/^$/d' \
+  | awk -v d="${duration}" -v method="${method}" -v warning="${warning}" \
+        -v wins="$(printf '%b' "${win_starts}")" -v ens="$(printf '%b' "${win_energy}")" '
+    BEGIN {
+      nw=split(wins, ws, "\n"); split(ens, es, "\n")
+      # build edges
+      ne=0; edges[ne++]=0
+    }
+    { edges[ne++]=$1+0 }
+    END {
+      edges[ne++]=d+0
+      # mean energy across all windows for relative labelling
+      sum=0; c=0
+      for(k=1;k<=nw;k++){ if(ws[k]!=""){ sum+=es[k]; c++ } }
+      mean=(c?sum/c:0)
+      printf "{\"duration\": %.3f, \"method\": \"%s\"", d, method
+      if (warning != "") { gsub(/"/,"\\\"",warning); printf ", \"warning\": \"%s\"", warning }
+      printf ", \"sections\": ["
+      segs=ne-1
+      for(j=0;j<segs;j++){
+        s=edges[j]; e=edges[j+1]
+        # mean energy of windows starting within [s,e)
+        es_sum=0; es_c=0
+        for(k=1;k<=nw;k++){ if(ws[k]!=""){ if(ws[k]+0>=s && ws[k]+0<e){ es_sum+=es[k]; es_c++ } } }
+        energy=(es_c?es_sum/es_c:mean)
+        if (j==0) label="intro"
+        else if (j==segs-1) label=(energy<mean?"outro":"drop")
+        else if (energy>=mean+0.10) label="drop"
+        else if (energy<=mean-0.10) label="breakdown"
+        else label="build"
+        sep=(j<segs-1)?",":""
+        printf "{\"start\": %.3f, \"end\": %.3f, \"energy\": %.3f, \"label\": \"%s\"}%s", s, e, energy, label, sep
+      }
+      print "]}"
+    }'

package/scripts/audit_auto_rules.py CHANGED Viewed

@@ -25,8 +25,24 @@ from pathlib import Path
 import yaml
 REPO_ROOT = Path(__file__).resolve().parent.parent
-SRC_RULES = REPO_ROOT / ".agent-src.uncondensed" / "rules"
-PROJECTED_RULES = REPO_ROOT / ".augment" / "rules"
+sys.path.insert(0, str(REPO_ROOT / "scripts"))
+from _lib.agent_src import artefact_roots  # noqa: E402
+# Pre-monorepo this was REPO_ROOT/.agent-src.uncondensed/rules. Post-move
+# (ADR-017) source rules live under packages/*/.agent-src.uncondensed/rules.
+def _src_rule_paths() -> list[Path]:
+    paths: list[Path] = []
+    seen: set[str] = set()
+    for root in artefact_roots():
+        d = root / "rules"
+        if d.is_dir():
+            for p in sorted(d.glob("*.md")):
+                if p.name not in seen:
+                    seen.add(p.name)
+                    paths.append(p)
+    return paths
+PROJECTED_RULES = REPO_ROOT / ".agent-src" / "rules"
 REPORT_DIR = REPO_ROOT / "agents" / "reports"
 JSON_OUT = REPORT_DIR / "auto-rules-audit.json"
 MD_OUT = REPORT_DIR / "auto-rules-audit.md"
@@ -67,7 +83,7 @@ def _trigger_summary(triggers: list) -> dict:
 def collect() -> list[dict]:
     rules: list[dict] = []
-    for path in sorted(SRC_RULES.glob("*.md")):
+    for path in _src_rule_paths():
         text = path.read_text(encoding="utf-8")
         fm, body = _split_frontmatter(text)
         if fm.get("type") != "auto":
@@ -107,7 +123,7 @@ def render_markdown(rules: list[dict]) -> str:
         "# Auto-Rule Audit",
         "",
         "Generated by `scripts/audit_auto_rules.py` for Phase 5 of",
-        "`agents/roadmaps/road-to-augment-limit-fit.md`. Re-run after",
+        "`agents/roadmaps/archive/road-to-augment-limit-fit.md`. Re-run after",
         "any rule add/merge/deprecate to refresh the baseline.",
         "",
         "## Totals",
@@ -141,8 +157,8 @@ def render_markdown(rules: list[dict]) -> str:
 def main() -> int:
-    if not SRC_RULES.is_dir():
-        print(f"❌  Missing source dir: {SRC_RULES}", file=sys.stderr)
+    if not _src_rule_paths():
+        print("❌  No source rules found under any artefact root's rules/", file=sys.stderr)
         return 1
     rules = collect()
     REPORT_DIR.mkdir(parents=True, exist_ok=True)

package/scripts/audit_command_surface.py CHANGED Viewed

@@ -37,7 +37,12 @@ from pathlib import Path
 from typing import List
 REPO_ROOT = Path(__file__).resolve().parent.parent
-DEFAULT_ROOT = REPO_ROOT / ".agent-src.uncondensed" / "commands"
+# Pre-monorepo: REPO_ROOT/.agent-src.uncondensed/commands. Post-move (ADR-017)
+# the core command surface lives under packages/core/.agent-src.uncondensed.
+# Fall back to the legacy path only if the packages layout is absent.
+_CORE_COMMANDS = REPO_ROOT / "packages" / "core" / ".agent-src.uncondensed" / "commands"
+_LEGACY_COMMANDS = REPO_ROOT / ".agent-src.uncondensed" / "commands"
+DEFAULT_ROOT = _CORE_COMMANDS if _CORE_COMMANDS.is_dir() else _LEGACY_COMMANDS
 REPORT_DIR = REPO_ROOT / "agents" / "reports"
 OUT_JSON = REPORT_DIR / "command-surface.json"
 OUT_MD = REPORT_DIR / "command-surface.md"

package/scripts/audit_initial_context.py ADDED Viewed

@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+"""Initial-context token audit (roadmap `road-to-lean-initial-context`).
+Serves three roadmap steps with one analyzer (no new analyzer where one
+exists — reuses `scripts/_lib/token_count.py`):
+- **0B.2** — always-on rule-body footprint per tool projection.
+- **0B.4** — description-catalog initial cost (skill + command name+desc).
+- **1.3** — unified `audit:tokens` surfacing per-tool initial-token estimate,
+  longest rules in tokens, and the description-catalog pool.
+`char != token`: every number is reported in both. GPT counts are exact when
+`tiktoken` is installed, else a documented proxy (see `token_count`).
+Usage:
+    python3 scripts/audit_initial_context.py            # markdown report → stdout
+    python3 scripts/audit_initial_context.py --json     # machine-readable
+    python3 scripts/audit_initial_context.py --write     # write report files
+    python3 scripts/audit_initial_context.py --fail-if-over-budget  # CI gate (1.4)
+Exit codes: 0 = ok (or no budget set); 1 = a measured surface exceeds its
+configured token budget (only with --fail-if-over-budget); 2 = PyYAML is
+not installed.
+"""
+from __future__ import annotations
+import argparse
+import datetime as _dt
+import glob
+import json
+import re
+import sys
+from pathlib import Path
+REPO_ROOT = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(REPO_ROOT / "scripts"))
+from _lib import token_count  # noqa: E402
+try:
+    import yaml
+except ImportError:  # pragma: no cover
+    sys.stderr.write("error: PyYAML required (pip install pyyaml)\n")
+    sys.exit(2)
+REPORT_DIR = REPO_ROOT / "internal" / "bench" / "reports"
+# Tools whose rules/ dir holds one .md per rule (full body projected today).
+DIR_RULE_TOOLS = (".claude", ".augment", ".cursor")
+# Tools whose always-on surface is a single monolithic file.
+MONOLITH_TOOLS = (".windsurfrules",)
+# Initial-token budget per surface (None = advisory only, no gate). These are
+# soft ceilings the audit can enforce once a baseline is agreed (1.4). Set
+# generously now; tighten as Phase 3 lands.
+BUDGETS: dict[str, int | None] = {
+    "rules.gpt": None,
+    "skill_catalog.gpt": None,
+    "command_catalog.gpt": None,
+}
+def _frontmatter(path: Path) -> dict:
+    try:
+        text = path.read_text(encoding="utf-8", errors="ignore")
+    except OSError:
+        return {}
+    m = re.match(r"^---\s*\n(.*?)\n---\s*\n", text, re.DOTALL)
+    if not m:
+        return {}
+    try:
+        return yaml.safe_load(m.group(1)) or {}
+    except yaml.YAMLError:
+        return {}
+def _measure_files(paths: list[Path]) -> dict:
+    blob = "".join(p.read_text(encoding="utf-8", errors="ignore") for p in paths)
+    out = token_count.measure(blob)
+    out["files"] = len(paths)
+    return out
+def rule_footprint() -> dict:
+    """0B.2 — always-on rule footprint per tool."""
+    tools: dict[str, dict] = {}
+    for tool in DIR_RULE_TOOLS:
+        files = sorted((REPO_ROOT / tool / "rules").glob("*.md"))
+        if files:
+            tools[tool] = _measure_files(files)
+    for tool in MONOLITH_TOOLS:
+        f = REPO_ROOT / tool
+        if f.is_file():
+            m = token_count.measure(f.read_text(encoding="utf-8", errors="ignore"))
+            m["files"] = 1
+            tools[tool] = m
+    return tools
+def _catalog(glob_pat: str) -> dict:
+    entries = []
+    for f in glob.glob(str(REPO_ROOT / glob_pat), recursive=True):
+        fm = _frontmatter(Path(f))
+        name = fm.get("name") or Path(f).parent.name
+        desc = fm.get("description", "")
+        if desc:
+            entries.append(f"{name}: {desc}")
+    m = token_count.measure("\n".join(entries))
+    m["entries"] = len(entries)
+    return m
+def description_catalog() -> dict:
+    """0B.4 — description-catalog cost (eager progressive-disclosure surface)."""
+    return {
+        "skills_projected": _catalog(".claude/skills/*/SKILL.md"),
+        "skills_core_source": _catalog("packages/core/.agent-src.uncondensed/skills/*/SKILL.md"),
+        "commands_core_source": _catalog("packages/core/.agent-src.uncondensed/commands/**/*.md"),
+    }
+def longest_rules(top: int = 10) -> list[dict]:
+    """1.3 — longest rules in tokens (the trim candidates)."""
+    rows = []
+    for tool in DIR_RULE_TOOLS:
+        d = REPO_ROOT / tool / "rules"
+        if d.is_dir():
+            for p in d.glob("*.md"):
+                m = token_count.measure(p.read_text(encoding="utf-8", errors="ignore"))
+                rows.append({"id": p.stem, "tokens_gpt": m["tokens_gpt"], "chars": m["chars"]})
+            break  # one tool is representative — bodies are identical across DIR tools
+    rows.sort(key=lambda r: (-r["tokens_gpt"], r["id"]))
+    return rows[:top]
+def build() -> dict:
+    return {
+        "generated": _dt.datetime.now(_dt.timezone.utc).isoformat(timespec="seconds"),
+        "token_method": token_count.method_note(),
+        "rule_footprint": rule_footprint(),
+        "description_catalog": description_catalog(),
+        "longest_rules": longest_rules(),
+    }
+def render_md(d: dict) -> str:
+    L = ["# Initial-context token audit", "",
+         f"- generated: `{d['generated']}`",
+         f"- token method: {d['token_method']}", "",
+         "## 0B.2 — always-on rule footprint per tool", "",
+         "| tool | files | chars | GPT tok | Claude tok |",
+         "|---|--:|--:|--:|--:|"]
+    for tool, m in d["rule_footprint"].items():
+        L.append(f"| `{tool}` | {m['files']} | {m['chars']:,} | {m['tokens_gpt']:,} | {m['tokens_claude']:,} |")
+    L += ["", "## 0B.4 — description-catalog cost (eager)", "",
+          "| catalog | entries | chars | GPT tok | Claude tok |",
+          "|---|--:|--:|--:|--:|"]
+    for name, m in d["description_catalog"].items():
+        L.append(f"| {name} | {m['entries']} | {m['chars']:,} | {m['tokens_gpt']:,} | {m['tokens_claude']:,} |")
+    L += ["", "## 1.3 — top-10 longest rules (token trim candidates)", "",
+          "| rule | GPT tok | chars |", "|---|--:|--:|"]
+    for r in d["longest_rules"]:
+        L.append(f"| `{r['id']}` | {r['tokens_gpt']:,} | {r['chars']:,} |")
+    L.append("")
+    return "\n".join(L)
+def main(argv: list[str] | None = None) -> int:
+    ap = argparse.ArgumentParser(description=__doc__.splitlines()[0])
+    ap.add_argument("--json", action="store_true")
+    ap.add_argument("--write", action="store_true", help="write report files under internal/bench/reports/")
+    ap.add_argument("--fail-if-over-budget", action="store_true",
+                    help="exit 1 if a surface exceeds its configured token budget (1.4)")
+    args = ap.parse_args(argv)
+    data = build()
+    if args.fail_if_over_budget:
+        breaches = []
+        rf = next(iter(data["rule_footprint"].values()), {})
+        checks = {
+            "rules.gpt": rf.get("tokens_gpt", 0),
+            "skill_catalog.gpt": data["description_catalog"]["skills_projected"]["tokens_gpt"],
+            "command_catalog.gpt": data["description_catalog"]["commands_core_source"]["tokens_gpt"],
+        }
+        for key, val in checks.items():
+            cap = BUDGETS.get(key)
+            if cap is not None and val > cap:
+                breaches.append(f"{key} {val} > budget {cap}")
+        if breaches:
+            print("❌  initial-context budget: " + "; ".join(breaches))
+            return 1
+        print("✅  initial-context budget: pass (or advisory-only)")
+        return 0
+    if args.json:
+        print(json.dumps(data, indent=2, sort_keys=True))
+    else:
+        print(render_md(data))
+    if args.write:
+        REPORT_DIR.mkdir(parents=True, exist_ok=True)
+        (REPORT_DIR / "projection-cost.json").write_text(
+            json.dumps(data, indent=2, sort_keys=True), encoding="utf-8")
+        (REPORT_DIR / "projection-cost.md").write_text(render_md(data), encoding="utf-8")
+        print(f"\n→ wrote {REPORT_DIR.relative_to(REPO_ROOT)}/projection-cost.{{json,md}}")
+    return 0
+if __name__ == "__main__":
+    sys.exit(main())

package/scripts/bench_ab_diff.py CHANGED Viewed

@@ -10,7 +10,7 @@ Inputs: two report JSON paths. Output: a JSON artefact under
 The diff content depends on the corpus:
 - `ab-tracka` — trigger-accuracy %, false-positive count, per-rule lift.
-- `ab-trackb` — completion-rate per category, wall-time, tokens, cost,
+- `ab-trackb` — completion-rate per category, wall-time, tokens,
   ask-vs-act ratio, tool-call count.
 Phase 2 only writes the structural skeleton (delta object with `with`,
@@ -74,7 +74,7 @@ def compute_track_a_diff(with_results: dict, without_results: dict) -> dict:
 def compute_track_b_diff(with_results: dict, without_results: dict) -> dict:
-    """Track B: completion rate per category + wall-time + tokens + cost + ask-vs-act."""
+    """Track B: completion rate per category + wall-time + tokens + ask-vs-act."""
     def mean(d: dict, key: str) -> float:
         try:
             return float(d.get(key, 0.0))
@@ -111,15 +111,8 @@ def compute_track_b_diff(with_results: dict, without_results: dict) -> dict:
                 3,
             ),
         },
-        "cost_usd": {
-            "with": mean(with_results, "mean_cost_usd"),
-            "without": mean(without_results, "mean_cost_usd"),
-            "delta": round(
-                mean(with_results, "mean_cost_usd")
-                - mean(without_results, "mean_cost_usd"),
-                4,
-            ),
-        },
+        # cost_usd comparison intentionally omitted — API pricing misleads
+        # subscription users; tokens are the currency-neutral metric.
         "ask_vs_act_ratio": {
             "with": mean(with_results, "ask_vs_act_ratio"),
             "without": mean(without_results, "ask_vs_act_ratio"),

package/scripts/bench_run.py CHANGED Viewed

@@ -150,7 +150,7 @@ def main(argv: list[str] | None = None) -> int:
     headline = (
         f"bench {report['corpus']['id']} · "
         f"selection {sel['selection_accuracy']:.2%} ({verdict['selection']}) · "
-        f"cost ${cost['totals']['total_cost_usd']:.6f} ({cost.get('source', 'n/a')}) · "
+        f"tokens {cost.get('source', 'n/a')} · "
         f"quality {qual['quality_score']:.2%} ({verdict['quality']}) · "
         f"overall {verdict['overall']}"
     )
@@ -252,8 +252,7 @@ def _run_telegraph(args: argparse.Namespace) -> int:
         f"telegraph · prompts {report['corpus']['prompt_count']} · "
         f"calls {cost['totals']['calls']} · errors {cost['totals']['errors']} · "
         f"vs_raw med {report['telegraph']['aggregate']['savings_vs_raw']['median']:.2%} · "
-        f"vs_terse med {report['telegraph']['aggregate']['savings_vs_terse']['median']:.2%} · "
-        f"cost ${cost['totals']['total_cost_usd']:.6f}"
+        f"vs_terse med {report['telegraph']['aggregate']['savings_vs_terse']['median']:.2%}"
     )
     if args.quiet:
         print(headline)

package/scripts/bench_runner.py CHANGED Viewed

@@ -2,7 +2,7 @@
 """Bench runner for the eval corpora — step-4 measurement-and-benchmark Phase 1.
 Deterministic, no-API skill-selection baseline. For each prompt in a
-corpus YAML, ranks the 210 skills in `.agent-src.uncondensed/skills/`
+corpus YAML, ranks the skills in the projected catalog `.agent-src/skills/`
 by keyword overlap between the prompt text and each skill's
 `description` frontmatter field. Reports selection accuracy as
 `top-K contains >= 1 expected_skill`.
@@ -33,7 +33,7 @@ except ImportError:
     sys.exit(2)
 REPO_ROOT = Path(__file__).resolve().parent.parent
-SKILLS_DIR = REPO_ROOT / ".agent-src.uncondensed" / "skills"
+SKILLS_DIR = REPO_ROOT / ".agent-src" / "skills"
 CORPUS_DIR = REPO_ROOT / "tests" / "eval"
 STOPWORDS = frozenset({

package/scripts/condense.py CHANGED Viewed

@@ -144,6 +144,31 @@ def _read_augment_rules_use_symlinks() -> bool:
     return False
+def _lean_projection_mode() -> str:
+    """Read lean_projection.mode from .agent-settings.yml.
+    `eager-all` (default) → every rule body inlined into every projection
+    (today's behaviour). `thin` → kernel full-bodied + non-kernel rules as
+    router-resolved pointers (lean-initial-context Phase 3.1; ~36k GPT tok
+    lighter, measured). Missing / malformed → `eager-all`, so the thin path
+    is strictly opt-in and one-flip-revertible (see docs/contracts/rule-router.md
+    § Kill-switch). The flip MUST be live-A/B-validated before it ships as the
+    default — a thin projection only holds behaviour if the agent resolves the
+    pointer on trigger-match.
+    """
+    try:
+        from scripts._lib.agent_settings import load_agent_settings
+    except ImportError:  # pragma: no cover — script-style invocation
+        import sys as _sys
+        from pathlib import Path as _Path
+        _sys.path.insert(0, str(_Path(__file__).resolve().parent))
+        from _lib.agent_settings import load_agent_settings  # type: ignore[import-not-found]
+    data = load_agent_settings(project_path=SETTINGS_FILE)
+    lean = data.get("lean_projection")
+    if isinstance(lean, dict) and str(lean.get("mode", "")).strip().lower() == "thin":
+        return "thin"
+    return "eager-all"
 def file_hash(filepath: Path) -> str:
@@ -654,6 +679,18 @@ def generate_rule_symlinks() -> int:
     # All .md files in .agent-src/rules/ — not just universal ones
     rules = sorted([f.name for f in RULES_SOURCE.glob("*.md")])
     tool_dirs = _filter_tool_dirs(TOOL_DIRS)
+    # Thin-projection opt-in (lean-initial-context Phase 3.1). Default
+    # `eager-all` keeps the symlink behaviour below untouched; `thin` writes
+    # kernel rules full + non-kernel rules as router-resolved pointers.
+    thin_files: dict[str, str] | None = None
+    if _lean_projection_mode() == "thin":
+        try:
+            from scripts.project_thin_rules import build_thin
+        except ImportError:  # pragma: no cover — script-style invocation
+            from project_thin_rules import build_thin  # type: ignore[import-not-found]
+        thin_files = build_thin(RULES_SOURCE)
     total = 0
     for tool_dir, rel_prefix in tool_dirs.items():
         target_dir = PROJECT_ROOT / tool_dir
@@ -666,17 +703,21 @@ def generate_rule_symlinks() -> int:
         for rule in rules:
             link = target_dir / rule
-            target = Path(rel_prefix) / rule
             if link.exists() or link.is_symlink():
                 link.unlink()
-            link.symlink_to(target)
+            if thin_files is not None:
+                # Thin mode: write a real file (kernel full / non-kernel pointer),
+                # not a symlink to the full source body.
+                link.write_text(thin_files[rule], encoding="utf-8")
+            else:
+                link.symlink_to(Path(rel_prefix) / rule)
             total += 1
     # Verify counts match across all tool directories
     source_count = len(rules)
     for tool_dir in tool_dirs:
         target_dir = PROJECT_ROOT / tool_dir
-        tool_count = len([f for f in target_dir.iterdir() if f.is_symlink() and f.suffix == ".md"])
+        tool_count = len([f for f in target_dir.iterdir() if f.suffix == ".md"])
         if tool_count != source_count:
             print(f"  ⚠️  {tool_dir}: {tool_count} rules (expected {source_count})")