@event4u/agent-config 5.4.1 → 5.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/.agent-src/commands/image/analyse.md +51 -0
  2. package/.agent-src/commands/image/create.md +53 -0
  3. package/.agent-src/commands/image/verify.md +48 -0
  4. package/.agent-src/commands/image.md +69 -0
  5. package/.agent-src/commands/knowledge/cross-repo.md +71 -0
  6. package/.agent-src/commands/knowledge.md +2 -0
  7. package/.agent-src/commands/skill/preview.md +67 -0
  8. package/.agent-src/commands/skill.md +48 -0
  9. package/.agent-src/commands/skills/discover.md +76 -0
  10. package/.agent-src/commands/skills.md +56 -0
  11. package/.agent-src/commands/video/from-song.md +351 -0
  12. package/.agent-src/commands/video.md +19 -9
  13. package/.agent-src/contexts/authority/commit-mechanics.md +8 -0
  14. package/.agent-src/rules/commit-policy.md +3 -8
  15. package/.agent-src/rules/linked-projects-onboarding-gate.md +1 -1
  16. package/.agent-src/rules/media-sync-ground-truth.md +58 -0
  17. package/.agent-src/skills/image-analyser/SKILL.md +121 -0
  18. package/.agent-src/skills/image-analyser/canon-spec.md +109 -0
  19. package/.agent-src/skills/image-analyser/evals/triggers.json +16 -0
  20. package/.agent-src/skills/image-creator/SKILL.md +117 -0
  21. package/.agent-src/skills/image-creator/evals/triggers.json +16 -0
  22. package/.agent-src/skills/song-to-script/SKILL.md +216 -0
  23. package/.claude-plugin/marketplace.json +15 -2
  24. package/CHANGELOG.md +84 -0
  25. package/CONTRIBUTING.md +6 -0
  26. package/README.md +3 -3
  27. package/config/agent-settings.template.yml +18 -0
  28. package/dist/cli/registry.js +1 -0
  29. package/dist/cli/registry.js.map +1 -1
  30. package/dist/discovery/deprecation-report.md +1 -1
  31. package/dist/discovery/discovery-manifest.json +327 -20
  32. package/dist/discovery/discovery-manifest.json.sha256 +1 -1
  33. package/dist/discovery/discovery-manifest.summary.md +4 -4
  34. package/dist/discovery/orphan-report.md +1 -1
  35. package/dist/discovery/packs.json +24 -10
  36. package/dist/discovery/trust-report.md +3 -3
  37. package/dist/discovery/workspaces.json +20 -6
  38. package/dist/mcp/registry-manifest.json +3 -3
  39. package/dist/router.json +1 -1
  40. package/dist/server/schemas/settings.js +4 -0
  41. package/dist/server/schemas/settings.js.map +1 -1
  42. package/docs/architecture.md +3 -3
  43. package/docs/catalog.md +20 -6
  44. package/docs/contracts/benchmark-report-schema.md +12 -10
  45. package/docs/contracts/command-clusters.md +5 -1
  46. package/docs/contracts/cross-repo-retrieval.md +64 -0
  47. package/docs/contracts/rule-router.md +39 -0
  48. package/docs/contracts/skill-discovery.md +80 -0
  49. package/docs/contracts/skill-dry-run.md +47 -0
  50. package/docs/contracts/value-dashboard-spec.md +7 -3
  51. package/docs/contracts/value-report-schema.md +6 -1
  52. package/docs/decisions/ADR-032-linked-projects-scope.md +7 -3
  53. package/docs/getting-started.md +2 -2
  54. package/docs/guides/cross-repo-linked-projects.md +7 -0
  55. package/docs/guides/cross-repo-retrieval.md +61 -0
  56. package/docs/guides/skill-discovery.md +71 -0
  57. package/docs/guides/skill-preview.md +71 -0
  58. package/docs/value.md +17 -17
  59. package/package.json +1 -1
  60. package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
  61. package/scripts/_dispatch.bash +10 -0
  62. package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
  63. package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
  64. package/scripts/_lib/bench_report.py +13 -14
  65. package/scripts/_lib/bench_telegraph_report.py +1 -2
  66. package/scripts/_lib/token_count.py +95 -0
  67. package/scripts/_lib/value_report.py +3 -3
  68. package/scripts/ai-video/adapters/higgsfield.sh +163 -6
  69. package/scripts/ai-video/adapters/openai-images.sh +92 -6
  70. package/scripts/ai-video/lib/probe-audio.sh +181 -0
  71. package/scripts/audit_auto_rules.py +22 -6
  72. package/scripts/audit_command_surface.py +6 -1
  73. package/scripts/audit_initial_context.py +210 -0
  74. package/scripts/bench_ab_diff.py +4 -11
  75. package/scripts/bench_run.py +2 -3
  76. package/scripts/bench_runner.py +2 -2
  77. package/scripts/condense.py +44 -3
  78. package/scripts/cross_repo_retrieve.py +172 -0
  79. package/scripts/inventory_meta_layers.py +288 -0
  80. package/scripts/iron_law_sha.py +14 -5
  81. package/scripts/linked_projects_list.py +91 -0
  82. package/scripts/measure_rule_budget.py +15 -0
  83. package/scripts/memory_lookup.py +53 -2
  84. package/scripts/project_thin_rules.py +168 -0
  85. package/scripts/render_value_md.py +14 -23
  86. package/scripts/schemas/command.schema.json +1 -1
  87. package/scripts/schemas/rule.schema.json +1 -1
  88. package/scripts/schemas/skill.schema.json +2 -2
  89. package/scripts/skill_discovery.py +254 -0
  90. package/scripts/skill_linter.py +8 -4
  91. package/scripts/skill_preview.py +179 -0
  92. package/scripts/trigger_coverage.py +129 -0
@@ -0,0 +1,181 @@
1
+ #!/usr/bin/env bash
2
+ # probe-audio.sh — turn a song file into a deterministic, network-free
3
+ # JSON summary the `song-to-script` skill maps to scenes:
4
+ #
5
+ # {"duration": <seconds>,
6
+ # "method": "silence" | "rms" | "interval",
7
+ # "warning": "<present only for the interval fallback>",
8
+ # "sections": [{"start":0.0,"end":12.5,"energy":0.41,"label":"intro"}, ...]}
9
+ #
10
+ # HONEST FRAMING (AI-council design review, 2026-05-30): this is energy /
11
+ # silence segmentation, NOT beat detection or musical analysis. Modern
12
+ # masters are brick-walled (near-constant RMS), so a real cut structure
13
+ # is often absent. The probe therefore degrades through three methods and
14
+ # always reports which one produced the anchors:
15
+ #
16
+ # 1. silence — ffmpeg silencedetect found real quiet gaps → true cuts.
17
+ # 2. rms — no usable silence; greedy-merge per-window RMS energy.
18
+ # 3. interval — track is structurally flat (brick-walled / sustained):
19
+ # fall back to fixed-interval cuts and SET `warning` so the
20
+ # caller (and the operator) knows timing is not musical.
21
+ #
22
+ # Sections are cut anchors, never a transcription. For beat-accurate cuts
23
+ # the operator passes `--scene-durations` to /video:from-song instead.
24
+ #
25
+ # Usage:
26
+ # probe-audio.sh <song-file> [--window <seconds>] [--interval <seconds>]
27
+ # [--silence-db <dB>] [--silence-min <seconds>]
28
+ #
29
+ # --window RMS analysis window (default 3)
30
+ # --interval fixed-interval fallback section length (default 15)
31
+ # --silence-db silencedetect noise floor (default -30)
32
+ # --silence-min silencedetect minimum gap to count as a boundary (default 0.5)
33
+ #
34
+ # Exit codes:
35
+ # 0 JSON written to stdout
36
+ # 2 usage / file missing
37
+ # 3 required tool missing (ffprobe / ffmpeg)
38
+ # 4 no audio stream in the file
39
+
40
+ set -euo pipefail
41
+
42
# die <status> <message>
# Write "probe-audio: <message>" to stderr and terminate with <status>.
die() {
  local status="$1" message="$2"
  printf 'probe-audio: %s\n' "${message}" >&2
  exit "${status}"
}
43
+
44
# --- 0. argument parsing --------------------------------------------------
# The first positional argument is the song file; every option after it
# takes exactly one value. An empty value keeps the documented default.
[ "$#" -ge 1 ] || die 2 "usage: $0 <song-file> [--window <s>] [--interval <s>] [--silence-db <dB>] [--silence-min <s>]"

song="$1"; shift
window=3
interval=15
silence_db=-30
silence_min=0.5
while [ "$#" -gt 0 ]; do
  case "$1" in
    --window|--interval|--silence-db|--silence-min)
      # Under `set -e` a `shift 2` with only the flag left fails silently
      # with status 1; report the missing value as a usage error instead.
      [ "$#" -ge 2 ] || die 2 "missing value for $1"
      case "$1" in
        --window)      window="${2:-3}" ;;
        --interval)    interval="${2:-15}" ;;
        --silence-db)  silence_db="${2:--30}" ;;
        --silence-min) silence_min="${2:-0.5}" ;;
      esac
      shift 2 ;;
    *) die 2 "unknown arg: $1" ;;
  esac
done

# require_positive <flag> <value>
# Usage error unless <value> is a plain decimal number greater than zero.
require_positive() {
  awk -v v="$2" 'BEGIN{ exit !(v ~ /^[0-9]*\.?[0-9]+$/ && v+0 > 0) }' \
    || die 2 "$1 must be a positive number: $2"
}
# The window length is used as a divisor and the interval as a loop step
# further down; zero or non-numeric input would divide by zero or never
# terminate, so both are rejected here.
require_positive --window "${window}"
require_positive --interval "${interval}"
60
+
61
+ [ -f "${song}" ] || die 2 "file not found: ${song}"
62
+ command -v ffprobe >/dev/null 2>&1 || die 3 "ffprobe not found"
63
+ command -v ffmpeg >/dev/null 2>&1 || die 3 "ffmpeg not found"
64
+
65
+ # --- 1. duration + audio-stream check ----------------------------------
66
# `-select_streams` only filters stream-level entries; `format=duration` is
# printed for any container ffprobe can open, so it cannot prove that an
# audio stream exists. Ask for the first audio stream's codec_type instead:
# the output is empty when the file has no audio stream.
audio_stream="$(ffprobe -v error -select_streams a:0 \
  -show_entries stream=codec_type -of default=nk=1:nw=1 "${song}" 2>/dev/null || true)"
[ -n "${audio_stream}" ] || die 4 "no audio stream in: ${song}"

# Container duration in seconds, as reported by ffprobe.
duration="$(ffprobe -v error \
  -show_entries format=duration -of default=nk=1:nw=1 "${song}" 2>/dev/null || true)"
[ -n "${duration}" ] || die 4 "no audio stream in: ${song}"
69
+
70
+ # --- 2. per-window RMS energy via astats --------------------------------
71
+ # Slice the track into <window>-second chunks; read mean RMS level (dB),
72
+ # normalise to 0..1 where -60dB→0 and 0dB→1. These energies feed BOTH the
73
+ # rms-merge method and the per-section labelling of every method.
74
# Number of analysis windows: duration / window rounded up, never below one.
n_windows="$(awk -v d="${duration}" -v w="${window}" 'BEGIN{
  n = int(d / w); if (n * w < d) n++; if (n < 1) n = 1; print n }')"

# Parallel accumulators with one entry per window. Entries are terminated by
# a literal backslash-n, which the later `printf '%b'` calls expand into
# real newlines.
win_starts=""; win_energy=""
i=0
while [ "${i}" -lt "${n_windows}" ]; do
  # Start offset of window i in seconds.
  start="$(awk -v i="${i}" -v w="${window}" 'BEGIN{printf "%.3f", i*w}')"
  # Run astats over just this window; ffmpeg's log is merged into the pipe
  # and awk keeps the last "RMS level dB" value it sees.
  rms_db="$(ffmpeg -hide_banner -nostats -ss "${start}" -t "${window}" -i "${song}" \
    -af astats=metadata=1:reset=1 -f null - 2>&1 \
    | awk -F': ' '/RMS level dB/ {v=$2} END{print v}')"
  # A missing, infinite or NaN reading is treated as the -60 dB floor.
  case "${rms_db}" in ""|*inf*|*nan*) rms_db=-60 ;; esac
  # Map dB onto 0..1 (-60 dB -> 0, 0 dB -> 1), clamped at both ends.
  norm="$(awk -v x="${rms_db}" 'BEGIN{
    v=(x+60)/60; if(v<0)v=0; if(v>1)v=1; printf "%.3f", v }')"
  win_starts="${win_starts}${start}\n"
  win_energy="${win_energy}${norm}\n"
  i=$((i + 1))
done
91
+
92
+ # --- 3. silencedetect boundaries ----------------------------------------
93
+ # Real quiet gaps split the track at musically-meaningful points far more
94
+ # reliably than RMS deltas on a compressed master. Collect the midpoints
95
+ # of detected silences as candidate section boundaries.
96
# ffmpeg logs silencedetect events on stderr, which is merged into the pipe.
# The awk filter remembers the most recent silence_start and, on every
# silence_end, prints the midpoint of that gap. `|| true` keeps a failed
# probe from aborting the script; it simply yields no boundaries.
sil_bounds="$(ffmpeg -hide_banner -nostats -i "${song}" \
  -af "silencedetect=noise=${silence_db}dB:d=${silence_min}" -f null - 2>&1 \
  | awk '
    /silence_start/ { for(i=1;i<=NF;i++) if($i=="silence_start:") s=$(i+1) }
    /silence_end/   { for(i=1;i<=NF;i++) if($i=="silence_end:") { e=$(i+1); printf "%.3f\n", (s+e)/2 } }
  ' 2>/dev/null || true)"
# Count non-empty lines with awk instead of `sed | wc -l`: command
# substitution strips the final newline and GNU sed does not put it back, so
# `wc -l` reported one boundary too few on GNU systems.
n_sil="$(printf '%s\n' "${sil_bounds}" | awk 'NF{c++} END{print c+0}')"
103
+
104
+ # --- 4. choose method + build section boundaries ------------------------
105
+ # A method needs >= 3 sections (>= 2 internal boundaries) to count as
106
+ # "structure found"; otherwise degrade to the next method.
107
method=""
boundaries=""   # internal cut points (excluding 0 and duration)

# Drop candidates within 0.5 s of either end of the track BEFORE counting.
# Counting the raw silencedetect hits let "silence" win with fewer than two
# usable cuts (for example only a lead-in and a lead-out gap), which breaks
# the ">= 3 sections" rule stated above. The count uses awk because a
# `sed | wc -l` count misses the last line when it has no trailing newline.
sil_kept="$(printf '%s\n' "${sil_bounds}" \
  | awk -v d="${duration}" 'NF && $1>0.5 && $1<d-0.5' | sort -n | uniq)"
n_sil_kept="$(printf '%s\n' "${sil_kept}" | awk 'NF{c++} END{print c+0}')"

if [ "${n_sil_kept}" -ge 2 ]; then
  method="silence"
  boundaries="${sil_kept}"
fi
115
+
116
if [ -z "${method}" ]; then
  # Greedy merge of adjacent RMS windows: walk the (start, energy) pairs and
  # emit a window start as a boundary whenever the energy differs from the
  # previous window by more than 0.12, ignoring starts within 0.5 s of
  # either end of the track.
  rms_bounds="$(paste <(printf '%b' "${win_starts}") <(printf '%b' "${win_energy}") \
    | awk -v d="${duration}" '
      { st[NR]=$1; en[NR]=$2; cnt=NR }
      END {
        prev=en[1]
        for(k=2;k<=cnt;k++){
          if ((en[k]-prev>0.12)||(prev-en[k]>0.12)) { if(st[k]>0.5 && st[k]<d-0.5) print st[k] }
          prev=en[k]
        }
      }')"
  # Count non-empty lines with awk: command substitution strips the final
  # newline, GNU sed does not restore it, and `wc -l` then undercounts by
  # one, which wrongly pushed tracks with exactly two cuts to the interval
  # fallback.
  n_rms="$(printf '%s\n' "${rms_bounds}" | awk 'NF{c++} END{print c+0}')"
  if [ "${n_rms}" -ge 2 ]; then
    method="rms"
    boundaries="$(printf '%s\n' "${rms_bounds}" | sed '/^$/d' | sort -n | uniq)"
  fi
fi
134
+
135
# Last resort: neither silence nor RMS produced a method, so cut the track
# at every multiple of the interval that lies more than 0.5 s before the end
# and record a warning for the caller.
warning=""
if [ "${method}" = "" ]; then
  method="interval"
  warning="track is structurally flat (no usable silence or energy structure); sections are fixed ${interval}s intervals, not musical cuts"
  boundaries="$(awk -v d="${duration}" -v iv="${interval}" 'BEGIN{
    t = iv
    while (t < d - 0.5) { printf "%.3f\n", t; t += iv }
  }')"
fi
142
+
143
+ # --- 5. assemble sections, label, emit JSON -----------------------------
144
+ # Boundaries → [0, b1, b2, ..., duration] section edges. Energy per section
145
+ # = mean of the RMS windows whose start falls inside it.
146
+ printf '%s' "${boundaries}" \
147
+ | sed '/^$/d' \
148
+ | awk -v d="${duration}" -v method="${method}" -v warning="${warning}" \
149
+ -v wins="$(printf '%b' "${win_starts}")" -v ens="$(printf '%b' "${win_energy}")" '
150
+ BEGIN {
151
+ nw=split(wins, ws, "\n"); split(ens, es, "\n")
152
+ # build edges
153
+ ne=0; edges[ne++]=0
154
+ }
155
+ { edges[ne++]=$1+0 }
156
+ END {
157
+ edges[ne++]=d+0
158
+ # mean energy across all windows for relative labelling
159
+ sum=0; c=0
160
+ for(k=1;k<=nw;k++){ if(ws[k]!=""){ sum+=es[k]; c++ } }
161
+ mean=(c?sum/c:0)
162
+ printf "{\"duration\": %.3f, \"method\": \"%s\"", d, method
163
+ if (warning != "") { gsub(/"/,"\\\"",warning); printf ", \"warning\": \"%s\"", warning }
164
+ printf ", \"sections\": ["
165
+ segs=ne-1
166
+ for(j=0;j<segs;j++){
167
+ s=edges[j]; e=edges[j+1]
168
+ # mean energy of windows starting within [s,e)
169
+ es_sum=0; es_c=0
170
+ for(k=1;k<=nw;k++){ if(ws[k]!=""){ if(ws[k]+0>=s && ws[k]+0<e){ es_sum+=es[k]; es_c++ } } }
171
+ energy=(es_c?es_sum/es_c:mean)
172
+ if (j==0) label="intro"
173
+ else if (j==segs-1) label=(energy<mean?"outro":"drop")
174
+ else if (energy>=mean+0.10) label="drop"
175
+ else if (energy<=mean-0.10) label="breakdown"
176
+ else label="build"
177
+ sep=(j<segs-1)?",":""
178
+ printf "{\"start\": %.3f, \"end\": %.3f, \"energy\": %.3f, \"label\": \"%s\"}%s", s, e, energy, label, sep
179
+ }
180
+ print "]}"
181
+ }'
@@ -25,8 +25,24 @@ from pathlib import Path
25
25
  import yaml
26
26
 
27
27
  REPO_ROOT = Path(__file__).resolve().parent.parent
28
- SRC_RULES = REPO_ROOT / ".agent-src.uncondensed" / "rules"
29
- PROJECTED_RULES = REPO_ROOT / ".augment" / "rules"
28
+ sys.path.insert(0, str(REPO_ROOT / "scripts"))
29
+ from _lib.agent_src import artefact_roots # noqa: E402
30
+
31
+ # Pre-monorepo this was REPO_ROOT/.agent-src.uncondensed/rules. Post-move
32
+ # (ADR-017) source rules live under packages/*/.agent-src.uncondensed/rules.
33
def _src_rule_paths() -> list[Path]:
    """Collect the source rule files from every artefact root.

    Each root yielded by ``artefact_roots()`` is checked for a ``rules``
    directory, whose ``*.md`` files are visited in sorted order. A file name
    already contributed by an earlier root is skipped, so the first root
    that provides a given rule name wins.
    """
    first_by_name: dict[str, Path] = {}
    for root in artefact_roots():
        rules_dir = root / "rules"
        if not rules_dir.is_dir():
            continue
        for rule_file in sorted(rules_dir.glob("*.md")):
            # setdefault keeps the earliest path and preserves first-seen order.
            first_by_name.setdefault(rule_file.name, rule_file)
    return list(first_by_name.values())
44
+
45
+ PROJECTED_RULES = REPO_ROOT / ".agent-src" / "rules"
30
46
  REPORT_DIR = REPO_ROOT / "agents" / "reports"
31
47
  JSON_OUT = REPORT_DIR / "auto-rules-audit.json"
32
48
  MD_OUT = REPORT_DIR / "auto-rules-audit.md"
@@ -67,7 +83,7 @@ def _trigger_summary(triggers: list) -> dict:
67
83
 
68
84
  def collect() -> list[dict]:
69
85
  rules: list[dict] = []
70
- for path in sorted(SRC_RULES.glob("*.md")):
86
+ for path in _src_rule_paths():
71
87
  text = path.read_text(encoding="utf-8")
72
88
  fm, body = _split_frontmatter(text)
73
89
  if fm.get("type") != "auto":
@@ -107,7 +123,7 @@ def render_markdown(rules: list[dict]) -> str:
107
123
  "# Auto-Rule Audit",
108
124
  "",
109
125
  "Generated by `scripts/audit_auto_rules.py` for Phase 5 of",
110
- "`agents/roadmaps/road-to-augment-limit-fit.md`. Re-run after",
126
+ "`agents/roadmaps/archive/road-to-augment-limit-fit.md`. Re-run after",
111
127
  "any rule add/merge/deprecate to refresh the baseline.",
112
128
  "",
113
129
  "## Totals",
@@ -141,8 +157,8 @@ def render_markdown(rules: list[dict]) -> str:
141
157
 
142
158
 
143
159
  def main() -> int:
144
- if not SRC_RULES.is_dir():
145
- print(f"❌ Missing source dir: {SRC_RULES}", file=sys.stderr)
160
+ if not _src_rule_paths():
161
+ print("❌ No source rules found under any artefact root's rules/", file=sys.stderr)
146
162
  return 1
147
163
  rules = collect()
148
164
  REPORT_DIR.mkdir(parents=True, exist_ok=True)
@@ -37,7 +37,12 @@ from pathlib import Path
37
37
  from typing import List
38
38
 
39
39
  REPO_ROOT = Path(__file__).resolve().parent.parent
40
- DEFAULT_ROOT = REPO_ROOT / ".agent-src.uncondensed" / "commands"
40
+ # Pre-monorepo: REPO_ROOT/.agent-src.uncondensed/commands. Post-move (ADR-017)
41
+ # the core command surface lives under packages/core/.agent-src.uncondensed.
42
+ # Fall back to the legacy path only if the packages layout is absent.
43
+ _CORE_COMMANDS = REPO_ROOT / "packages" / "core" / ".agent-src.uncondensed" / "commands"
44
+ _LEGACY_COMMANDS = REPO_ROOT / ".agent-src.uncondensed" / "commands"
45
+ DEFAULT_ROOT = _CORE_COMMANDS if _CORE_COMMANDS.is_dir() else _LEGACY_COMMANDS
41
46
  REPORT_DIR = REPO_ROOT / "agents" / "reports"
42
47
  OUT_JSON = REPORT_DIR / "command-surface.json"
43
48
  OUT_MD = REPORT_DIR / "command-surface.md"
@@ -0,0 +1,210 @@
1
+ #!/usr/bin/env python3
2
+ """Initial-context token audit (roadmap `road-to-lean-initial-context`).
3
+
4
+ Serves three roadmap steps with one analyzer (no new analyzer where one
5
+ exists — reuses `scripts/_lib/token_count.py`):
6
+
7
+ - **0B.2** — always-on rule-body footprint per tool projection.
8
+ - **0B.4** — description-catalog initial cost (skill + command name+desc).
9
+ - **1.3** — unified `audit:tokens` surfacing per-tool initial-token estimate,
10
+ longest rules in tokens, and the description-catalog pool.
11
+
12
+ `char != token`: every number is reported in both. GPT counts are exact when
13
+ `tiktoken` is installed, else a documented proxy (see `token_count`).
14
+
15
+ Usage:
16
+ python3 scripts/audit_initial_context.py # markdown report → stdout
17
+ python3 scripts/audit_initial_context.py --json # machine-readable
18
+ python3 scripts/audit_initial_context.py --write # write report files
19
+ python3 scripts/audit_initial_context.py --fail-if-over-budget # CI gate (1.4)
20
+
21
+ Exit codes: 0 = ok (or no budget set); 1 = a measured surface exceeds its
22
+ configured token budget (only with --fail-if-over-budget).
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import argparse
28
+ import datetime as _dt
29
+ import glob
30
+ import json
31
+ import re
32
+ import sys
33
+ from pathlib import Path
34
+
35
+ REPO_ROOT = Path(__file__).resolve().parent.parent
36
+ sys.path.insert(0, str(REPO_ROOT / "scripts"))
37
+ from _lib import token_count # noqa: E402
38
+
39
+ try:
40
+ import yaml
41
+ except ImportError: # pragma: no cover
42
+ sys.stderr.write("error: PyYAML required (pip install pyyaml)\n")
43
+ sys.exit(2)
44
+
45
+ REPORT_DIR = REPO_ROOT / "internal" / "bench" / "reports"
46
+
47
+ # Tools whose rules/ dir holds one .md per rule (full body projected today).
48
+ DIR_RULE_TOOLS = (".claude", ".augment", ".cursor")
49
+ # Tools whose always-on surface is a single monolithic file.
50
+ MONOLITH_TOOLS = (".windsurfrules",)
51
+
52
+ # Initial-token budget per surface (None = advisory only, no gate). These are
53
+ # soft ceilings the audit can enforce once a baseline is agreed (1.4). Set
54
+ # generously now; tighten as Phase 3 lands.
55
+ BUDGETS: dict[str, int | None] = {
56
+ "rules.gpt": None,
57
+ "skill_catalog.gpt": None,
58
+ "command_catalog.gpt": None,
59
+ }
60
+
61
+
62
+ def _frontmatter(path: Path) -> dict:
63
+ try:
64
+ text = path.read_text(encoding="utf-8", errors="ignore")
65
+ except OSError:
66
+ return {}
67
+ m = re.match(r"^---\s*\n(.*?)\n---\s*\n", text, re.DOTALL)
68
+ if not m:
69
+ return {}
70
+ try:
71
+ return yaml.safe_load(m.group(1)) or {}
72
+ except yaml.YAMLError:
73
+ return {}
74
+
75
+
76
def _measure_files(paths: list[Path]) -> dict:
    """Measure the concatenated text of *paths* with ``token_count.measure``.

    The file contents are joined without a separator. The returned
    measurement dict also carries the number of files under ``"files"``.
    """
    chunks = []
    for path in paths:
        chunks.append(path.read_text(encoding="utf-8", errors="ignore"))
    measurement = token_count.measure("".join(chunks))
    measurement["files"] = len(paths)
    return measurement
81
+
82
+
83
def rule_footprint() -> dict:
    """0B.2 — always-on rule footprint per tool.

    Returns a mapping of tool name to its measurement. Directory-style tools
    are measured over the sorted ``*.md`` files in their ``rules`` folder;
    monolith tools over their single file. Tools with nothing to measure
    are left out.
    """
    footprint: dict[str, dict] = {}

    for tool in DIR_RULE_TOOLS:
        rule_files = sorted((REPO_ROOT / tool / "rules").glob("*.md"))
        if not rule_files:
            continue
        footprint[tool] = _measure_files(rule_files)

    for tool in MONOLITH_TOOLS:
        monolith = REPO_ROOT / tool
        if not monolith.is_file():
            continue
        measurement = token_count.measure(
            monolith.read_text(encoding="utf-8", errors="ignore")
        )
        measurement["files"] = 1
        footprint[tool] = measurement

    return footprint
97
+
98
+
99
def _catalog(glob_pat: str) -> dict:
    """Measure the "name: description" catalog for files matching *glob_pat*.

    *glob_pat* is resolved relative to ``REPO_ROOT`` (``**`` is recursive).
    For each match the front-matter ``name`` (falling back to the parent
    directory name) and ``description`` form one catalog line; files without
    a description are skipped. The returned measurement also carries the
    number of lines under ``"entries"``.
    """
    entries = []
    # glob returns matches in arbitrary, filesystem-dependent order; sort so
    # the measured text is the same on every run and machine.
    for f in sorted(glob.glob(str(REPO_ROOT / glob_pat), recursive=True)):
        fm = _frontmatter(Path(f))
        name = fm.get("name") or Path(f).parent.name
        desc = fm.get("description", "")
        if desc:
            entries.append(f"{name}: {desc}")
    m = token_count.measure("\n".join(entries))
    m["entries"] = len(entries)
    return m
110
+
111
+
112
def description_catalog() -> dict:
    """0B.4 — description-catalog cost (eager progressive-disclosure surface).

    Maps each catalog label to the measurement of the files matched by its
    glob pattern.
    """
    patterns = (
        ("skills_projected", ".claude/skills/*/SKILL.md"),
        ("skills_core_source", "packages/core/.agent-src.uncondensed/skills/*/SKILL.md"),
        ("commands_core_source", "packages/core/.agent-src.uncondensed/commands/**/*.md"),
    )
    catalogs = {}
    for label, pattern in patterns:
        catalogs[label] = _catalog(pattern)
    return catalogs
119
+
120
+
121
def longest_rules(top: int = 10) -> list[dict]:
    """1.3 — longest rules in tokens (the trim candidates).

    Only the first directory-style tool whose ``rules`` folder exists is
    measured. Rows are ordered by GPT token count, largest first, with the
    rule id as tie-breaker, and cut to the first *top* entries.
    """
    candidates = (REPO_ROOT / tool / "rules" for tool in DIR_RULE_TOOLS)
    # one tool is representative — bodies are identical across DIR tools
    rules_dir = next((d for d in candidates if d.is_dir()), None)

    rows = []
    if rules_dir is not None:
        for rule_path in rules_dir.glob("*.md"):
            measured = token_count.measure(
                rule_path.read_text(encoding="utf-8", errors="ignore")
            )
            rows.append({
                "id": rule_path.stem,
                "tokens_gpt": measured["tokens_gpt"],
                "chars": measured["chars"],
            })

    ranked = sorted(rows, key=lambda row: (-row["tokens_gpt"], row["id"]))
    return ranked[:top]
133
+
134
+
135
def build() -> dict:
    """Assemble the full audit payload.

    The payload holds a UTC timestamp (second precision), the token-counting
    method note, and the three measurements: rule footprint, description
    catalog and longest rules.
    """
    report: dict = {}
    now_utc = _dt.datetime.now(_dt.timezone.utc)
    report["generated"] = now_utc.isoformat(timespec="seconds")
    report["token_method"] = token_count.method_note()
    report["rule_footprint"] = rule_footprint()
    report["description_catalog"] = description_catalog()
    report["longest_rules"] = longest_rules()
    return report
143
+
144
+
145
def render_md(d: dict) -> str:
    """Render the audit payload *d* as a markdown report.

    The report has a header (timestamp, token method) followed by three
    tables: rule footprint per tool, description-catalog cost, and the
    longest rules. Counts are formatted with thousands separators, and the
    text ends with a newline.
    """
    out = [
        "# Initial-context token audit",
        "",
        f"- generated: `{d['generated']}`",
        f"- token method: {d['token_method']}",
        "",
        "## 0B.2 — always-on rule footprint per tool",
        "",
        "| tool | files | chars | GPT tok | Claude tok |",
        "|---|--:|--:|--:|--:|",
    ]
    out.extend(
        f"| `{tool}` | {row['files']} | {row['chars']:,} | {row['tokens_gpt']:,} | {row['tokens_claude']:,} |"
        for tool, row in d["rule_footprint"].items()
    )

    out.extend([
        "",
        "## 0B.4 — description-catalog cost (eager)",
        "",
        "| catalog | entries | chars | GPT tok | Claude tok |",
        "|---|--:|--:|--:|--:|",
    ])
    out.extend(
        f"| {label} | {row['entries']} | {row['chars']:,} | {row['tokens_gpt']:,} | {row['tokens_claude']:,} |"
        for label, row in d["description_catalog"].items()
    )

    out.extend([
        "",
        "## 1.3 — top-10 longest rules (token trim candidates)",
        "",
        "| rule | GPT tok | chars |",
        "|---|--:|--:|",
    ])
    out.extend(
        f"| `{rule['id']}` | {rule['tokens_gpt']:,} | {rule['chars']:,} |"
        for rule in d["longest_rules"]
    )

    # The trailing empty item makes the joined text end with a newline.
    out.append("")
    return "\n".join(out)
165
+
166
+
167
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for the initial-context token audit.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        Process exit code: 1 when ``--fail-if-over-budget`` is given and a
        checked surface exceeds its configured budget, otherwise 0.

    With ``--fail-if-over-budget`` only the budget verdict is printed and
    the function returns without rendering or writing the report.
    """
    ap = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    ap.add_argument("--json", action="store_true")
    ap.add_argument("--write", action="store_true", help="write report files under internal/bench/reports/")
    ap.add_argument("--fail-if-over-budget", action="store_true",
                    help="exit 1 if a surface exceeds its configured token budget (1.4)")
    args = ap.parse_args(argv)

    data = build()

    if args.fail_if_over_budget:
        breaches = []
        # The rules budget is checked against the first measured tool only.
        rf = next(iter(data["rule_footprint"].values()), {})
        checks = {
            "rules.gpt": rf.get("tokens_gpt", 0),
            "skill_catalog.gpt": data["description_catalog"]["skills_projected"]["tokens_gpt"],
            "command_catalog.gpt": data["description_catalog"]["commands_core_source"]["tokens_gpt"],
        }
        for key, val in checks.items():
            cap = BUDGETS.get(key)
            # A budget of None is advisory only and never breaches.
            if cap is not None and val > cap:
                breaches.append(f"{key} {val} > budget {cap}")
        if breaches:
            print("❌ initial-context budget: " + "; ".join(breaches))
            return 1
        print("✅ initial-context budget: pass (or advisory-only)")
        return 0

    if args.json:
        print(json.dumps(data, indent=2, sort_keys=True))
    else:
        print(render_md(data))

    if args.write:
        REPORT_DIR.mkdir(parents=True, exist_ok=True)
        (REPORT_DIR / "projection-cost.json").write_text(
            json.dumps(data, indent=2, sort_keys=True), encoding="utf-8")
        (REPORT_DIR / "projection-cost.md").write_text(render_md(data), encoding="utf-8")
        # The status line goes to stderr so that `--json --write` leaves
        # stdout as a single valid JSON document.
        print(f"\n→ wrote {REPORT_DIR.relative_to(REPO_ROOT)}/projection-cost.{{json,md}}",
              file=sys.stderr)
    return 0
207
+
208
+
209
+ if __name__ == "__main__":
210
+ sys.exit(main())
@@ -10,7 +10,7 @@ Inputs: two report JSON paths. Output: a JSON artefact under
10
10
  The diff content depends on the corpus:
11
11
 
12
12
  - `ab-tracka` — trigger-accuracy %, false-positive count, per-rule lift.
13
- - `ab-trackb` — completion-rate per category, wall-time, tokens, cost,
13
+ - `ab-trackb` — completion-rate per category, wall-time, tokens,
14
14
  ask-vs-act ratio, tool-call count.
15
15
 
16
16
  Phase 2 only writes the structural skeleton (delta object with `with`,
@@ -74,7 +74,7 @@ def compute_track_a_diff(with_results: dict, without_results: dict) -> dict:
74
74
 
75
75
 
76
76
  def compute_track_b_diff(with_results: dict, without_results: dict) -> dict:
77
- """Track B: completion rate per category + wall-time + tokens + cost + ask-vs-act."""
77
+ """Track B: completion rate per category + wall-time + tokens + ask-vs-act."""
78
78
  def mean(d: dict, key: str) -> float:
79
79
  try:
80
80
  return float(d.get(key, 0.0))
@@ -111,15 +111,8 @@ def compute_track_b_diff(with_results: dict, without_results: dict) -> dict:
111
111
  3,
112
112
  ),
113
113
  },
114
- "cost_usd": {
115
- "with": mean(with_results, "mean_cost_usd"),
116
- "without": mean(without_results, "mean_cost_usd"),
117
- "delta": round(
118
- mean(with_results, "mean_cost_usd")
119
- - mean(without_results, "mean_cost_usd"),
120
- 4,
121
- ),
122
- },
114
+ # cost_usd comparison intentionally omitted — API pricing misleads
115
+ # subscription users; tokens are the currency-neutral metric.
123
116
  "ask_vs_act_ratio": {
124
117
  "with": mean(with_results, "ask_vs_act_ratio"),
125
118
  "without": mean(without_results, "ask_vs_act_ratio"),
@@ -150,7 +150,7 @@ def main(argv: list[str] | None = None) -> int:
150
150
  headline = (
151
151
  f"bench {report['corpus']['id']} · "
152
152
  f"selection {sel['selection_accuracy']:.2%} ({verdict['selection']}) · "
153
- f"cost ${cost['totals']['total_cost_usd']:.6f} ({cost.get('source', 'n/a')}) · "
153
+ f"tokens {cost.get('source', 'n/a')} · "
154
154
  f"quality {qual['quality_score']:.2%} ({verdict['quality']}) · "
155
155
  f"overall {verdict['overall']}"
156
156
  )
@@ -252,8 +252,7 @@ def _run_telegraph(args: argparse.Namespace) -> int:
252
252
  f"telegraph · prompts {report['corpus']['prompt_count']} · "
253
253
  f"calls {cost['totals']['calls']} · errors {cost['totals']['errors']} · "
254
254
  f"vs_raw med {report['telegraph']['aggregate']['savings_vs_raw']['median']:.2%} · "
255
- f"vs_terse med {report['telegraph']['aggregate']['savings_vs_terse']['median']:.2%} · "
256
- f"cost ${cost['totals']['total_cost_usd']:.6f}"
255
+ f"vs_terse med {report['telegraph']['aggregate']['savings_vs_terse']['median']:.2%}"
257
256
  )
258
257
  if args.quiet:
259
258
  print(headline)
@@ -2,7 +2,7 @@
2
2
  """Bench runner for the eval corpora — step-4 measurement-and-benchmark Phase 1.
3
3
 
4
4
  Deterministic, no-API skill-selection baseline. For each prompt in a
5
- corpus YAML, ranks the 210 skills in `.agent-src.uncondensed/skills/`
5
+ corpus YAML, ranks the skills in the projected catalog `.agent-src/skills/`
6
6
  by keyword overlap between the prompt text and each skill's
7
7
  `description` frontmatter field. Reports selection accuracy as
8
8
  `top-K contains >= 1 expected_skill`.
@@ -33,7 +33,7 @@ except ImportError:
33
33
  sys.exit(2)
34
34
 
35
35
  REPO_ROOT = Path(__file__).resolve().parent.parent
36
- SKILLS_DIR = REPO_ROOT / ".agent-src.uncondensed" / "skills"
36
+ SKILLS_DIR = REPO_ROOT / ".agent-src" / "skills"
37
37
  CORPUS_DIR = REPO_ROOT / "tests" / "eval"
38
38
 
39
39
  STOPWORDS = frozenset({
@@ -144,6 +144,31 @@ def _read_augment_rules_use_symlinks() -> bool:
144
144
  return False
145
145
 
146
146
 
147
def _lean_projection_mode() -> str:
    """Return the configured ``lean_projection.mode`` from .agent-settings.yml.

    Two results are possible:

    - ``"eager-all"`` (default): every rule body is inlined into every
      projection, i.e. today's behaviour.
    - ``"thin"``: kernel rules stay full-bodied and non-kernel rules become
      router-resolved pointers (lean-initial-context Phase 3.1; ~36k GPT tok
      lighter, measured).

    ``"thin"`` is returned only when the settings contain a
    ``lean_projection`` mapping whose ``mode`` equals ``thin`` after
    trimming and lower-casing. A missing or malformed value yields
    ``"eager-all"``, so the thin path is strictly opt-in and
    one-flip-revertible (see docs/contracts/rule-router.md § Kill-switch).
    The flip MUST be live-A/B-validated before it ships as the default: a
    thin projection only holds behaviour if the agent resolves the pointer
    on trigger-match.
    """
    try:
        from scripts._lib.agent_settings import load_agent_settings
    except ImportError:  # pragma: no cover — script-style invocation
        import sys as _sys
        from pathlib import Path as _Path
        _sys.path.insert(0, str(_Path(__file__).resolve().parent))
        from _lib.agent_settings import load_agent_settings  # type: ignore[import-not-found]

    settings = load_agent_settings(project_path=SETTINGS_FILE)
    section = settings.get("lean_projection")
    if not isinstance(section, dict):
        return "eager-all"
    requested = str(section.get("mode", "")).strip().lower()
    return "thin" if requested == "thin" else "eager-all"
147
172
 
148
173
 
149
174
  def file_hash(filepath: Path) -> str:
@@ -654,6 +679,18 @@ def generate_rule_symlinks() -> int:
654
679
  # All .md files in .agent-src/rules/ — not just universal ones
655
680
  rules = sorted([f.name for f in RULES_SOURCE.glob("*.md")])
656
681
  tool_dirs = _filter_tool_dirs(TOOL_DIRS)
682
+
683
+ # Thin-projection opt-in (lean-initial-context Phase 3.1). Default
684
+ # `eager-all` keeps the symlink behaviour below untouched; `thin` writes
685
+ # kernel rules full + non-kernel rules as router-resolved pointers.
686
+ thin_files: dict[str, str] | None = None
687
+ if _lean_projection_mode() == "thin":
688
+ try:
689
+ from scripts.project_thin_rules import build_thin
690
+ except ImportError: # pragma: no cover — script-style invocation
691
+ from project_thin_rules import build_thin # type: ignore[import-not-found]
692
+ thin_files = build_thin(RULES_SOURCE)
693
+
657
694
  total = 0
658
695
  for tool_dir, rel_prefix in tool_dirs.items():
659
696
  target_dir = PROJECT_ROOT / tool_dir
@@ -666,17 +703,21 @@ def generate_rule_symlinks() -> int:
666
703
 
667
704
  for rule in rules:
668
705
  link = target_dir / rule
669
- target = Path(rel_prefix) / rule
670
706
  if link.exists() or link.is_symlink():
671
707
  link.unlink()
672
- link.symlink_to(target)
708
+ if thin_files is not None:
709
+ # Thin mode: write a real file (kernel full / non-kernel pointer),
710
+ # not a symlink to the full source body.
711
+ link.write_text(thin_files[rule], encoding="utf-8")
712
+ else:
713
+ link.symlink_to(Path(rel_prefix) / rule)
673
714
  total += 1
674
715
 
675
716
  # Verify counts match across all tool directories
676
717
  source_count = len(rules)
677
718
  for tool_dir in tool_dirs:
678
719
  target_dir = PROJECT_ROOT / tool_dir
679
- tool_count = len([f for f in target_dir.iterdir() if f.is_symlink() and f.suffix == ".md"])
720
+ tool_count = len([f for f in target_dir.iterdir() if f.suffix == ".md"])
680
721
  if tool_count != source_count:
681
722
  print(f" ⚠️ {tool_dir}: {tool_count} rules (expected {source_count})")
682
723