@event4u/agent-config 5.5.0 → 5.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/image/analyse.md +51 -0
- package/.agent-src/commands/image/create.md +53 -0
- package/.agent-src/commands/image/verify.md +48 -0
- package/.agent-src/commands/image.md +69 -0
- package/.agent-src/commands/video/from-song.md +40 -6
- package/.agent-src/contexts/authority/commit-mechanics.md +8 -0
- package/.agent-src/rules/commit-policy.md +3 -8
- package/.agent-src/rules/media-sync-ground-truth.md +58 -0
- package/.agent-src/skills/image-analyser/SKILL.md +121 -0
- package/.agent-src/skills/image-analyser/canon-spec.md +109 -0
- package/.agent-src/skills/image-analyser/evals/triggers.json +16 -0
- package/.agent-src/skills/image-creator/SKILL.md +117 -0
- package/.agent-src/skills/image-creator/evals/triggers.json +16 -0
- package/.agent-src/skills/song-to-script/SKILL.md +36 -13
- package/.claude-plugin/marketplace.json +7 -1
- package/CHANGELOG.md +47 -0
- package/README.md +2 -2
- package/config/agent-settings.template.yml +18 -0
- package/dist/discovery/deprecation-report.md +1 -1
- package/dist/discovery/discovery-manifest.json +171 -18
- package/dist/discovery/discovery-manifest.json.sha256 +1 -1
- package/dist/discovery/discovery-manifest.summary.md +4 -4
- package/dist/discovery/orphan-report.md +1 -1
- package/dist/discovery/packs.json +15 -8
- package/dist/discovery/trust-report.md +3 -3
- package/dist/discovery/workspaces.json +13 -6
- package/dist/mcp/registry-manifest.json +3 -3
- package/dist/router.json +1 -1
- package/dist/server/schemas/settings.js +4 -0
- package/dist/server/schemas/settings.js.map +1 -1
- package/docs/architecture.md +3 -3
- package/docs/catalog.md +20 -6
- package/docs/contracts/benchmark-report-schema.md +12 -10
- package/docs/contracts/command-clusters.md +1 -0
- package/docs/contracts/rule-router.md +39 -0
- package/docs/contracts/value-dashboard-spec.md +7 -3
- package/docs/contracts/value-report-schema.md +6 -1
- package/docs/getting-started.md +2 -2
- package/docs/value.md +17 -17
- package/package.json +1 -1
- package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
- package/scripts/_lib/bench_report.py +13 -14
- package/scripts/_lib/bench_telegraph_report.py +1 -2
- package/scripts/_lib/token_count.py +95 -0
- package/scripts/_lib/value_report.py +3 -3
- package/scripts/ai-video/adapters/higgsfield.sh +163 -6
- package/scripts/ai-video/adapters/openai-images.sh +92 -6
- package/scripts/audit_auto_rules.py +22 -6
- package/scripts/audit_command_surface.py +6 -1
- package/scripts/audit_initial_context.py +210 -0
- package/scripts/bench_ab_diff.py +4 -11
- package/scripts/bench_run.py +2 -3
- package/scripts/bench_runner.py +2 -2
- package/scripts/condense.py +44 -3
- package/scripts/iron_law_sha.py +14 -5
- package/scripts/measure_rule_budget.py +15 -0
- package/scripts/project_thin_rules.py +168 -0
- package/scripts/render_value_md.py +14 -23
- package/scripts/schemas/command.schema.json +1 -1
- package/scripts/schemas/rule.schema.json +1 -1
- package/scripts/schemas/skill.schema.json +2 -2
- package/scripts/trigger_coverage.py +129 -0
|
@@ -42,12 +42,98 @@ aiv_cmd_run() {
|
|
|
42
42
|
seed="$(printf '%s' "${stdin_json}" | jq -r '.seed // empty')"
|
|
43
43
|
ref_first="$(printf '%s' "${stdin_json}" | jq -r '.ref_images[0] // empty')"
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
#
|
|
48
|
-
#
|
|
49
|
-
|
|
50
|
-
|
|
45
|
+
[ -n "${prompt}" ] || aiv_die 7 "${ADAPTER_ID}: empty prompt (prompt.* blocks required)"
|
|
46
|
+
|
|
47
|
+
# Images-generations API has no negative-prompt field — fold the
|
|
48
|
+
# negative list into the prompt as an explicit "Avoid:" clause.
|
|
49
|
+
local negative
|
|
50
|
+
negative="$(printf '%s' "${stdin_json}" | jq -r '(.negative // []) | join(", ")')"
|
|
51
|
+
[ -n "${negative}" ] && prompt="${prompt} Avoid: ${negative}."
|
|
52
|
+
|
|
53
|
+
# gpt-image-1 has no seed param. Log if present.
|
|
54
|
+
[ -n "${seed}" ] && printf '%s: seed=%s ignored (gpt-image-1 has no seed)\n' "${ADAPTER_ID}" "${seed}" >&2
|
|
55
|
+
: "${ref_first:=}"
|
|
56
|
+
|
|
57
|
+
# Resolve size from requested aspect (stdin .aspect overrides XML tuning).
|
|
58
|
+
local aspect quality size out
|
|
59
|
+
aspect="$(printf '%s' "${stdin_json}" | jq -r --arg a "${AIV_TUNING_ASPECT:-16:9}" '.aspect // $a')"
|
|
60
|
+
quality="${AIV_TUNING_QUALITY:-high}"
|
|
61
|
+
case "${aspect}" in
|
|
62
|
+
16:9|3:2|landscape) size="1536x1024" ;;
|
|
63
|
+
9:16|2:3|portrait) size="1024x1536" ;;
|
|
64
|
+
1:1|square) size="1024x1024" ;;
|
|
65
|
+
*) size="1536x1024" ;;
|
|
66
|
+
esac
|
|
67
|
+
|
|
68
|
+
# Output path: caller-set AIV_OUT wins; else a temp PNG.
|
|
69
|
+
out="${AIV_OUT:-}"
|
|
70
|
+
[ -n "${out}" ] || out="$(mktemp -t aiv-openai-XXXXXX).png"
|
|
71
|
+
|
|
72
|
+
# Collect reference image files. When present → /v1/images/edits
|
|
73
|
+
# (reference-conditioned, so the model adheres to the supplied
|
|
74
|
+
# character); otherwise plain text-to-image /v1/images/generations.
|
|
75
|
+
local -a ref_files=() tmp_files=()
|
|
76
|
+
local r tmp
|
|
77
|
+
while IFS= read -r r; do
|
|
78
|
+
[ -n "${r}" ] || continue
|
|
79
|
+
case "${r}" in
|
|
80
|
+
http://*|https://*)
|
|
81
|
+
tmp="$(mktemp -t aiv-ref-XXXXXX).png"
|
|
82
|
+
curl -sS -L -o "${tmp}" "${r}" || aiv_die 8 "${ADAPTER_ID}: failed to download ref image: ${r}"
|
|
83
|
+
ref_files+=("${tmp}"); tmp_files+=("${tmp}") ;;
|
|
84
|
+
*)
|
|
85
|
+
case "${r}" in /*) : ;; *) r="$(pwd)/${r}" ;; esac
|
|
86
|
+
[ -f "${r}" ] || aiv_die 7 "${ADAPTER_ID}: ref image not found: ${r}"
|
|
87
|
+
ref_files+=("${r}") ;;
|
|
88
|
+
esac
|
|
89
|
+
done < <(printf '%s' "${stdin_json}" | jq -r '.ref_images[]? // empty')
|
|
90
|
+
|
|
91
|
+
local req resp http_code body b64
|
|
92
|
+
if [ "${#ref_files[@]}" -gt 0 ]; then
|
|
93
|
+
# Reference-conditioned edit. gpt-image-1 accepts multiple image[] refs.
|
|
94
|
+
local -a fargs=(-F "model=${AIV_MODEL:-gpt-image-1}" -F "prompt=${prompt}" \
|
|
95
|
+
-F "size=${size}" -F "quality=${quality}" -F "n=1")
|
|
96
|
+
for r in "${ref_files[@]}"; do fargs+=(-F "image[]=@${r};type=image/png"); done
|
|
97
|
+
printf '%s: edits endpoint with %d reference image(s)\n' "${ADAPTER_ID}" "${#ref_files[@]}" >&2
|
|
98
|
+
resp="$(curl -sS -w '\n%{http_code}' \
|
|
99
|
+
-X POST "${AIV_ENDPOINT%/}/images/edits" \
|
|
100
|
+
-H "Authorization: Bearer ${AIV_KEY}" \
|
|
101
|
+
"${fargs[@]}")" \
|
|
102
|
+
|| aiv_die 8 "${ADAPTER_ID}: curl to ${AIV_ENDPOINT%/}/images/edits failed"
|
|
103
|
+
else
|
|
104
|
+
req="$(jq -n \
|
|
105
|
+
--arg m "${AIV_MODEL:-gpt-image-1}" --arg p "${prompt}" \
|
|
106
|
+
--arg s "${size}" --arg q "${quality}" \
|
|
107
|
+
'{model: $m, prompt: $p, size: $s, quality: $q, n: 1}')"
|
|
108
|
+
resp="$(curl -sS -w '\n%{http_code}' \
|
|
109
|
+
-X POST "${AIV_ENDPOINT%/}/images/generations" \
|
|
110
|
+
-H "Authorization: Bearer ${AIV_KEY}" \
|
|
111
|
+
-H "Content-Type: application/json" \
|
|
112
|
+
--data-binary "${req}")" \
|
|
113
|
+
|| aiv_die 8 "${ADAPTER_ID}: curl to ${AIV_ENDPOINT%/}/images/generations failed"
|
|
114
|
+
fi
|
|
115
|
+
# Clean up any downloaded temp refs (set -u safe on empty arrays).
|
|
116
|
+
for tmp in ${tmp_files[@]+"${tmp_files[@]}"}; do rm -f "${tmp}"; done
|
|
117
|
+
|
|
118
|
+
http_code="$(printf '%s' "${resp}" | tail -n1)"
|
|
119
|
+
body="$(printf '%s' "${resp}" | sed '$d')"
|
|
120
|
+
case "${http_code}" in
|
|
121
|
+
2*) : ;;
|
|
122
|
+
*) aiv_die 8 "${ADAPTER_ID}: HTTP ${http_code}: $(printf '%s' "${body}" | jq -r '.error.message // .error // "unknown error"' 2>/dev/null | head -c 300)" ;;
|
|
123
|
+
esac
|
|
124
|
+
|
|
125
|
+
# gpt-image-1 always returns base64 (no url).
|
|
126
|
+
b64="$(printf '%s' "${body}" | jq -r '.data[0].b64_json // empty')"
|
|
127
|
+
[ -n "${b64}" ] || aiv_die 8 "${ADAPTER_ID}: no image data in response (got: $(printf '%s' "${body}" | head -c 200))"
|
|
128
|
+
|
|
129
|
+
# Portable base64 decode (GNU -d / BSD -D).
|
|
130
|
+
local b64dec
|
|
131
|
+
if printf '' | base64 -d >/dev/null 2>&1; then b64dec='base64 -d'; else b64dec='base64 -D'; fi
|
|
132
|
+
printf '%s' "${b64}" | ${b64dec} > "${out}" \
|
|
133
|
+
|| aiv_die 8 "${ADAPTER_ID}: base64 decode to ${out} failed"
|
|
134
|
+
|
|
135
|
+
case "${out}" in /*) : ;; *) out="$(pwd)/${out}" ;; esac
|
|
136
|
+
jq -n --arg p "${out}" '{video_path: $p, audio_embedded: false}'
|
|
51
137
|
}
|
|
52
138
|
|
|
53
139
|
# Submit entry point: forwards every argument unchanged to aiv_cmd_run
# and returns that command's exit status.
aiv_cmd_submit() {
  aiv_cmd_run "$@"
}
|
|
@@ -25,8 +25,24 @@ from pathlib import Path
|
|
|
25
25
|
import yaml
|
|
26
26
|
|
|
27
27
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
28
|
-
|
|
29
|
-
|
|
28
|
+
sys.path.insert(0, str(REPO_ROOT / "scripts"))
|
|
29
|
+
from _lib.agent_src import artefact_roots # noqa: E402
|
|
30
|
+
|
|
31
|
+
# Pre-monorepo this was REPO_ROOT/.agent-src.uncondensed/rules. Post-move
|
|
32
|
+
# (ADR-017) source rules live under packages/*/.agent-src.uncondensed/rules.
|
|
33
|
+
def _src_rule_paths() -> list[Path]:
    """Collect the source rule files from every artefact root.

    Walks ``artefact_roots()`` in the order it yields them and, for each root
    that has a ``rules`` directory, takes its ``*.md`` files in sorted order.
    When the same file name occurs under more than one root, only the first
    occurrence is kept.

    Returns:
        The de-duplicated rule paths, in discovery order.
    """
    first_by_name: dict[str, Path] = {}
    for artefact_root in artefact_roots():
        rules_dir = artefact_root / "rules"
        if not rules_dir.is_dir():
            continue
        for rule_file in sorted(rules_dir.glob("*.md")):
            # setdefault keeps the earliest path for a given file name;
            # dict insertion order preserves discovery order.
            first_by_name.setdefault(rule_file.name, rule_file)
    return list(first_by_name.values())
|
|
44
|
+
|
|
45
|
+
PROJECTED_RULES = REPO_ROOT / ".agent-src" / "rules"
|
|
30
46
|
REPORT_DIR = REPO_ROOT / "agents" / "reports"
|
|
31
47
|
JSON_OUT = REPORT_DIR / "auto-rules-audit.json"
|
|
32
48
|
MD_OUT = REPORT_DIR / "auto-rules-audit.md"
|
|
@@ -67,7 +83,7 @@ def _trigger_summary(triggers: list) -> dict:
|
|
|
67
83
|
|
|
68
84
|
def collect() -> list[dict]:
|
|
69
85
|
rules: list[dict] = []
|
|
70
|
-
for path in
|
|
86
|
+
for path in _src_rule_paths():
|
|
71
87
|
text = path.read_text(encoding="utf-8")
|
|
72
88
|
fm, body = _split_frontmatter(text)
|
|
73
89
|
if fm.get("type") != "auto":
|
|
@@ -107,7 +123,7 @@ def render_markdown(rules: list[dict]) -> str:
|
|
|
107
123
|
"# Auto-Rule Audit",
|
|
108
124
|
"",
|
|
109
125
|
"Generated by `scripts/audit_auto_rules.py` for Phase 5 of",
|
|
110
|
-
"`agents/roadmaps/road-to-augment-limit-fit.md`. Re-run after",
|
|
126
|
+
"`agents/roadmaps/archive/road-to-augment-limit-fit.md`. Re-run after",
|
|
111
127
|
"any rule add/merge/deprecate to refresh the baseline.",
|
|
112
128
|
"",
|
|
113
129
|
"## Totals",
|
|
@@ -141,8 +157,8 @@ def render_markdown(rules: list[dict]) -> str:
|
|
|
141
157
|
|
|
142
158
|
|
|
143
159
|
def main() -> int:
|
|
144
|
-
if not
|
|
145
|
-
print(
|
|
160
|
+
if not _src_rule_paths():
|
|
161
|
+
print("❌ No source rules found under any artefact root's rules/", file=sys.stderr)
|
|
146
162
|
return 1
|
|
147
163
|
rules = collect()
|
|
148
164
|
REPORT_DIR.mkdir(parents=True, exist_ok=True)
|
|
@@ -37,7 +37,12 @@ from pathlib import Path
|
|
|
37
37
|
from typing import List
|
|
38
38
|
|
|
39
39
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
40
|
-
|
|
40
|
+
# Pre-monorepo: REPO_ROOT/.agent-src.uncondensed/commands. Post-move (ADR-017)
|
|
41
|
+
# the core command surface lives under packages/core/.agent-src.uncondensed.
|
|
42
|
+
# Fall back to the legacy path only if the packages layout is absent.
|
|
43
|
+
_CORE_COMMANDS = REPO_ROOT / "packages" / "core" / ".agent-src.uncondensed" / "commands"
|
|
44
|
+
_LEGACY_COMMANDS = REPO_ROOT / ".agent-src.uncondensed" / "commands"
|
|
45
|
+
DEFAULT_ROOT = _CORE_COMMANDS if _CORE_COMMANDS.is_dir() else _LEGACY_COMMANDS
|
|
41
46
|
REPORT_DIR = REPO_ROOT / "agents" / "reports"
|
|
42
47
|
OUT_JSON = REPORT_DIR / "command-surface.json"
|
|
43
48
|
OUT_MD = REPORT_DIR / "command-surface.md"
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Initial-context token audit (roadmap `road-to-lean-initial-context`).
|
|
3
|
+
|
|
4
|
+
Serves three roadmap steps with one analyzer (no new analyzer where one
|
|
5
|
+
exists — reuses `scripts/_lib/token_count.py`):
|
|
6
|
+
|
|
7
|
+
- **0B.2** — always-on rule-body footprint per tool projection.
|
|
8
|
+
- **0B.4** — description-catalog initial cost (skill + command name+desc).
|
|
9
|
+
- **1.3** — unified `audit:tokens` surfacing per-tool initial-token estimate,
|
|
10
|
+
longest rules in tokens, and the description-catalog pool.
|
|
11
|
+
|
|
12
|
+
`char != token`: every number is reported in both. GPT counts are exact when
|
|
13
|
+
`tiktoken` is installed, else a documented proxy (see `token_count`).
|
|
14
|
+
|
|
15
|
+
Usage:
|
|
16
|
+
python3 scripts/audit_initial_context.py # markdown report → stdout
|
|
17
|
+
python3 scripts/audit_initial_context.py --json # machine-readable
|
|
18
|
+
python3 scripts/audit_initial_context.py --write # write report files
|
|
19
|
+
python3 scripts/audit_initial_context.py --fail-if-over-budget # CI gate (1.4)
|
|
20
|
+
|
|
21
|
+
Exit codes: 0 = ok (or no budget set); 1 = a measured surface exceeds its
|
|
22
|
+
configured token budget (only with --fail-if-over-budget).
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import argparse
|
|
28
|
+
import datetime as _dt
|
|
29
|
+
import glob
|
|
30
|
+
import json
|
|
31
|
+
import re
|
|
32
|
+
import sys
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
|
|
35
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
36
|
+
sys.path.insert(0, str(REPO_ROOT / "scripts"))
|
|
37
|
+
from _lib import token_count # noqa: E402
|
|
38
|
+
|
|
39
|
+
try:
|
|
40
|
+
import yaml
|
|
41
|
+
except ImportError: # pragma: no cover
|
|
42
|
+
sys.stderr.write("error: PyYAML required (pip install pyyaml)\n")
|
|
43
|
+
sys.exit(2)
|
|
44
|
+
|
|
45
|
+
REPORT_DIR = REPO_ROOT / "internal" / "bench" / "reports"
|
|
46
|
+
|
|
47
|
+
# Tools whose rules/ dir holds one .md per rule (full body projected today).
|
|
48
|
+
DIR_RULE_TOOLS = (".claude", ".augment", ".cursor")
|
|
49
|
+
# Tools whose always-on surface is a single monolithic file.
|
|
50
|
+
MONOLITH_TOOLS = (".windsurfrules",)
|
|
51
|
+
|
|
52
|
+
# Initial-token budget per surface (None = advisory only, no gate). These are
|
|
53
|
+
# soft ceilings the audit can enforce once a baseline is agreed (1.4). Set
|
|
54
|
+
# generously now; tighten as Phase 3 lands.
|
|
55
|
+
BUDGETS: dict[str, int | None] = {
|
|
56
|
+
"rules.gpt": None,
|
|
57
|
+
"skill_catalog.gpt": None,
|
|
58
|
+
"command_catalog.gpt": None,
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _frontmatter(path: Path) -> dict:
|
|
63
|
+
try:
|
|
64
|
+
text = path.read_text(encoding="utf-8", errors="ignore")
|
|
65
|
+
except OSError:
|
|
66
|
+
return {}
|
|
67
|
+
m = re.match(r"^---\s*\n(.*?)\n---\s*\n", text, re.DOTALL)
|
|
68
|
+
if not m:
|
|
69
|
+
return {}
|
|
70
|
+
try:
|
|
71
|
+
return yaml.safe_load(m.group(1)) or {}
|
|
72
|
+
except yaml.YAMLError:
|
|
73
|
+
return {}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _measure_files(paths: list[Path]) -> dict:
    """Measure the concatenated text of ``paths`` as one blob.

    The files are read as UTF-8 (undecodable bytes ignored), joined with no
    separator, and passed to ``token_count.measure``.

    Returns:
        The mapping returned by ``token_count.measure`` with an extra
        ``"files"`` entry holding ``len(paths)``.
    """
    bodies = []
    for rule_path in paths:
        bodies.append(rule_path.read_text(encoding="utf-8", errors="ignore"))
    stats = token_count.measure("".join(bodies))
    stats["files"] = len(paths)
    return stats
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def rule_footprint() -> dict:
    """0B.2 — measure the always-on rule footprint of each tool projection.

    Directory-style tools (``DIR_RULE_TOOLS``) are measured over the sorted
    ``*.md`` files in ``<tool>/rules``; monolithic tools (``MONOLITH_TOOLS``)
    over their single file. A tool with no rule files, or whose monolith file
    is absent, is left out of the result.

    Returns:
        Mapping of tool name to its measurement dict (including ``"files"``).
    """
    footprint: dict[str, dict] = {}

    for tool in DIR_RULE_TOOLS:
        rule_files = sorted((REPO_ROOT / tool / "rules").glob("*.md"))
        if not rule_files:
            continue
        footprint[tool] = _measure_files(rule_files)

    for tool in MONOLITH_TOOLS:
        monolith = REPO_ROOT / tool
        if not monolith.is_file():
            continue
        # A single-file projection is just the one-element case of the
        # directory measurement (same read, same "files" bookkeeping).
        footprint[tool] = _measure_files([monolith])

    return footprint
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _catalog(glob_pat: str) -> dict:
    """Measure the ``name: description`` catalog for files matching a glob.

    Args:
        glob_pat: Glob pattern relative to ``REPO_ROOT`` (``**`` is honoured).

    For each match the frontmatter is read; files without a truthy
    ``description`` are skipped. The entry label is the frontmatter ``name``,
    falling back to the name of the file's parent directory.

    Returns:
        The mapping returned by ``token_count.measure`` for the
        newline-joined entries, plus ``"entries"`` with the entry count.
    """
    lines: list[str] = []
    for match in glob.glob(str(REPO_ROOT / glob_pat), recursive=True):
        source = Path(match)
        meta = _frontmatter(source)
        description = meta.get("description", "")
        if not description:
            continue
        label = meta.get("name") or source.parent.name
        lines.append(f"{label}: {description}")

    stats = token_count.measure("\n".join(lines))
    stats["entries"] = len(lines)
    return stats
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def description_catalog() -> dict:
    """0B.4 — description-catalog cost (eager progressive-disclosure surface).

    Returns:
        Mapping of catalog label to the ``_catalog`` measurement of the glob
        pattern associated with that label.
    """
    catalog_globs = (
        ("skills_projected", ".claude/skills/*/SKILL.md"),
        ("skills_core_source", "packages/core/.agent-src.uncondensed/skills/*/SKILL.md"),
        ("commands_core_source", "packages/core/.agent-src.uncondensed/commands/**/*.md"),
    )
    return {label: _catalog(pattern) for label, pattern in catalog_globs}
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def longest_rules(top: int = 10) -> list[dict]:
    """1.3 — longest rules in tokens (the trim candidates).

    Only the first tool in ``DIR_RULE_TOOLS`` whose ``rules`` directory exists
    is scanned: one tool is treated as representative because rule bodies are
    identical across the directory-style tools.

    Args:
        top: Maximum number of rows to return.

    Returns:
        Rows of ``{"id", "tokens_gpt", "chars"}``, ordered by descending GPT
        token count and then by rule id.
    """
    candidate_dirs = (REPO_ROOT / tool / "rules" for tool in DIR_RULE_TOOLS)
    rules_dir = next((d for d in candidate_dirs if d.is_dir()), None)

    ranked: list[dict] = []
    if rules_dir is not None:
        for rule_file in rules_dir.glob("*.md"):
            stats = token_count.measure(rule_file.read_text(encoding="utf-8", errors="ignore"))
            ranked.append({
                "id": rule_file.stem,
                "tokens_gpt": stats["tokens_gpt"],
                "chars": stats["chars"],
            })

    ranked.sort(key=lambda row: (-row["tokens_gpt"], row["id"]))
    return ranked[:top]
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def build() -> dict:
    """Assemble the full audit payload.

    Returns:
        A dict with the UTC generation timestamp (ISO 8601, second
        precision), the tokenizer method note, the per-tool rule footprint,
        the description-catalog measurements and the longest-rules ranking.
    """
    report: dict = {}
    report["generated"] = _dt.datetime.now(_dt.timezone.utc).isoformat(timespec="seconds")
    report["token_method"] = token_count.method_note()
    report["rule_footprint"] = rule_footprint()
    report["description_catalog"] = description_catalog()
    report["longest_rules"] = longest_rules()
    return report
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def render_md(d: dict) -> str:
    """Render the audit payload produced by ``build`` as a Markdown report.

    Args:
        d: Payload with ``generated``, ``token_method``, ``rule_footprint``,
            ``description_catalog`` and ``longest_rules`` entries.

    Returns:
        The report text: a header, then one table each for the rule
        footprint, the description catalogs and the longest rules. The text
        ends with a trailing newline.
    """
    five_col_rule = "|---|--:|--:|--:|--:|"

    out = [
        "# Initial-context token audit",
        "",
        f"- generated: `{d['generated']}`",
        f"- token method: {d['token_method']}",
        "",
        "## 0B.2 — always-on rule footprint per tool",
        "",
        "| tool | files | chars | GPT tok | Claude tok |",
        five_col_rule,
    ]
    out.extend(
        f"| `{tool}` | {stats['files']} | {stats['chars']:,} | {stats['tokens_gpt']:,} | {stats['tokens_claude']:,} |"
        for tool, stats in d["rule_footprint"].items()
    )

    out.extend([
        "",
        "## 0B.4 — description-catalog cost (eager)",
        "",
        "| catalog | entries | chars | GPT tok | Claude tok |",
        five_col_rule,
    ])
    out.extend(
        f"| {label} | {stats['entries']} | {stats['chars']:,} | {stats['tokens_gpt']:,} | {stats['tokens_claude']:,} |"
        for label, stats in d["description_catalog"].items()
    )

    out.extend([
        "",
        "## 1.3 — top-10 longest rules (token trim candidates)",
        "",
        "| rule | GPT tok | chars |",
        "|---|--:|--:|",
    ])
    out.extend(
        f"| `{row['id']}` | {row['tokens_gpt']:,} | {row['chars']:,} |"
        for row in d["longest_rules"]
    )

    # Final empty element gives the joined text its trailing newline.
    out.append("")
    return "\n".join(out)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for the initial-context audit.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit code. With ``--fail-if-over-budget`` the function only
        runs the budget gate and returns 1 on a breach, 0 otherwise (no report
        is printed or written in that mode). Without it, the report is printed
        as JSON or Markdown, optionally written to ``REPORT_DIR``, and 0 is
        returned.
    """
    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    parser.add_argument("--json", action="store_true")
    parser.add_argument("--write", action="store_true",
                        help="write report files under internal/bench/reports/")
    parser.add_argument("--fail-if-over-budget", action="store_true",
                        help="exit 1 if a surface exceeds its configured token budget (1.4)")
    opts = parser.parse_args(argv)

    report = build()

    if opts.fail_if_over_budget:
        catalogs = report["description_catalog"]
        # The rules budget is checked against the first measured tool only.
        first_tool = next(iter(report["rule_footprint"].values()), {})
        measured = {
            "rules.gpt": first_tool.get("tokens_gpt", 0),
            "skill_catalog.gpt": catalogs["skills_projected"]["tokens_gpt"],
            "command_catalog.gpt": catalogs["commands_core_source"]["tokens_gpt"],
        }
        over_budget = []
        for surface, tokens in measured.items():
            ceiling = BUDGETS.get(surface)
            if ceiling is None:
                # No ceiling configured: advisory only, never a breach.
                continue
            if tokens > ceiling:
                over_budget.append(f"{surface} {tokens} > budget {ceiling}")
        if over_budget:
            print("❌ initial-context budget: " + "; ".join(over_budget))
            return 1
        print("✅ initial-context budget: pass (or advisory-only)")
        return 0

    rendered = json.dumps(report, indent=2, sort_keys=True) if opts.json else render_md(report)
    print(rendered)

    if opts.write:
        REPORT_DIR.mkdir(parents=True, exist_ok=True)
        json_path = REPORT_DIR / "projection-cost.json"
        md_path = REPORT_DIR / "projection-cost.md"
        json_path.write_text(json.dumps(report, indent=2, sort_keys=True), encoding="utf-8")
        md_path.write_text(render_md(report), encoding="utf-8")
        print(f"\n→ wrote {REPORT_DIR.relative_to(REPO_ROOT)}/projection-cost.{{json,md}}")
    return 0
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
if __name__ == "__main__":
|
|
210
|
+
sys.exit(main())
|
package/scripts/bench_ab_diff.py
CHANGED
|
@@ -10,7 +10,7 @@ Inputs: two report JSON paths. Output: a JSON artefact under
|
|
|
10
10
|
The diff content depends on the corpus:
|
|
11
11
|
|
|
12
12
|
- `ab-tracka` — trigger-accuracy %, false-positive count, per-rule lift.
|
|
13
|
-
- `ab-trackb` — completion-rate per category, wall-time, tokens,
|
|
13
|
+
- `ab-trackb` — completion-rate per category, wall-time, tokens,
|
|
14
14
|
ask-vs-act ratio, tool-call count.
|
|
15
15
|
|
|
16
16
|
Phase 2 only writes the structural skeleton (delta object with `with`,
|
|
@@ -74,7 +74,7 @@ def compute_track_a_diff(with_results: dict, without_results: dict) -> dict:
|
|
|
74
74
|
|
|
75
75
|
|
|
76
76
|
def compute_track_b_diff(with_results: dict, without_results: dict) -> dict:
|
|
77
|
-
"""Track B: completion rate per category + wall-time + tokens +
|
|
77
|
+
"""Track B: completion rate per category + wall-time + tokens + ask-vs-act."""
|
|
78
78
|
def mean(d: dict, key: str) -> float:
|
|
79
79
|
try:
|
|
80
80
|
return float(d.get(key, 0.0))
|
|
@@ -111,15 +111,8 @@ def compute_track_b_diff(with_results: dict, without_results: dict) -> dict:
|
|
|
111
111
|
3,
|
|
112
112
|
),
|
|
113
113
|
},
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
"without": mean(without_results, "mean_cost_usd"),
|
|
117
|
-
"delta": round(
|
|
118
|
-
mean(with_results, "mean_cost_usd")
|
|
119
|
-
- mean(without_results, "mean_cost_usd"),
|
|
120
|
-
4,
|
|
121
|
-
),
|
|
122
|
-
},
|
|
114
|
+
# cost_usd comparison intentionally omitted — API pricing misleads
|
|
115
|
+
# subscription users; tokens are the currency-neutral metric.
|
|
123
116
|
"ask_vs_act_ratio": {
|
|
124
117
|
"with": mean(with_results, "ask_vs_act_ratio"),
|
|
125
118
|
"without": mean(without_results, "ask_vs_act_ratio"),
|
package/scripts/bench_run.py
CHANGED
|
@@ -150,7 +150,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
150
150
|
headline = (
|
|
151
151
|
f"bench {report['corpus']['id']} · "
|
|
152
152
|
f"selection {sel['selection_accuracy']:.2%} ({verdict['selection']}) · "
|
|
153
|
-
f"
|
|
153
|
+
f"tokens {cost.get('source', 'n/a')} · "
|
|
154
154
|
f"quality {qual['quality_score']:.2%} ({verdict['quality']}) · "
|
|
155
155
|
f"overall {verdict['overall']}"
|
|
156
156
|
)
|
|
@@ -252,8 +252,7 @@ def _run_telegraph(args: argparse.Namespace) -> int:
|
|
|
252
252
|
f"telegraph · prompts {report['corpus']['prompt_count']} · "
|
|
253
253
|
f"calls {cost['totals']['calls']} · errors {cost['totals']['errors']} · "
|
|
254
254
|
f"vs_raw med {report['telegraph']['aggregate']['savings_vs_raw']['median']:.2%} · "
|
|
255
|
-
f"vs_terse med {report['telegraph']['aggregate']['savings_vs_terse']['median']:.2%}
|
|
256
|
-
f"cost ${cost['totals']['total_cost_usd']:.6f}"
|
|
255
|
+
f"vs_terse med {report['telegraph']['aggregate']['savings_vs_terse']['median']:.2%}"
|
|
257
256
|
)
|
|
258
257
|
if args.quiet:
|
|
259
258
|
print(headline)
|
package/scripts/bench_runner.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"""Bench runner for the eval corpora — step-4 measurement-and-benchmark Phase 1.
|
|
3
3
|
|
|
4
4
|
Deterministic, no-API skill-selection baseline. For each prompt in a
|
|
5
|
-
corpus YAML, ranks the
|
|
5
|
+
corpus YAML, ranks the skills in the projected catalog `.agent-src/skills/`
|
|
6
6
|
by keyword overlap between the prompt text and each skill's
|
|
7
7
|
`description` frontmatter field. Reports selection accuracy as
|
|
8
8
|
`top-K contains >= 1 expected_skill`.
|
|
@@ -33,7 +33,7 @@ except ImportError:
|
|
|
33
33
|
sys.exit(2)
|
|
34
34
|
|
|
35
35
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
36
|
-
SKILLS_DIR = REPO_ROOT / ".agent-src
|
|
36
|
+
SKILLS_DIR = REPO_ROOT / ".agent-src" / "skills"
|
|
37
37
|
CORPUS_DIR = REPO_ROOT / "tests" / "eval"
|
|
38
38
|
|
|
39
39
|
STOPWORDS = frozenset({
|
package/scripts/condense.py
CHANGED
|
@@ -144,6 +144,31 @@ def _read_augment_rules_use_symlinks() -> bool:
|
|
|
144
144
|
return False
|
|
145
145
|
|
|
146
146
|
|
|
147
|
+
def _lean_projection_mode() -> str:
    """Resolve ``lean_projection.mode`` from the agent settings file.

    Two modes exist:

    * ``eager-all`` (default): every rule body is inlined into every
      projection.
    * ``thin``: kernel rules keep their full body, non-kernel rules are
      projected as router-resolved pointers (lean-initial-context Phase 3.1).

    ``thin`` is returned only when the settings hold a ``lean_projection``
    mapping whose ``mode`` equals ``thin`` after trimming and lower-casing.
    A missing section, a non-mapping section or any other value yields
    ``eager-all``, so the thin path stays opt-in and revertible with one flip
    (see docs/contracts/rule-router.md § Kill-switch). The thin mode must be
    live-A/B-validated before it becomes the default, because it only holds
    behaviour if the agent resolves the pointer on trigger-match.
    """
    try:
        from scripts._lib.agent_settings import load_agent_settings
    except ImportError:  # pragma: no cover — script-style invocation
        import sys as _sys
        from pathlib import Path as _Path
        _sys.path.insert(0, str(_Path(__file__).resolve().parent))
        from _lib.agent_settings import load_agent_settings  # type: ignore[import-not-found]

    settings = load_agent_settings(project_path=SETTINGS_FILE)
    section = settings.get("lean_projection")
    if not isinstance(section, dict):
        return "eager-all"
    requested = str(section.get("mode", "")).strip().lower()
    return "thin" if requested == "thin" else "eager-all"
|
|
147
172
|
|
|
148
173
|
|
|
149
174
|
def file_hash(filepath: Path) -> str:
|
|
@@ -654,6 +679,18 @@ def generate_rule_symlinks() -> int:
|
|
|
654
679
|
# All .md files in .agent-src/rules/ — not just universal ones
|
|
655
680
|
rules = sorted([f.name for f in RULES_SOURCE.glob("*.md")])
|
|
656
681
|
tool_dirs = _filter_tool_dirs(TOOL_DIRS)
|
|
682
|
+
|
|
683
|
+
# Thin-projection opt-in (lean-initial-context Phase 3.1). Default
|
|
684
|
+
# `eager-all` keeps the symlink behaviour below untouched; `thin` writes
|
|
685
|
+
# kernel rules full + non-kernel rules as router-resolved pointers.
|
|
686
|
+
thin_files: dict[str, str] | None = None
|
|
687
|
+
if _lean_projection_mode() == "thin":
|
|
688
|
+
try:
|
|
689
|
+
from scripts.project_thin_rules import build_thin
|
|
690
|
+
except ImportError: # pragma: no cover — script-style invocation
|
|
691
|
+
from project_thin_rules import build_thin # type: ignore[import-not-found]
|
|
692
|
+
thin_files = build_thin(RULES_SOURCE)
|
|
693
|
+
|
|
657
694
|
total = 0
|
|
658
695
|
for tool_dir, rel_prefix in tool_dirs.items():
|
|
659
696
|
target_dir = PROJECT_ROOT / tool_dir
|
|
@@ -666,17 +703,21 @@ def generate_rule_symlinks() -> int:
|
|
|
666
703
|
|
|
667
704
|
for rule in rules:
|
|
668
705
|
link = target_dir / rule
|
|
669
|
-
target = Path(rel_prefix) / rule
|
|
670
706
|
if link.exists() or link.is_symlink():
|
|
671
707
|
link.unlink()
|
|
672
|
-
|
|
708
|
+
if thin_files is not None:
|
|
709
|
+
# Thin mode: write a real file (kernel full / non-kernel pointer),
|
|
710
|
+
# not a symlink to the full source body.
|
|
711
|
+
link.write_text(thin_files[rule], encoding="utf-8")
|
|
712
|
+
else:
|
|
713
|
+
link.symlink_to(Path(rel_prefix) / rule)
|
|
673
714
|
total += 1
|
|
674
715
|
|
|
675
716
|
# Verify counts match across all tool directories
|
|
676
717
|
source_count = len(rules)
|
|
677
718
|
for tool_dir in tool_dirs:
|
|
678
719
|
target_dir = PROJECT_ROOT / tool_dir
|
|
679
|
-
tool_count = len([f for f in target_dir.iterdir() if f.
|
|
720
|
+
tool_count = len([f for f in target_dir.iterdir() if f.suffix == ".md"])
|
|
680
721
|
if tool_count != source_count:
|
|
681
722
|
print(f" ⚠️ {tool_dir}: {tool_count} rules (expected {source_count})")
|
|
682
723
|
|
package/scripts/iron_law_sha.py
CHANGED
|
@@ -26,7 +26,15 @@ import sys
|
|
|
26
26
|
from pathlib import Path
|
|
27
27
|
|
|
28
28
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
29
|
-
|
|
29
|
+
sys.path.insert(0, str(REPO_ROOT / "scripts"))
|
|
30
|
+
from _lib.agent_src import artefact_roots # noqa: E402
|
|
31
|
+
|
|
32
|
+
# Pre-monorepo this was REPO_ROOT/.agent-src.uncondensed/rules. Post-move
|
|
33
|
+
# (ADR-017) the source rules live under packages/*/.agent-src.uncondensed/rules.
|
|
34
|
+
# Resolve the same way measure_rule_budget does (multi-root aware) so the
|
|
35
|
+
# Iron-Law SHA gate keeps working against the current layout.
|
|
36
|
+
def _rules_dirs() -> list[Path]:
    """Return the ``rules`` directory of every artefact root that has one.

    Roots are visited in the order ``artefact_roots()`` yields them; roots
    without a ``rules`` directory are skipped.
    """
    found: list[Path] = []
    for artefact_root in artefact_roots():
        candidate = artefact_root / "rules"
        if candidate.is_dir():
            found.append(candidate)
    return found
|
|
30
38
|
|
|
31
39
|
# Locked kernel set — kept in sync with measure_rule_budget.KERNEL_RULES.
|
|
32
40
|
KERNEL_RULES = (
|
|
@@ -58,10 +66,11 @@ def iron_law_sha(text: str) -> str:
|
|
|
58
66
|
|
|
59
67
|
|
|
60
68
|
def rule_sha(rule_id: str) -> str:
    """Return the Iron-Law SHA of the rule file ``<rule_id>.md``.

    The rules directories from ``_rules_dirs()`` are searched in order and
    the first existing ``<rule_id>.md`` is hashed with ``iron_law_sha``.

    Args:
        rule_id: Rule identifier, i.e. the file name without ``.md``.

    Raises:
        FileNotFoundError: No rules directory contains ``<rule_id>.md``.
    """
    candidates = (rules_dir / f"{rule_id}.md" for rules_dir in _rules_dirs())
    match = next((candidate for candidate in candidates if candidate.exists()), None)
    if match is None:
        raise FileNotFoundError(f"{rule_id}.md not found under any artefact root's rules/")
    return iron_law_sha(match.read_text(encoding="utf-8"))
|
|
65
74
|
|
|
66
75
|
|
|
67
76
|
def main(argv: list[str] | None = None) -> int:
|
|
@@ -28,6 +28,7 @@ from pathlib import Path
|
|
|
28
28
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
29
29
|
sys.path.insert(0, str(REPO_ROOT / "scripts"))
|
|
30
30
|
from _lib.agent_src import artefact_roots # noqa: E402
|
|
31
|
+
from _lib import token_count # noqa: E402
|
|
31
32
|
|
|
32
33
|
OVERRIDES_FILE = REPO_ROOT / "docs" / "contracts" / "iron-law-overrides.txt"
|
|
33
34
|
TREND_FILE = REPO_ROOT / "agents" / "runtime" / ".rule-budget-history.jsonl"
|
|
@@ -91,6 +92,9 @@ def measure_rule(path: Path) -> dict[str, object]:
|
|
|
91
92
|
"tier": fields.get("tier", ""),
|
|
92
93
|
"chars": len(body),
|
|
93
94
|
"lines": body.count("\n"),
|
|
95
|
+
# Real-tokenizer truth alongside the char proxy (roadmap 0B.1).
|
|
96
|
+
"tokens_gpt": token_count.gpt_tokens(body).tokens,
|
|
97
|
+
"tokens_claude": token_count.claude_tokens(body).tokens,
|
|
94
98
|
}
|
|
95
99
|
|
|
96
100
|
|
|
@@ -141,6 +145,11 @@ def aggregate(rules: list[dict[str, object]]) -> dict[str, object]:
|
|
|
141
145
|
"auto_chars": sum(int(r["chars"]) for r in auto),
|
|
142
146
|
"kernel_chars": sum(int(r["chars"]) for r in kernel),
|
|
143
147
|
"total_chars": total_chars,
|
|
148
|
+
"kernel_tokens_gpt": sum(int(r.get("tokens_gpt", 0)) for r in kernel),
|
|
149
|
+
"kernel_tokens_claude": sum(int(r.get("tokens_claude", 0)) for r in kernel),
|
|
150
|
+
"total_tokens_gpt": sum(int(r.get("tokens_gpt", 0)) for r in rules),
|
|
151
|
+
"total_tokens_claude": sum(int(r.get("tokens_claude", 0)) for r in rules),
|
|
152
|
+
"token_method": token_count.method_note(),
|
|
144
153
|
"kernel_hard": KERNEL_HARD,
|
|
145
154
|
"kernel_target": KERNEL_TARGET,
|
|
146
155
|
"per_rule_hard": PER_RULE_HARD,
|
|
@@ -181,6 +190,12 @@ def render_table(rules: list[dict[str, object]], agg: dict[str, object]) -> str:
|
|
|
181
190
|
)
|
|
182
191
|
lines.append(f" total: {agg['total_chars']:>6} chars across {agg['rule_count']} rules")
|
|
183
192
|
lines.append("")
|
|
193
|
+
lines.append(
|
|
194
|
+
f"kernel-tokens: {agg['kernel_tokens_gpt']:>6} GPT · {agg['kernel_tokens_claude']:>6} Claude "
|
|
195
|
+
f"(total {agg['total_tokens_gpt']} GPT · {agg['total_tokens_claude']} Claude)"
|
|
196
|
+
)
|
|
197
|
+
lines.append(f" token method: {agg['token_method']}")
|
|
198
|
+
lines.append("")
|
|
184
199
|
lines.append(f"top-5 largest:")
|
|
185
200
|
for r in agg["top5_largest"]: # type: ignore[index]
|
|
186
201
|
lines.append(f" {r['chars']:>5} {r['id']} ({r['type']})")
|