npm - @event4u/agent-config - Versions diffs - 3.1.1 → 3.3.0 - Mend

@event4u/agent-config 3.1.1 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96) hide show

package/.agent-src/commands/agent-status.md +1 -1
package/.agent-src/commands/analytics/prune.md +78 -0
package/.agent-src/commands/analytics/show.md +107 -0
package/.agent-src/commands/analytics.md +64 -0
package/.agent-src/commands/knowledge/forget.md +104 -0
package/.agent-src/commands/knowledge/ingest.md +122 -0
package/.agent-src/commands/knowledge/list.md +102 -0
package/.agent-src/commands/knowledge.md +75 -0
package/.agent-src/scripts/update_roadmap_progress.py +1 -1
package/.agent-src/skills/compress-memory/SKILL.md +1 -1
package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
package/.claude-plugin/marketplace.json +8 -1
package/AGENTS.md +5 -4
package/CHANGELOG.md +54 -222
package/README.md +12 -2
package/dist/discovery/deprecation-report.md +1 -1
package/dist/discovery/discovery-manifest.json +164 -10
package/dist/discovery/discovery-manifest.json.sha256 +1 -1
package/dist/discovery/discovery-manifest.summary.md +3 -3
package/dist/discovery/orphan-report.md +1 -1
package/dist/discovery/packs.json +12 -5
package/dist/discovery/trust-report.md +2 -2
package/dist/discovery/workspaces.json +11 -4
package/dist/mcp/mcp-cloudflare-catalogue.json +2 -0
package/dist/mcp/registry-manifest.json +5 -3
package/docs/architecture.md +1 -1
package/docs/archive/CHANGELOG-pre-3.2.0.md +268 -0
package/docs/benchmarks.md +4 -4
package/docs/catalog.md +9 -2
package/docs/contracts/CHANGELOG-conventions.md +20 -1
package/docs/contracts/adr-mcp-runtime.md +1 -1
package/docs/contracts/at-rest-encryption.md +146 -0
package/docs/contracts/benchmark-corpus-spec.md +3 -3
package/docs/contracts/benchmark-report-schema.md +5 -5
package/docs/contracts/caveman-telemetry.md +4 -4
package/docs/contracts/compression-default-kill-criterion.md +5 -5
package/docs/contracts/cost-enforcement.md +1 -1
package/docs/contracts/daily-workspace.md +137 -0
package/docs/contracts/explain-modes.md +146 -0
package/docs/contracts/host-agent-protocol.md +88 -0
package/docs/contracts/local-analytics.md +148 -0
package/docs/contracts/local-knowledge-ingestion.md +96 -0
package/docs/contracts/mcp-beta-criteria.md +1 -1
package/docs/contracts/mcp-cloud-scope.md +4 -4
package/docs/contracts/mcp-registry-manifest.schema.json +1 -1
package/docs/contracts/mcp-tool-inventory.md +1 -1
package/docs/contracts/mcp-tool-stub-envelope.md +1 -1
package/docs/contracts/measurement-baseline.md +6 -6
package/docs/contracts/role-experience.md +121 -0
package/docs/contracts/workspace-documents.md +140 -0
package/docs/decisions/ADR-022-daily-workspace-decomposition.md +140 -0
package/docs/decisions/ADR-023-host-agent-protocol.md +129 -0
package/docs/decisions/ADR-024-workspace-v0-feature-floor.md +126 -0
package/docs/decisions/ADR-025-workspace-chrome.md +119 -0
package/docs/decisions/ADR-026-explain-mode-translation.md +117 -0
package/docs/decisions/ADR-027-changelog-machine-vs-manual.md +129 -0
package/docs/decisions/ADR-028-root-layout.md +147 -0
package/docs/decisions/ADR-029-multi-workspace-deferred.md +122 -0
package/docs/decisions/INDEX.md +8 -0
package/docs/deploy/small-team-recipe.md +148 -0
package/docs/deploy/team-deployment-posture.md +91 -0
package/docs/getting-started-by-role.md +27 -0
package/docs/getting-started.md +1 -1
package/docs/guides/local-analytics.md +125 -0
package/docs/guides/local-knowledge.md +127 -0
package/docs/mcp-server.md +1 -1
package/docs/parity/bench-ruflo.json +3 -3
package/docs/parity/ruflo.md +1 -1
package/docs/setup/mcp-client-config.md +1 -1
package/docs/setup/mcp-cloud-endpoints.md +1 -1
package/docs/setup/mcp-cloud-setup.md +2 -2
package/docs/setup/mcp-r2-bootstrap.md +1 -1
package/package.json +4 -2
package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
package/scripts/_lib/bench_caveman.py +2 -2
package/scripts/_lib/bench_caveman_report.py +1 -1
package/scripts/_lib/bench_cost.py +2 -2
package/scripts/_lib/bench_report.py +2 -2
package/scripts/_lib/changelog_eras.py +330 -0
package/scripts/audit_mcp_tools.py +1 -1
package/scripts/bench_baseline_ready.py +3 -3
package/scripts/bench_compress_memory.py +4 -4
package/scripts/bench_drift_check.py +2 -2
package/scripts/bench_per_tool.py +2 -2
package/scripts/bench_run.py +4 -4
package/scripts/build_mcp_registry_manifest.py +2 -2
package/scripts/mcp_server/__init__.py +1 -1
package/scripts/mcp_server/catalog.py +1 -1
package/scripts/mcp_server/consumer_tool_catalog.json +1 -1
package/scripts/mcp_server/tools.py +1 -1
package/scripts/memory_lookup.py +78 -1
package/scripts/pack_mcp_content.py +6 -6
package/scripts/release.py +93 -3
package/scripts/skill_trigger_eval.py +2 -2

package/docs/guides/local-analytics.md ADDED Viewed

@@ -0,0 +1,125 @@
+# Local analytics — a 3-minute walkthrough
+> Phase 7 of [`road-to-employee-product-and-external-proof`](../../agents/roadmaps/road-to-employee-product-and-external-proof.md).
+> Contract: [`docs/contracts/local-analytics.md`](../contracts/local-analytics.md).
+## What this is
+A **local-only** event log of your workspace activity. The package never
+POSTs these records anywhere. Storage is a single append-only JSONL file
+under your home directory; retention is a 90-day rolling window.
+If you want to know *"which prompts do I actually run on Tuesday
+mornings?"* — this is the file that knows. If you don't want that
+question answered, flip one flag and the file never opens.
+## Where it lives
+```
+~/.event4u/agent-config/workspace/analytics/
+├── events.jsonl          # one workspace_event/v0 record per line
+└── retention.lock        # presence = a prune pass is running
+```
+One event per line. Schema is `workspace_event/v0` (matches the
+3.1.0 telemetry SDK vocabulary, but the transports never touch each
+other — the SDK is the undeployed Worker surface, this is your disk).
+## What's collected
+Closed event set (rejected if not on the list):
+| Event | When |
+|---|---|
+| `launcher.opened`         | Workspace tab opens. |
+| `launcher.task_picked`    | User clicks a task in the launcher. |
+| `launcher.task_launched`  | Host agent receives the rendered prompt. |
+| `session.started` / `session.host_turn` / `session.completed` | Conversation lifecycle. |
+| `document.created` / `document.edited` / `document.exported` | Phase 5 document workflows. |
+| `explain.opened` / `explain.mode_toggled` / `why.invoked` | Phase 6 explain mode. |
+| `knowledge.queried` / `knowledge.source_clicked` | Phase 2 knowledge pane interactions. |
+Each record carries a UTC timestamp, the schema version, and a tiny
+`data` dict (role, task, host_tier, duration_ms — never prompt or
+response bodies).
+## How to read it
+```bash
+# Render the last 30 days as markdown
+python3 packages/core/installer/python/workspace_analytics.py show
+# Last 24 h, JSON
+python3 packages/core/installer/python/workspace_analytics.py show \
+    --window 24h --format json
+# Filter to one role
+python3 packages/core/installer/python/workspace_analytics.py show \
+    --role tradesperson --format csv
+```
+Output shape (markdown):
+```
+# Workspace analytics — last 30d
+## Top prompts
+- `tradesperson` · `estimate` — 12
+- `content-creator` · `script-video` — 7
+- `consultant` · `weekly-memo` — 4
+## Launcher → completion rate per role
+- `tradesperson` — 83% (12 launched · 10 completed)
+- `content-creator` — 71% (7 launched · 5 completed)
+**Average session length:** 3m 41s
+**Knowledge sources clicked:** 14
+```
+## How to opt out
+Two equivalent switches — either short-circuits before any file opens.
+```bash
+# Env (per-shell)
+export AGENT_CONFIG_NO_LOCAL_ANALYTICS=1
+```
+```yaml
+# .agent-settings.yml (per-project)
+analytics:
+  local: off
+```
+After either is in effect, `emit()` returns `False` and the JSONL is
+never appended. The `show` command still works against existing data,
+so you can opt out without losing what you already have.
+## How to delete it
+The log is plain JSONL. To delete it, remove the whole analytics directory (event log and lock file):
+```bash
+rm -rf ~/.event4u/agent-config/workspace/analytics/
+```
+Or prune the rolling window manually:
+```bash
+python3 packages/core/installer/python/workspace_analytics.py prune
+# → pruned 47 event(s)
+```
+`prune` drops anything older than 90 days. The lock file prevents two
+concurrent passes from racing each other.
+## What this guide does not cover
+- **Remote telemetry** — that's the Worker SDK (`packages/telemetry/`).
+  Deployment is out of v0 scope; kill-switch defaults to disabled.
+- **Workspace UI** — Phase 4 builds the browser tab that emits these
+  events. See [`docs/contracts/daily-workspace.md`](../contracts/daily-workspace.md).
+- **Encryption at rest** — Phase 8. Until then, the JSONL is plaintext
+  on your local disk.

package/docs/guides/local-knowledge.md ADDED Viewed

@@ -0,0 +1,127 @@
+---
+stability: beta
+keep-beta-until: 2026-08-24
+---
+# Local knowledge — 5-minute walkthrough
+Point the agent at a folder of local files (PDFs, Markdown, Word docs, spreadsheets). It chunks, redacts PII + secrets, and persists into the agent memory namespace — local-only, single-user, no OAuth, no remote fetch.
+Contract: [`local-knowledge-ingestion`](../contracts/local-knowledge-ingestion.md).
+Roadmap home: `agents/roadmaps/road-to-employee-product-and-external-proof.md` Phase 2.
+## Prerequisites
+- Python 3.10+ on the host.
+- `markitdown` on `PATH` if the corpus contains PDF / DOCX / XLSX / EPUB / images. Pure markdown / text corpora work without it.
+- An `agents/` directory in the project (created by the installer). The `agents/memory/knowledge/` subdirectory is created lazily on first ingest.
+## Step 1 — Pick a folder
+Anything local works: a customer folder, a project drop, a `.zip` archive, a single PDF. The walk skips hidden dirs (`.git`, `.venv`, `node_modules`) and does not follow symlinks.
+For this walkthrough we use a folder with one PDF and three markdown notes:
+```text
+/Users/maintainer/clients/acme/
+├── brief.pdf
+├── kickoff-notes.md
+├── meeting-2026-05-12.md
+└── pricing-v3.md
+```
+## Step 2 — Ingest
+```bash
+/knowledge ingest /Users/maintainer/clients/acme/
+```
+Realistic output (your ingest-id will differ — uuid7s are time-ordered):
+```text
+✅ ingested 01927f4a-2b1c from /Users/maintainer/clients/acme/
+   documents: 4, chunks: 18, bytes_stored: 47312
+   PII redacted: EMAIL=3, PHONE=1, IBAN=0, CC=0, SSN=0
+   secrets redacted: 0
+   skipped: 0 unsupported MIME
+```
+What just happened:
+- Each file routed through `markitdown` (PDF) or passthrough (Markdown).
+- Chunks split at ~2 KB boundaries, written to `agents/memory/knowledge/<ingest-id>/chunks/<n>.md`.
+- A `manifest.json` recorded the source path, doc count, redaction counters, and `created_at`.
+- PII regex pass replaced 3 emails + 1 phone with `[EMAIL]` / `[PHONE]` placeholders **before** the chunk hit disk.
+> Want the raw text stored without redaction? Pass `--no-redact`. The manifest captures the flag so the audit row names every bypass. The default is always to redact.
+## Step 3 — Ask the agent
+Use the host model normally. The MCP tool `memory_retrieve` now returns knowledge chunks alongside curated and intake entries — same envelope, with an additional `body.source_kind: knowledge` tag so the model knows the source is user-supplied, not maintainer-curated.
+Example prompt:
+> *"What does the acme pricing-v3 note say about volume discounts?"*
+The agent retrieves the matching chunks (pinned chunks rank slightly higher than unpinned; knowledge entries are discounted ~15 % vs curated so hand-reviewed content still wins on equal relevance) and answers with a citation back to the source path stored in the manifest.
+If nothing matches, the model says so. The retrieval surface does not invent a citation.
+## Step 4 — List + pin
+See what's been ingested:
+```bash
+/knowledge list
+```
+```text
+ID        DOCS  CHUNKS  BYTES   PINNED  REDACTED  CREATED              SOURCE
+01927f4a  4     18      47312   no      yes       2026-05-25T08:14:02  /Users/maintainer/clients/acme
+```
+Pin so it survives LRU eviction when the 500 MB namespace cap is crossed:
+```bash
+/knowledge list --pin 01927f4a
+```
+```text
+✅ pinned 01927f4a
+```
+Prefix must be unambiguous — if it matches > 1 ingest, the command rejects with a structured error and asks for a longer prefix.
+## Step 5 — Forget
+When the work is done, drop the ingest atomically:
+```bash
+/knowledge forget 01927f4a
+```
+```text
+✅ forgot 01927f4a — removed 18 chunks, 47312 bytes
+```
+Forget is atomic — no partial state. Pinned ingests are dropped the same as unpinned; pinning protects from LRU, not from explicit forget.
+## What the guide does **not** cover
+- Multi-user share — single-user by design. Multi-user lives behind ADR-024 workspace work and Phase 4 of the parent roadmap.
+- Remote sources — every input must resolve to a local path. `http://`, `https://`, `s3://`, `gs://`, `azure://` are rejected at the input validator.
+- Connector contracts (GitHub / Jira / Confluence) — those sit behind Hard-Floor OAuth and stay cancelled in `road-to-internal-ai-os-deployment.md` Phase 5.
+## Troubleshooting
+- **"Bound exceeded: total_ingest_size"** — the corpus is > 100 MB. Split it, or ingest a sub-folder.
+- **"Bound exceeded: document_count"** — > 1000 files. Same fix.
+- **"unsupported MIME"** — file skipped, counted in the summary, no chunk written. Add the file as `.md` if you need it indexed.
+- **OCR confidence < 0.7** — the chunk is tagged `low_confidence`. The model still receives it but the citation surface flags the lower confidence.
+- **markitdown not on PATH** — install it (`pip install 'markitdown[all]'`) or pass `--markitdown=<bin>`. Markdown-only corpora work without it.
+## See also
+- [`local-knowledge-ingestion`](../contracts/local-knowledge-ingestion.md) — contract (input shapes, bounds, storage, redaction).
+- [`/knowledge ingest`](../../.agent-src/commands/knowledge/ingest.md) · [`/knowledge list`](../../.agent-src/commands/knowledge/list.md) · [`/knowledge forget`](../../.agent-src/commands/knowledge/forget.md)
+- [`markitdown` skill](../../.agent-src/skills/markitdown/SKILL.md) — peer-side adapter for binary formats.

package/docs/mcp-server.md CHANGED Viewed

@@ -13,7 +13,7 @@ coexist:
   over JSON-RPC. Used by clients that speak MCP natively. Default for personal
   installs.
 - **Remote MCP** *(experimental, opt-in)* — a Cloudflare-hosted TypeScript
-  Worker (`workers/mcp/`) serves the same wire surface over HTTP/SSE for
+  Worker (`internal/workers/mcp/`) serves the same wire surface over HTTP/SSE for
   hosted-agent platforms. URL shapes pinned in
   [`docs/setup/mcp-cloud-endpoints.md`](setup/mcp-cloud-endpoints.md);
   safety contract in

package/docs/parity/bench-ruflo.json CHANGED Viewed

@@ -11,11 +11,11 @@
     "type": "claimed_upstream_not_verified_in_repo"
   },
   "measurement_protocol": {
-    "corpus": "bench/corpus/* (25-prompt corpus owned by step-4-measurement-and-benchmark.md)",
+    "corpus": "internal/bench/corpus/* (25-prompt corpus owned by step-4-measurement-and-benchmark.md)",
     "tracker": "scripts/cost/track.mjs",
-    "pricing": "bench/pricing.yaml",
+    "pricing": "internal/bench/pricing.yaml",
     "session_source": "~/.claude/projects/*/sessions/*.jsonl (Claude Code-native, no manual tracking)",
-    "tokens_to_dollars": "track.mjs multiplies input/output/cache-read/cache-write tokens by per-1M pricing from bench/pricing.yaml, separated by model id",
+    "tokens_to_dollars": "track.mjs multiplies input/output/cache-read/cache-write tokens by per-1M pricing from internal/bench/pricing.yaml, separated by model id",
     "headline_output": "average dollar cost per 25-prompt run, with min / max / p50 / p90 across N reports"
   },
   "current_window": {

package/docs/parity/ruflo.md CHANGED Viewed

@@ -23,7 +23,7 @@ soak in [`bench.json`](bench.json) flips from `warmup` to `baseline_ready`
 | # | Ruflo pattern | Verdict | Evidence |
 |---|---|---|---|
-| 1 | **Cost-tracker plugin** — real model pricing, per-1M, separated input/output/cache | `[x] covered by` | [`scripts/cost/track.mjs`](../../scripts/cost/track.mjs) + [`bench/pricing.yaml`](../../bench/pricing.yaml) (Haiku/Sonnet/Opus per-1M, input/output/cache-read/cache-write split). Step-11 Phase 1. |
+| 1 | **Cost-tracker plugin** — real model pricing, per-1M, separated input/output/cache | `[x] covered by` | [`scripts/cost/track.mjs`](../../scripts/cost/track.mjs) + [`internal/bench/pricing.yaml`](../../internal/bench/pricing.yaml) (Haiku/Sonnet/Opus per-1M, input/output/cache-read/cache-write split). Step-11 Phase 1. |
 | 2 | **Auto-capture from session jsonl** — reads Claude Code log, no manual tracking | `[x] covered by` | [`scripts/cost/track.mjs`](../../scripts/cost/track.mjs) reads `~/.claude/projects/*/sessions/*.jsonl` automatically. Step-11 Phase 1 Step 1. |
 | 3 | **50/75/90/100 % budget ladder with hard stop** | `[x] covered by` | [`scripts/cost/budget.mjs`](../../scripts/cost/budget.mjs) — exit codes 0/1/2/3 per tier; opt-in fail-closed via `cost.enforcement` setting. Fixtures: `tests/fixtures/cost/budget/{under-50,at-100,over-100}/`. Step-11 Phase 2. |
 | 4 | **Measured-vs-claimed disclaimer** — every percentage tagged "claimed upstream" | `[x] covered by` | One-line `**Measured-vs-claimed disclaimer:**` header block on all 9 active roadmaps in `agents/roadmaps/`. Verified 2026-05-16. Step-11 Phase 5 Step 4. |

package/docs/setup/mcp-client-config.md CHANGED Viewed

@@ -5,7 +5,7 @@ Worker. Read-only, identity-stable per release. Optional Bearer-token
 auth — see [§ Bearer auth](#bearer-auth) below.
 > **No public endpoint.** This package ships the Worker source under
-> `workers/mcp/`, but does **not** operate a shared hosted MCP server.
+> `internal/workers/mcp/`, but does **not** operate a shared hosted MCP server.
 > Deploy your own per [`mcp-cloud-setup.md`](mcp-cloud-setup.md) — your
 > URL will be `https://agent-config-mcp.<your-account>.workers.dev`
 > (or a custom domain you wire up in Step 7).

package/docs/setup/mcp-cloud-endpoints.md CHANGED Viewed

@@ -69,7 +69,7 @@ curl -s -X POST https://mcp.<your-domain>/ \
 ```
 After DNS is live, uncomment the `routes` block in
-`workers/mcp/wrangler.toml` and redeploy via `wrangler deploy` (or let
+`internal/workers/mcp/wrangler.toml` and redeploy via `wrangler deploy` (or let
 the GitHub Action pick it up on the next release).
 The fallback `*.workers.dev` URL stays live for free; the custom

package/docs/setup/mcp-cloud-setup.md CHANGED Viewed

@@ -82,7 +82,7 @@ Dashboard → **My Profile → API Tokens → Create Token → Custom token**:
 | Account · Workers R2 Storage | your account | Edit |
 | User · User Details | — | Read |
-If you uncomment the `routes` block in `workers/mcp/wrangler.toml`
+If you uncomment the `routes` block in `internal/workers/mcp/wrangler.toml`
 (custom domain cutover, Phase 5.2), add **Zone · DNS · Edit** on the
 relevant zone.
@@ -180,4 +180,4 @@ setup. Until cutover, the Worker serves on the free
 - [`docs/contracts/mcp-cloud-scope.md`](../contracts/mcp-cloud-scope.md) — A0-cloud contract
 - [`docs/setup/mcp-r2-bootstrap.md`](mcp-r2-bootstrap.md) — R2 layout & break-glass
 - [`docs/setup/mcp-cloud-endpoints.md`](mcp-cloud-endpoints.md) — URL shapes & DNS
-- [`workers/mcp/README.md`](../../workers/mcp/README.md) — Worker source overview
+- [`internal/workers/mcp/README.md`](../../internal/workers/mcp/README.md) — Worker source overview

package/docs/setup/mcp-r2-bootstrap.md CHANGED Viewed

@@ -44,7 +44,7 @@ npx wrangler r2 bucket create agent-config-mcp
 npx wrangler r2 bucket list | grep agent-config-mcp
 ```
-The Worker binding is declared in `workers/mcp/wrangler.toml` under
+The Worker binding is declared in `internal/workers/mcp/wrangler.toml` under
 `[[r2_buckets]]`. The pipeline reads/writes via the wrangler CLI in CI,
 not via the Worker — A0-cloud invariant 2 forbids the Worker from
 issuing R2 writes.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@event4u/agent-config",
-    "version": "3.1.1",
+    "version": "3.3.0",
     "description": "Universal AI Agent OS \u2014 audited skills, governance rules, commands, and templates for AI coding tools (Claude Code, Cursor, Windsurf, Copilot).",
     "license": "MIT",
     "private": false,
@@ -26,7 +26,9 @@
         "skills",
         "prompt-engineering",
         "typescript",
-        "python"
+        "python",
+        "agent-skills",
+        "cinematic-ai-video"
     ],
     "files": [
         ".agent-src/",

package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc CHANGED Viewed

Binary file

package/scripts/_lib/__pycache__/__init__.cpython-312.pyc CHANGED Viewed

Binary file

package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc CHANGED Viewed

Binary file

package/scripts/_lib/bench_caveman.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # Caveman compression bench — step-16 Phase 1 Step 4.
 #
-# Three-arm live bench against bench/corpora/caveman/prompts.yaml:
+# Three-arm live bench against internal/bench/corpora/caveman/prompts.yaml:
 #   compressed     — system prompt embeds caveman-speak rule (aggressive).
 #   terse_control  — system prompt = "Answer concisely. …" (carve-out-free baseline).
 #   uncompressed   — generic helpful-assistant system prompt.
@@ -131,7 +131,7 @@ class PromptResult:
 # ── corpus + runner ────────────────────────────────────────────────────
 def load_corpus(corpus_path: Path) -> list[dict[str, Any]]:
-    """Read bench/corpora/caveman/prompts.yaml → list of prompt dicts."""
+    """Read internal/bench/corpora/caveman/prompts.yaml → list of prompt dicts."""
     data = yaml.safe_load(corpus_path.read_text(encoding="utf-8")) or {}
     prompts = data.get("prompts") or []
     if not prompts:

package/scripts/_lib/bench_caveman_report.py CHANGED Viewed

@@ -144,7 +144,7 @@ def render_caveman_markdown(report: dict[str, Any]) -> str:
         "## Notes",
         "",
         f"- corpus: `{report['corpus']['path']}`",
-        f"- pricing: `bench/pricing.yaml` (sourced {cost.get('pricing_sourced_on') or '—'})",
+        f"- pricing: `internal/bench/pricing.yaml` (sourced {cost.get('pricing_sourced_on') or '—'})",
         f"- schema: `caveman-v1` (see `docs/contracts/benchmark-report-schema.md`)",
         f"- bench_run version: `{report['runner']['bench_run_version']}`",
         "",

package/scripts/_lib/bench_cost.py CHANGED Viewed

@@ -2,7 +2,7 @@
 #
 # Reads Claude Code session jsonl summaries (one summary line per session)
 # from agents/cost-tracking/sessions.jsonl — produced by scripts/cost/track.mjs
-# — and aggregates totals using model rates from bench/pricing.yaml.
+# — and aggregates totals using model rates from internal/bench/pricing.yaml.
 #
 # Returns the dict shape declared in docs/contracts/benchmark-report-schema.md
 # § JSON schema (v1) `cost`. When the source jsonl is missing, returns the
@@ -24,7 +24,7 @@ TIER_KEYS = ("haiku", "sonnet", "opus", UNKNOWN_TIER)
 def load_pricing(pricing_path: Path) -> tuple[dict[str, dict[str, float]], str | None]:
-    """Return ({tier: rates}, oldest_sourced_on) from bench/pricing.yaml."""
+    """Return ({tier: rates}, oldest_sourced_on) from internal/bench/pricing.yaml."""
     if yaml is None or not pricing_path.is_file():
         return {}, None
     data = yaml.safe_load(pricing_path.read_text(encoding="utf-8")) or {}

package/scripts/_lib/bench_report.py CHANGED Viewed

@@ -2,7 +2,7 @@
 #
 # Serializes the unified report dict to JSON + Markdown per
 # docs/contracts/benchmark-report-schema.md. Filename format:
-# `bench/reports/<UTC ISO-8601 with : -> ->-<corpus_id>.{json,md}`.
+# `internal/bench/reports/<UTC ISO-8601 with ":" replaced by "-">-<corpus_id>.{json,md}`.
 """Report emitter for the bench runner."""
 from __future__ import annotations
@@ -133,7 +133,7 @@ def render_markdown(report: dict[str, Any]) -> str:
     notes = (
         "## Notes\n\n"
         f"- corpus path: `{corpus['path']}` · prompts: **{corpus['prompt_count']}**\n"
-        f"- pricing: `bench/pricing.yaml`\n"
+        f"- pricing: `internal/bench/pricing.yaml`\n"
         f"- baseline collector: `{report['runner']['baseline_collector']}`\n"
     )
     return "\n\n".join([