@event4u/agent-config 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/.agent-src/commands/agent-status.md +1 -1
  2. package/.agent-src/skills/compress-memory/SKILL.md +1 -1
  3. package/.claude-plugin/marketplace.json +1 -1
  4. package/AGENTS.md +5 -4
  5. package/CHANGELOG.md +24 -0
  6. package/dist/discovery/deprecation-report.md +1 -1
  7. package/dist/discovery/discovery-manifest.json +4 -4
  8. package/dist/discovery/discovery-manifest.json.sha256 +1 -1
  9. package/dist/discovery/discovery-manifest.summary.md +1 -1
  10. package/dist/discovery/orphan-report.md +1 -1
  11. package/dist/discovery/packs.json +2 -2
  12. package/dist/discovery/trust-report.md +1 -1
  13. package/dist/discovery/workspaces.json +2 -2
  14. package/dist/mcp/registry-manifest.json +1 -1
  15. package/docs/benchmarks.md +4 -4
  16. package/docs/contracts/CHANGELOG-conventions.md +1 -1
  17. package/docs/contracts/adr-mcp-runtime.md +1 -1
  18. package/docs/contracts/benchmark-corpus-spec.md +3 -3
  19. package/docs/contracts/benchmark-report-schema.md +5 -5
  20. package/docs/contracts/caveman-telemetry.md +4 -4
  21. package/docs/contracts/compression-default-kill-criterion.md +5 -5
  22. package/docs/contracts/cost-enforcement.md +1 -1
  23. package/docs/contracts/mcp-beta-criteria.md +1 -1
  24. package/docs/contracts/mcp-cloud-scope.md +4 -4
  25. package/docs/contracts/mcp-registry-manifest.schema.json +1 -1
  26. package/docs/contracts/mcp-tool-inventory.md +1 -1
  27. package/docs/contracts/mcp-tool-stub-envelope.md +1 -1
  28. package/docs/contracts/measurement-baseline.md +6 -6
  29. package/docs/decisions/ADR-027-changelog-machine-vs-manual.md +129 -0
  30. package/docs/decisions/ADR-028-root-layout.md +147 -0
  31. package/docs/decisions/ADR-029-multi-workspace-deferred.md +122 -0
  32. package/docs/decisions/INDEX.md +8 -0
  33. package/docs/mcp-server.md +1 -1
  34. package/docs/parity/bench-ruflo.json +3 -3
  35. package/docs/parity/ruflo.md +1 -1
  36. package/docs/setup/mcp-client-config.md +1 -1
  37. package/docs/setup/mcp-cloud-endpoints.md +1 -1
  38. package/docs/setup/mcp-cloud-setup.md +2 -2
  39. package/docs/setup/mcp-r2-bootstrap.md +1 -1
  40. package/package.json +1 -1
  41. package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
  42. package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
  43. package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
  44. package/scripts/_lib/bench_caveman.py +2 -2
  45. package/scripts/_lib/bench_caveman_report.py +1 -1
  46. package/scripts/_lib/bench_cost.py +2 -2
  47. package/scripts/_lib/bench_report.py +2 -2
  48. package/scripts/audit_mcp_tools.py +1 -1
  49. package/scripts/bench_baseline_ready.py +3 -3
  50. package/scripts/bench_compress_memory.py +4 -4
  51. package/scripts/bench_drift_check.py +2 -2
  52. package/scripts/bench_per_tool.py +2 -2
  53. package/scripts/bench_run.py +4 -4
  54. package/scripts/build_mcp_registry_manifest.py +2 -2
  55. package/scripts/mcp_server/__init__.py +1 -1
  56. package/scripts/mcp_server/catalog.py +1 -1
  57. package/scripts/mcp_server/consumer_tool_catalog.json +1 -1
  58. package/scripts/mcp_server/tools.py +1 -1
  59. package/scripts/pack_mcp_content.py +6 -6
  60. package/scripts/skill_trigger_eval.py +2 -2
@@ -0,0 +1,147 @@
1
+ ---
2
+ adr: 028
3
+ status: accepted
4
+ date: 2026-05-25
5
+ decision: root-layout
6
+ supersedes: —
7
+ superseded_by: —
8
+ phase: v3.x · root-layout-cleanup Phase 1
9
+ type: structural
10
+ review_date: 2027-05-25
11
+ ---
12
+
13
+ # ADR-028 — Root layout — targeted prune now, multi-workspace deferred behind four audits
14
+
15
+ ## Status
16
+
17
+ **Accepted** · 2026-05-25. Encodes the AI Council verdict from
18
+ [`root-cleanup-organizing-principle-2026-05-25.synthesis.md`](../../agents/runtime/council/sessions/root-cleanup-organizing-principle-2026-05-25.synthesis.md)
19
+ and opens
20
+ [`road-to-root-layout-cleanup.md`](../../agents/roadmaps/archive/road-to-root-layout-cleanup.md) (archived; complete).
21
+ Time-boxed: review on **2027-05-25** or earlier if any trigger below fires.
22
+
23
+ ## Context
24
+
25
+ A request to move "everything not needed at root into `./src/`" failed
26
+ the reality check on three counts:
27
+
28
+ 1. **`./src/` is occupied** — the TypeScript application (CLI · Server · UI · shared) per
29
+ [`ADR-012`](ADR-012-typescript-cli-shell.md) / [`ADR-016`](ADR-016-installer-architecture.md).
30
+ Moving general tooling into `./src/` collides with the app boundary.
31
+ 2. **`router.json` is not at root** — lives under `./dist/`, governed by
32
+ [`ADR-019`](ADR-019-router-json-dist-location.md). Out of scope.
33
+ 3. **`setup.sh` is the curl entry point** — referenced by external installers
34
+ (`bash <(curl …setup.sh)`). Discovery surface, immovable.
35
+
36
+ A council session (2 members, $0.13 actual) reframed the problem:
37
+
38
+ > "Root noise" is not a *discoverability* problem (GitHub paginates
39
+ > anyway, npm consumers never see it). It is a *maintainability*
40
+ > problem — "where do new internal tools belong?"
41
+
42
+ With that reframe, the cheap-and-safe move (Option 1 — targeted prune)
43
+ becomes obvious, and the deep restructure (multi-workspace) drops to a
44
+ conditional follow-up gated by evidence the package does not yet have.
45
+
46
+ ### Consumer-contract surface check (Phase 1 scope)
47
+
48
+ Re-audit of `bench/`, `evals/`, `workers/`, `user-types/` against the
49
+ installer / projector / CI:
50
+
51
+ | Dir | `scripts/install.py` | `scripts/compress.py` projection | `.github/workflows/*` | Verdict |
52
+ |---|---|---|---|---|
53
+ | `bench/` | — | — | `bench-drift.yml` (path filter) | **movable** |
54
+ | `evals/` | — | — | — | **movable** |
55
+ | `workers/` | — | — | `deploy-mcp-worker.yml` (working-dir, 6+ refs) | **movable** (CI updates only) |
56
+ | `user-types/` | `USER_TYPES_DIR = "user-types"` (line 52) | `AUGMENT_SYMLINK_DIRS` includes `"user-types"` | — | **immovable — public contract** |
57
+
58
+ `user-types/` is dropped from Phase 1 — installer + projector reference
59
+ it as a stable root path. Surfaced during execution, not in the
60
+ council's original scope (the council recommended all four; the audit
61
+ narrowed it).
62
+
63
+ ## Decision
64
+
65
+ **Three-phase strategy.** Phase 1 ships now; Phases 2–3 are conditional.
66
+
67
+ ### Phase 1 — Targeted Prune (immediate, ≤ 1 day, no version bump)
68
+
69
+ Move into a new `./internal/` umbrella:
70
+
71
+ - `bench/` → `internal/bench/`
72
+ - `evals/` → `internal/evals/`
73
+ - `workers/` → `internal/workers/`
74
+
75
+ Update:
76
+
77
+ - `.github/workflows/bench-drift.yml` — path filter.
78
+ - `.github/workflows/deploy-mcp-worker.yml` — `working-directory` + `cache-dependency-path`.
79
+ - `taskfiles/engine.yml`, `taskfiles/mcp.yml` — `dir:` references.
80
+ - `AGENTS.md` — placement rule pointer.
81
+
82
+ Outcome: 3 fewer root entries; zero consumer risk; precedent for
83
+ "maintainer-internal → `./internal/`".
84
+
85
+ ### Phase 2 — Pre-audits (gates Phase 3, no time-box)
86
+
87
+ Four audits must complete and pass before Phase 3 opens:
88
+
89
+ 1. **Consumer-contract audit** — GitHub code search +
90
+ `node_modules/@event4u/agent-config/` path probing for
91
+ `scripts/`, `templates/`, `config/`, `schemas/` references.
92
+ 2. **Symlink-mobility test** — verify Cursor / Claude / Windsurf
93
+ honor symlinked projections (`.cursor/` → `./projections/.cursor/`).
94
+ 3. **Hash-sequencing audit** — confirm `.compression-hashes.json`
95
+ uses paths that survive `.agent-src/` relocation (or document the
96
+ regeneration migration).
97
+ 4. **CI-path audit** — every hardcoded path in
98
+ `.github/workflows/*.yml` and `taskfiles/*.yml`.
99
+
100
+ Each audit produces a verdict file under
101
+ `agents/evidence/audits/2026-XX-root-layout-phaseN/`.
102
+
103
+ ### Phase 3 — Conditional multi-workspace (deferred, gated)
104
+
105
+ Only if **all four audits pass**: restructure to npm-workspaces with
106
+ `tooling/` (Python maintainer scripts), `runtime/` (TS app), and
107
+ optionally `projections/` (host-agent configs). If any audit fails,
108
+ Phase 3 closes as "not feasible" and a successor ADR documents the
109
+ blocker.
110
+
111
+ ## Consequences
112
+
113
+ - New top-level `./internal/` directory becomes the home for
114
+ maintainer-only tooling. `AGENTS.md` documents this with one line.
115
+ - `.gitignore` and `eslint`/`pyproject` ignore rules updated as
116
+ needed.
117
+ - The four Phase 2 audits are prerequisites, not scheduled work. They are
118
+ *not* sprint tasks — they run on demand when someone wants to
119
+ re-open multi-workspace.
120
+ - Review on **2027-05-25** or earlier if any trigger fires:
121
+ 1. A new maintainer-only dir is added at root (signal: `./internal/`
122
+ convention is breaking down).
123
+ 2. Phase 2 audits all return clean (signal: Phase 3 is ready).
124
+ 3. A consumer reports breakage from a Phase 1 path change (signal:
125
+ audit missed a contract).
126
+ 4. Council session re-opens the question with new evidence.
127
+
128
+ ## Alternatives considered
129
+
130
+ | Option | Why rejected |
131
+ |---|---|
132
+ | Move everything into `./src/` (original request) | `./src/` is the TS app per ADR-012/016. Collision. |
133
+ | Option 2 — `./tooling/` umbrella with `templates/` / `config/` / `schemas/` | Council: cost underestimated (duplicate-then-deprecate + 2-month window + major bump); installer paths in those dirs are unaudited. |
134
+ | Option 3 — full category-coded migration | Council hard-reject: projection mobility unproven; compression-hash sequencing risk; unshippable without symlink test. |
135
+ | Option 4 — `MAP.md` documentation | Council: adds 51st entry; documentation-as-apology; AGENTS.md already serves this role. |
136
+ | Skip Phase 1, jump to multi-workspace prototype | Loses the cheap visible win; Phase 2 audits unfunded; risks scope creep into Phase 3. |
137
+
138
+ ## References
139
+
140
+ - [`agents/runtime/council/questions/root-cleanup-organizing-principle-2026-05-25.md`](../../agents/runtime/council/questions/root-cleanup-organizing-principle-2026-05-25.md) — council brief.
141
+ - [`agents/runtime/council/sessions/root-cleanup-organizing-principle-2026-05-25.synthesis.md`](../../agents/runtime/council/sessions/root-cleanup-organizing-principle-2026-05-25.synthesis.md) — full synthesis.
142
+ - [`agents/roadmaps/archive/road-to-root-layout-cleanup.md`](../../agents/roadmaps/archive/road-to-root-layout-cleanup.md) — execution roadmap (archived; Phase 1 ✅, Phase 2 ✅, Phase 3 closed).
143
+ - [`agents/evidence/audits/2026-05-root-layout-phase2/`](../../agents/evidence/audits/2026-05-root-layout-phase2/) — Phase 2 audit verdict bundle.
144
+ - [`ADR-029`](ADR-029-multi-workspace-deferred.md) — Phase 3 close-out (multi-workspace deferred indefinitely).
145
+ - [`ADR-012`](ADR-012-typescript-cli-shell.md), [`ADR-016`](ADR-016-installer-architecture.md) — `./src/` is the TS app.
146
+ - [`ADR-019`](ADR-019-router-json-dist-location.md) — `router.json` lives in `./dist/`.
147
+ - `scripts/install.py:52` (`USER_TYPES_DIR`), `scripts/compress.py:1106` (`AUGMENT_SYMLINK_DIRS`) — evidence pinning `user-types/` to root.
@@ -0,0 +1,122 @@
1
+ ---
2
+ adr: 029
3
+ status: accepted
4
+ date: 2026-05-25
5
+ decision: multi-workspace-deferred
6
+ supersedes: —
7
+ superseded_by: —
8
+ phase: v3.x · root-layout-cleanup Phase 3 close-out
9
+ type: structural
10
+ review_date: 2027-05-25
11
+ ---
12
+
13
+ # ADR-029 — Multi-workspace restructure deferred; Phase 3 closed pending L0 symlink-mobility evidence
14
+
15
+ ## Status
16
+
17
+ **Accepted** · 2026-05-25. Successor to
18
+ [`ADR-028`](ADR-028-root-layout.md) Phase 3. Closes the multi-workspace
19
+ restructure (Option 5 from the original council session) as **not
20
+ feasible today**, with a re-open path documented below. Time-boxed:
21
+ review on **2027-05-25** or earlier if any re-open trigger fires.
22
+
23
+ ## Context
24
+
25
+ [`ADR-028`](ADR-028-root-layout.md) defined a three-phase strategy:
26
+
27
+ 1. **Phase 1** — move `bench/`, `evals/`, `workers/` to `internal/`. **Shipped.**
28
+ 2. **Phase 2** — run four pre-audits that gate Phase 3.
29
+ 3. **Phase 3** — conditional multi-workspace restructure
30
+ (`tooling/` · `runtime/` · `projections/`), only if all four
31
+ Phase 2 audits return clean.
32
+
33
+ Phase 2 ran in the same PR as Phase 1 (under maintainer mandate).
34
+ Verdict bundle:
35
+ [`agents/evidence/audits/2026-05-root-layout-phase2/`](../../agents/evidence/audits/2026-05-root-layout-phase2/).
36
+
37
+ | # | Audit | Verdict |
38
+ |---|---|---|
39
+ | 1 | Consumer-contract | ✅ Pass — published surface enumerated |
40
+ | 2 | Symlink-mobility | ⚠️ Partial — subdirectory symlinks proven, top-level untested |
41
+ | 3 | Hash-sequencing | ✅ Pass — source-relative keys, idempotent regeneration |
42
+ | 4 | CI-path inventory | ✅ Pass — ~27 edit points enumerated |
43
+
44
+ Audit 2 is the blocker. The multi-workspace option requires
45
+ **L0 symlinks** (tool root directory itself becomes a symlink, e.g.
46
+ `.cursor/ → projections/.cursor/`). The package today only proves
47
+ **L1 symlinks** (subdirectory level, e.g. `.augment/skills/ →
48
+ ../.agent-src/skills/`). L0 has never been tested against current
49
+ Cursor, Claude Code, or Windsurf builds — and a fourth host agent
50
+ (Augment Code) is already known to refuse symlinked rule files at L1,
51
+ which is the precedent that motivates the audit in the first place.
52
+
53
+ Without L0 evidence, executing Phase 3 would either ship a broken
54
+ projection for at least one host agent or force a fallback to
55
+ per-directory copies that defeat the "single source of truth" win the
56
+ multi-workspace shape is meant to deliver.
57
+
58
+ ## Decision
59
+
60
+ **Defer Phase 3 indefinitely.** Close the Phase 3 roadmap step as "not
61
+ feasible today". Keep the audit bundle as the canonical evidence base
62
+ so that a future maintainer can re-open the question without redoing
63
+ the work.
64
+
65
+ The four root-layout claims survive Phase 1 unchanged:
66
+
67
+ - `bench/`, `evals/`, `workers/` are gone from root (under `internal/`).
68
+ - `user-types/` stays at root (immovable per Audit 1 + ADR-028).
69
+ - Top-level tool roots (`.augment/`, `.cursor/`, `.claude/`, `.clinerules/`)
70
+ stay as real directories with L1 symlinks pointing at `.agent-src/`.
71
+ - The "maintainer-internal → `./internal/`" precedent is the new
72
+ placement rule for new internal dirs (already in `AGENTS.md`).
73
+
74
+ ## Re-open conditions
75
+
76
+ Phase 3 becomes eligible when **all** of these hold:
77
+
78
+ 1. A maintainer (or community contributor) runs the L0 symlink test
79
+ documented in
80
+ [`02-symlink-mobility.md`](../../agents/evidence/audits/2026-05-root-layout-phase2/02-symlink-mobility.md)
81
+ against current Cursor + Claude Code + Windsurf, captures the
82
+ result, and amends Audit 2 to ✅ or ❌.
83
+ 2. If Audit 2 lands ✅: the ~27 CI-path edit points from Audit 4 are
84
+ accepted as in-scope for the migration window; no new hardcoded
85
+ paths added in the interim invalidate the inventory.
86
+ 3. A council session synthesizes the updated verdict bundle and
87
+ produces a fresh recommendation (multi-workspace vs. stay-as-is).
88
+ 4. The maintainer accepts the deprecation cycle cost (installer
89
+ version bump + dual-write window for the projection contract).
90
+
91
+ ## Consequences
92
+
93
+ - The root layout stabilizes at the Phase 1 shape for ≥ 1 year (the
94
+ `2027-05-25` review date).
95
+ - New maintainer-internal directories go under `internal/`. New
96
+ tooling that needs to ship to consumers goes under `scripts/`,
97
+ `config/`, or a new top-level entry that earns its own ADR.
98
+ - The `projections/` umbrella idea is **not dead** — it is gated on
99
+ fresh L0 evidence, not on a new design decision.
100
+ - The four audit files are reusable: Audit 1 (consumer surface),
101
+ Audit 3 (hash portability), and Audit 4 (CI path inventory) remain
102
+ valid until a structural change invalidates them; only Audit 2
103
+ needs runtime re-verification.
104
+
105
+ ## Alternatives considered
106
+
107
+ | Option | Why rejected |
108
+ |---|---|
109
+ | Execute Phase 3 anyway with L0 untested | Ships projection breakage to one of three host agents in the worst case; the win (single source of truth) collapses if any agent forces a fallback to copies. |
110
+ | Run the L0 test in CI | The L0 test requires the host agent's runtime (Cursor / Claude Code IDE plugins); CI cannot exercise it. |
111
+ | Defer **all** of Phase 3 to a separate roadmap | Phase 2 already produced the audit bundle; closing Phase 3 with an ADR captures the verdict without leaving a stale roadmap open. |
112
+ | Re-shape Phase 3 as L1-only | The council's recommendation specifically called out the `projections/` umbrella, which requires L0. Re-shaping to L1-only is a different decision the council did not weigh; would need a fresh council session. |
113
+
114
+ ## References
115
+
116
+ - [`ADR-028`](ADR-028-root-layout.md) — parent decision.
117
+ - [`agents/evidence/audits/2026-05-root-layout-phase2/`](../../agents/evidence/audits/2026-05-root-layout-phase2/) —
118
+ full audit bundle (4 verdict files + README).
119
+ - [`agents/roadmaps/archive/road-to-root-layout-cleanup.md`](../../agents/roadmaps/archive/road-to-root-layout-cleanup.md) —
120
+ execution roadmap, archived (Phase 1 ✅, Phase 2 ✅, Phase 3 closed via this ADR).
121
+ - [`agents/runtime/council/sessions/root-cleanup-organizing-principle-2026-05-25.synthesis.md`](../../agents/runtime/council/sessions/root-cleanup-organizing-principle-2026-05-25.synthesis.md) —
122
+ original council synthesis that proposed the multi-workspace shape.
@@ -25,6 +25,14 @@ _Auto-generated by `scripts/adr/regenerate_index.py`. Do not edit._
25
25
  | [ADR-019](ADR-019-router-json-dist-location.md) | Router Json Dist Location | accepted | 2026-05-23 | — |
26
26
  | [ADR-020](ADR-020-global-only-consumer-scope.md) | Global Only Consumer Scope | accepted | 2026-05-23 | — |
27
27
  | [ADR-021](ADR-021-deployment-shape.md) | Deployment Shape | accepted | 2026-05-24 | — |
28
+ | [ADR-022](ADR-022-daily-workspace-decomposition.md) | Daily Workspace Decomposition | accepted | 2026-05-24 | — |
29
+ | [ADR-023](ADR-023-host-agent-protocol.md) | Host Agent Protocol | accepted | 2026-05-24 | — |
30
+ | [ADR-024](ADR-024-workspace-v0-feature-floor.md) | Workspace V0 Feature Floor | accepted | 2026-05-24 | — |
31
+ | [ADR-025](ADR-025-workspace-chrome.md) | Workspace Chrome | accepted | 2026-05-24 | — |
32
+ | [ADR-026](ADR-026-explain-mode-translation.md) | Explain Mode Translation | accepted | 2026-05-24 | — |
33
+ | [ADR-027](ADR-027-changelog-machine-vs-manual.md) | Changelog Machine Vs Manual | accepted | 2026-05-25 | — |
34
+ | [ADR-028](ADR-028-root-layout.md) | Root Layout | accepted | 2026-05-25 | — |
35
+ | [ADR-029](ADR-029-multi-workspace-deferred.md) | Multi Workspace Deferred | accepted | 2026-05-25 | — |
28
36
 
29
37
  ## Unnumbered (legacy)
30
38
 
@@ -13,7 +13,7 @@ coexist:
13
13
  over JSON-RPC. Used by clients that speak MCP natively. Default for personal
14
14
  installs.
15
15
  - **Remote MCP** *(experimental, opt-in)* — a Cloudflare-hosted TypeScript
16
- Worker (`workers/mcp/`) serves the same wire surface over HTTP/SSE for
16
+ Worker (`internal/workers/mcp/`) serves the same wire surface over HTTP/SSE for
17
17
  hosted-agent platforms. URL shapes pinned in
18
18
  [`docs/setup/mcp-cloud-endpoints.md`](setup/mcp-cloud-endpoints.md);
19
19
  safety contract in
@@ -11,11 +11,11 @@
11
11
  "type": "claimed_upstream_not_verified_in_repo"
12
12
  },
13
13
  "measurement_protocol": {
14
- "corpus": "bench/corpus/* (25-prompt corpus owned by step-4-measurement-and-benchmark.md)",
14
+ "corpus": "internal/bench/corpus/* (25-prompt corpus owned by step-4-measurement-and-benchmark.md)",
15
15
  "tracker": "scripts/cost/track.mjs",
16
- "pricing": "bench/pricing.yaml",
16
+ "pricing": "internal/bench/pricing.yaml",
17
17
  "session_source": "~/.claude/projects/*/sessions/*.jsonl (Claude Code-native, no manual tracking)",
18
- "tokens_to_dollars": "track.mjs multiplies input/output/cache-read/cache-write tokens by per-1M pricing from bench/pricing.yaml, separated by model id",
18
+ "tokens_to_dollars": "track.mjs multiplies input/output/cache-read/cache-write tokens by per-1M pricing from internal/bench/pricing.yaml, separated by model id",
19
19
  "headline_output": "average dollar cost per 25-prompt run, with min / max / p50 / p90 across N reports"
20
20
  },
21
21
  "current_window": {
@@ -23,7 +23,7 @@ soak in [`bench.json`](bench.json) flips from `warmup` to `baseline_ready`
23
23
 
24
24
  | # | Ruflo pattern | Verdict | Evidence |
25
25
  |---|---|---|---|
26
- | 1 | **Cost-tracker plugin** — real model pricing, per-1M, separated input/output/cache | `[x] covered by` | [`scripts/cost/track.mjs`](../../scripts/cost/track.mjs) + [`bench/pricing.yaml`](../../bench/pricing.yaml) (Haiku/Sonnet/Opus per-1M, input/output/cache-read/cache-write split). Step-11 Phase 1. |
26
+ | 1 | **Cost-tracker plugin** — real model pricing, per-1M, separated input/output/cache | `[x] covered by` | [`scripts/cost/track.mjs`](../../scripts/cost/track.mjs) + [`internal/bench/pricing.yaml`](../../internal/bench/pricing.yaml) (Haiku/Sonnet/Opus per-1M, input/output/cache-read/cache-write split). Step-11 Phase 1. |
27
27
  | 2 | **Auto-capture from session jsonl** — reads Claude Code log, no manual tracking | `[x] covered by` | [`scripts/cost/track.mjs`](../../scripts/cost/track.mjs) reads `~/.claude/projects/*/sessions/*.jsonl` automatically. Step-11 Phase 1 Step 1. |
28
28
  | 3 | **50/75/90/100 % budget ladder with hard stop** | `[x] covered by` | [`scripts/cost/budget.mjs`](../../scripts/cost/budget.mjs) — exit codes 0/1/2/3 per tier; opt-in fail-closed via `cost.enforcement` setting. Fixtures: `tests/fixtures/cost/budget/{under-50,at-100,over-100}/`. Step-11 Phase 2. |
29
29
  | 4 | **Measured-vs-claimed disclaimer** — every percentage tagged "claimed upstream" | `[x] covered by` | One-line `**Measured-vs-claimed disclaimer:**` header block on all 9 active roadmaps in `agents/roadmaps/`. Verified 2026-05-16. Step-11 Phase 5 Step 4. |
@@ -5,7 +5,7 @@ Worker. Read-only, identity-stable per release. Optional Bearer-token
5
5
  auth — see [§ Bearer auth](#bearer-auth) below.
6
6
 
7
7
  > **No public endpoint.** This package ships the Worker source under
8
- > `workers/mcp/`, but does **not** operate a shared hosted MCP server.
8
+ > `internal/workers/mcp/`, but does **not** operate a shared hosted MCP server.
9
9
  > Deploy your own per [`mcp-cloud-setup.md`](mcp-cloud-setup.md) — your
10
10
  > URL will be `https://agent-config-mcp.<your-account>.workers.dev`
11
11
  > (or a custom domain you wire up in Step 7).
@@ -69,7 +69,7 @@ curl -s -X POST https://mcp.<your-domain>/ \
69
69
  ```
70
70
 
71
71
  After DNS is live, uncomment the `routes` block in
72
- `workers/mcp/wrangler.toml` and redeploy via `wrangler deploy` (or let
72
+ `internal/workers/mcp/wrangler.toml` and redeploy via `wrangler deploy` (or let
73
73
  the GitHub Action pick it up on the next release).
74
74
 
75
75
  The fallback `*.workers.dev` URL stays live for free; the custom
@@ -82,7 +82,7 @@ Dashboard → **My Profile → API Tokens → Create Token → Custom token**:
82
82
  | Account · Workers R2 Storage | your account | Edit |
83
83
  | User · User Details | — | Read |
84
84
 
85
- If you uncomment the `routes` block in `workers/mcp/wrangler.toml`
85
+ If you uncomment the `routes` block in `internal/workers/mcp/wrangler.toml`
86
86
  (custom domain cutover, Phase 5.2), add **Zone · DNS · Edit** on the
87
87
  relevant zone.
88
88
 
@@ -180,4 +180,4 @@ setup. Until cutover, the Worker serves on the free
180
180
  - [`docs/contracts/mcp-cloud-scope.md`](../contracts/mcp-cloud-scope.md) — A0-cloud contract
181
181
  - [`docs/setup/mcp-r2-bootstrap.md`](mcp-r2-bootstrap.md) — R2 layout & break-glass
182
182
  - [`docs/setup/mcp-cloud-endpoints.md`](mcp-cloud-endpoints.md) — URL shapes & DNS
183
- - [`workers/mcp/README.md`](../../workers/mcp/README.md) — Worker source overview
183
+ - [`internal/workers/mcp/README.md`](../../internal/workers/mcp/README.md) — Worker source overview
@@ -44,7 +44,7 @@ npx wrangler r2 bucket create agent-config-mcp
44
44
  npx wrangler r2 bucket list | grep agent-config-mcp
45
45
  ```
46
46
 
47
- The Worker binding is declared in `workers/mcp/wrangler.toml` under
47
+ The Worker binding is declared in `internal/workers/mcp/wrangler.toml` under
48
48
  `[[r2_buckets]]`. The pipeline reads/writes via the wrangler CLI in CI,
49
49
  not via the Worker — A0-cloud invariant 2 forbids the Worker from
50
50
  issuing R2 writes.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@event4u/agent-config",
3
- "version": "3.2.0",
3
+ "version": "3.3.0",
4
4
  "description": "Universal AI Agent OS \u2014 audited skills, governance rules, commands, and templates for AI coding tools (Claude Code, Cursor, Windsurf, Copilot).",
5
5
  "license": "MIT",
6
6
  "private": false,
@@ -1,6 +1,6 @@
1
1
  # Caveman compression bench — step-16 Phase 1 Step 4.
2
2
  #
3
- # Three-arm live bench against bench/corpora/caveman/prompts.yaml:
3
+ # Three-arm live bench against internal/bench/corpora/caveman/prompts.yaml:
4
4
  # compressed — system prompt embeds caveman-speak rule (aggressive).
5
5
  # terse_control — system prompt = "Answer concisely. …" (carve-out-free baseline).
6
6
  # uncompressed — generic helpful-assistant system prompt.
@@ -131,7 +131,7 @@ class PromptResult:
131
131
  # ── corpus + runner ────────────────────────────────────────────────────
132
132
 
133
133
  def load_corpus(corpus_path: Path) -> list[dict[str, Any]]:
134
- """Read bench/corpora/caveman/prompts.yaml → list of prompt dicts."""
134
+ """Read internal/bench/corpora/caveman/prompts.yaml → list of prompt dicts."""
135
135
  data = yaml.safe_load(corpus_path.read_text(encoding="utf-8")) or {}
136
136
  prompts = data.get("prompts") or []
137
137
  if not prompts:
@@ -144,7 +144,7 @@ def render_caveman_markdown(report: dict[str, Any]) -> str:
144
144
  "## Notes",
145
145
  "",
146
146
  f"- corpus: `{report['corpus']['path']}`",
147
- f"- pricing: `bench/pricing.yaml` (sourced {cost.get('pricing_sourced_on') or '—'})",
147
+ f"- pricing: `internal/bench/pricing.yaml` (sourced {cost.get('pricing_sourced_on') or '—'})",
148
148
  f"- schema: `caveman-v1` (see `docs/contracts/benchmark-report-schema.md`)",
149
149
  f"- bench_run version: `{report['runner']['bench_run_version']}`",
150
150
  "",
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # Reads Claude Code session jsonl summaries (one summary line per session)
4
4
  # from agents/cost-tracking/sessions.jsonl — produced by scripts/cost/track.mjs
5
- # — and aggregates totals using model rates from bench/pricing.yaml.
5
+ # — and aggregates totals using model rates from internal/bench/pricing.yaml.
6
6
  #
7
7
  # Returns the dict shape declared in docs/contracts/benchmark-report-schema.md
8
8
  # § JSON schema (v1) `cost`. When the source jsonl is missing, returns the
@@ -24,7 +24,7 @@ TIER_KEYS = ("haiku", "sonnet", "opus", UNKNOWN_TIER)
24
24
 
25
25
 
26
26
  def load_pricing(pricing_path: Path) -> tuple[dict[str, dict[str, float]], str | None]:
27
- """Return ({tier: rates}, oldest_sourced_on) from bench/pricing.yaml."""
27
+ """Return ({tier: rates}, oldest_sourced_on) from internal/bench/pricing.yaml."""
28
28
  if yaml is None or not pricing_path.is_file():
29
29
  return {}, None
30
30
  data = yaml.safe_load(pricing_path.read_text(encoding="utf-8")) or {}
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # Serializes the unified report dict to JSON + Markdown per
4
4
  # docs/contracts/benchmark-report-schema.md. Filename format:
5
- # `bench/reports/<UTC ISO-8601 with : -> ->-<corpus_id>.{json,md}`.
5
+ # `internal/bench/reports/<UTC ISO-8601 with ":" replaced by "-">-<corpus_id>.{json,md}`.
6
6
  """Report emitter for the bench runner."""
7
7
  from __future__ import annotations
8
8
 
@@ -133,7 +133,7 @@ def render_markdown(report: dict[str, Any]) -> str:
133
133
  notes = (
134
134
  "## Notes\n\n"
135
135
  f"- corpus path: `{corpus['path']}` · prompts: **{corpus['prompt_count']}**\n"
136
- f"- pricing: `bench/pricing.yaml`\n"
136
+ f"- pricing: `internal/bench/pricing.yaml`\n"
137
137
  f"- baseline collector: `{report['runner']['baseline_collector']}`\n"
138
138
  )
139
139
  return "\n\n".join([
@@ -105,7 +105,7 @@ def _render(catalog: dict, handlers: dict[str, int], cat_lines: dict[str, int])
105
105
  lines.append("## Glossary")
106
106
  lines.append("")
107
107
  lines.append("- **Side-effect** — `ro` (read-only) · `fs-write` (filesystem write) · `shell` (spawns processes).")
108
- lines.append("- **Transports** — `stdio` (`scripts/mcp_server/`) · `worker` (`workers/mcp/`). A tool may live on both.")
108
+ lines.append("- **Transports** — `stdio` (`scripts/mcp_server/`) · `worker` (`internal/workers/mcp/`). A tool may live on both.")
109
109
  lines.append("- **Stub** — catalog-listed for discovery; returns the `not_implemented` envelope from")
110
110
  lines.append(" [`mcp-tool-stub-envelope.md`](mcp-tool-stub-envelope.md) until promoted.")
111
111
  lines.append("")
@@ -2,7 +2,7 @@
2
2
  """Baseline-closure check — step-4 Phase 3 Step 4.
3
3
 
4
4
  Returns exit 0 iff the 60-day clock has elapsed since
5
- `bench/baseline-start.txt` AND `bench/reports/` contains at least
5
+ `internal/bench/baseline-start.txt` AND `internal/bench/reports/` contains at least
6
6
  `--min-reports` complete runs for the named corpus (default 30).
7
7
 
8
8
  Read by P2 enforcement roadmaps as their precondition (G1 gate in
@@ -50,8 +50,8 @@ def main(argv: list[str] | None = None) -> int:
50
50
  formatter_class=argparse.RawDescriptionHelpFormatter,
51
51
  )
52
52
  ap.add_argument("--corpus", default="dev")
53
- ap.add_argument("--reports-dir", default="bench/reports")
54
- ap.add_argument("--baseline-file", default="bench/baseline-start.txt")
53
+ ap.add_argument("--reports-dir", default="internal/bench/reports")
54
+ ap.add_argument("--baseline-file", default="internal/bench/baseline-start.txt")
55
55
  ap.add_argument("--min-days", type=int, default=60)
56
56
  ap.add_argument("--min-reports", type=int, default=30)
57
57
  ap.add_argument("--json", action="store_true")
@@ -3,10 +3,10 @@
3
3
 
4
4
  Runs `compress_memory.py` over a fixed corpus of memory-target files, records
5
5
  pre/post char counts, approximates input-token savings (chars / 4 — the
6
- GPT-4 / Claude rule of thumb), and emits `bench/reports/caveman-v2.{json,md}`.
6
+ GPT-4 / Claude rule of thumb), and emits `internal/bench/reports/caveman-v2.{json,md}`.
7
7
 
8
8
  Offline (no API calls). Cadence-aligned with `docs/benchmarks.md`. Citation
9
- in `bench/reports/caveman-v2.md` notes the chars→tokens approximation and
9
+ in `internal/bench/reports/caveman-v2.md` notes the chars→tokens approximation and
10
10
  points at upstream tiktoken / claude-tokenizer if a calibrated number is
11
11
  later needed.
12
12
  """
@@ -23,8 +23,8 @@ from pathlib import Path
23
23
 
24
24
  REPO_ROOT = Path(__file__).resolve().parent.parent
25
25
  COMPRESS_SCRIPT = REPO_ROOT / "scripts" / "compress_memory.py"
26
- REPORT_JSON = REPO_ROOT / "bench" / "reports" / "caveman-v2.json"
27
- REPORT_MD = REPO_ROOT / "bench" / "reports" / "caveman-v2.md"
26
+ REPORT_JSON = REPO_ROOT / "internal" / "bench" / "reports" / "caveman-v2.json"
27
+ REPORT_MD = REPO_ROOT / "internal" / "bench" / "reports" / "caveman-v2.md"
28
28
 
29
29
  CORPUS: list[tuple[str, str]] = [
30
30
  ("AGENTS.md", "thin-root-package"),
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  """Drift detector for the bench corpus — step-4 Phase 3 Step 2.
3
3
 
4
- Compares the latest `bench/reports/<stamp>-<corpus>.json` against the
4
+ Compares the latest `internal/bench/reports/<stamp>-<corpus>.json` against the
5
5
  previous N reports (default 5) for the same corpus. Drift defined as:
6
6
 
7
7
  - selection-accuracy: latest is more than `accuracy_drop_pp` below
@@ -99,7 +99,7 @@ def _check(latest: dict[str, Any], baseline: list[dict[str, Any]],
99
99
  def main(argv: list[str] | None = None) -> int:
100
100
  ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
101
101
  ap.add_argument("--corpus", default="dev")
102
- ap.add_argument("--reports-dir", default="bench/reports")
102
+ ap.add_argument("--reports-dir", default="internal/bench/reports")
103
103
  ap.add_argument("--window", type=int, default=5, help="rolling window size (default 5)")
104
104
  ap.add_argument("--accuracy-drop-pp", type=float, default=5.0)
105
105
  ap.add_argument("--cost-increase-pct", type=float, default=20.0)
@@ -43,7 +43,7 @@ from bench_runner import rank_skills # type: ignore # noqa: E402
43
43
 
44
44
  REPO_ROOT = Path(__file__).resolve().parent.parent
45
45
  CORPUS_DIR = REPO_ROOT / "tests" / "eval"
46
- REPORTS_DIR = REPO_ROOT / "bench" / "reports"
46
+ REPORTS_DIR = REPO_ROOT / "internal" / "bench" / "reports"
47
47
 
48
48
  # tool_id -> (skills_root, kind). kind = "skills" | "rules_only" | "single_file".
49
49
  SURFACES: dict[str, tuple[Path, str]] = {
@@ -185,7 +185,7 @@ def main(argv=None) -> int:
185
185
  ap.add_argument("--threshold", type=float, default=0.85)
186
186
  ap.add_argument("--json", action="store_true")
187
187
  ap.add_argument("--write-report", action="store_true",
188
- help="emit bench/reports/<ts>-<corpus>-projection.{json,md}")
188
+ help="emit internal/bench/reports/<ts>-<corpus>-projection.{json,md}")
189
189
  args = ap.parse_args(argv)
190
190
 
191
191
  corpus_path = CORPUS_DIR / f"corpus-{args.corpus}.yaml"
@@ -5,7 +5,7 @@ Wraps the selection-accuracy baseline collector (`scripts/bench_runner.py`),
5
5
  captures token / cost data from `agents/cost-tracking/sessions.jsonl` if
6
6
  present (per ruflo pattern, external-findings § 2), runs structural
7
7
  quality assertions per prompt, and emits a versioned JSON + Markdown
8
- report under `bench/reports/` per
8
+ report under `internal/bench/reports/` per
9
9
  `docs/contracts/benchmark-report-schema.md`.
10
10
 
11
11
  Usage:
@@ -46,11 +46,11 @@ except ImportError:
46
46
  sys.exit(2)
47
47
 
48
48
  BENCH_RUN_VERSION = "0.2.0"
49
- PRICING_PATH = REPO_ROOT / "bench" / "pricing.yaml"
49
+ PRICING_PATH = REPO_ROOT / "internal" / "bench" / "pricing.yaml"
50
50
  SESSIONS_JSONL = REPO_ROOT / "agents" / "cost-tracking" / "sessions.jsonl"
51
- REPORTS_DIR = REPO_ROOT / "bench" / "reports"
51
+ REPORTS_DIR = REPO_ROOT / "internal" / "bench" / "reports"
52
52
  CORPUS_DIR = REPO_ROOT / "tests" / "eval"
53
- CAVEMAN_CORPUS = REPO_ROOT / "bench" / "corpora" / "caveman" / "prompts.yaml"
53
+ CAVEMAN_CORPUS = REPO_ROOT / "internal" / "bench" / "corpora" / "caveman" / "prompts.yaml"
54
54
  BASELINE_COLLECTOR = REPO_ROOT / "scripts" / "bench_runner.py"
55
55
 
56
56
 
@@ -4,7 +4,7 @@
4
4
  Reads three on-disk sources:
5
5
  * `package.json` — name, version, description, homepage, repository
6
6
  * `.github/topics.yml` — topics list (for registries that accept tags)
7
- * `workers/mcp/content.json` — `tool_catalog` (tools_count, install_hint_stdio)
7
+ * `internal/workers/mcp/content.json` — `tool_catalog` (tools_count, install_hint_stdio)
8
8
  * `dist/discovery/discovery-manifest.json` — artefact_count + scanner_version (HARD prereq per AI-Council R5)
9
9
 
10
10
  Emits:
@@ -37,7 +37,7 @@ import yaml
37
37
  ROOT = Path(__file__).resolve().parents[1]
38
38
  PKG_FILE = ROOT / "package.json"
39
39
  TOPICS_FILE = ROOT / ".github" / "topics.yml"
40
- CONTENT_FILE = ROOT / "workers" / "mcp" / "content.json"
40
+ CONTENT_FILE = ROOT / "internal" / "workers" / "mcp" / "content.json"
41
41
  DISCOVERY_FILE = ROOT / "dist" / "discovery" / "discovery-manifest.json"
42
42
  OUT_DIR = ROOT / "dist" / "mcp"
43
43
  OUT_MANIFEST = OUT_DIR / "registry-manifest.json"
@@ -2,7 +2,7 @@
2
2
 
3
3
  mcp_scope: full — local stdio access can be extended to tool execution
4
4
  under the Phase 7 wake-up triggers in `docs/contracts/mcp-cloud-scope.md`.
5
- The hosted Worker (`workers/mcp/`) is `mcp_scope: lite` and is
5
+ The hosted Worker (`internal/workers/mcp/`) is `mcp_scope: lite` and is
6
6
  intentionally narrower.
7
7
 
8
8
  Exposes a hand-picked subset of `.agent-src/skills/` as MCP `prompts`
@@ -108,7 +108,7 @@ def not_implemented_envelope(
108
108
  ) -> dict[str, Any]:
109
109
  """Wire-shape error envelope used when a stub is invoked.
110
110
 
111
- Mirrored verbatim by the Cloud Worker (`workers/mcp/src/stubs.ts`).
111
+ Mirrored verbatim by the Cloud Worker (`internal/workers/mcp/src/stubs.ts`).
112
112
  """
113
113
  return {
114
114
  "code": NOT_IMPLEMENTED_CODE,
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "schema_version": 1,
3
- "description": "Source-of-truth catalog of consumer-relevant MCP tools. Read by the stdio server (scripts/mcp_server/) and packed into the Cloud Worker bundle (workers/mcp/). Phase 1 of road-to-mcp-full-coverage: tools without 'implemented' transports return the 'not_implemented' envelope defined in docs/contracts/mcp-tool-stub-envelope.md. The 'implemented_on' field lists transports where the real handler is wired; everything else is a discovery stub. See agents/roadmaps/archive/road-to-mcp-full-coverage.md.",
3
+ "description": "Source-of-truth catalog of consumer-relevant MCP tools. Read by the stdio server (scripts/mcp_server/) and packed into the Cloud Worker bundle (internal/workers/mcp/). Phase 1 of road-to-mcp-full-coverage: tools without 'implemented' transports return the 'not_implemented' envelope defined in docs/contracts/mcp-tool-stub-envelope.md. The 'implemented_on' field lists transports where the real handler is wired; everything else is a discovery stub. See agents/roadmaps/archive/road-to-mcp-full-coverage.md.",
4
4
  "install_hint_stdio": "pip install agent-config[mcp] && ./agent-config mcp:run",
5
5
  "tools": [
6
6
  {
@@ -43,7 +43,7 @@ from .catalog import (
43
43
  from .telemetry import Outcome, record_call
44
44
 
45
45
  # Stable transport tag for the stub envelope. Mirrored verbatim by
46
- # `workers/mcp/src/stubs.ts` with ``"worker"``.
46
+ # `internal/workers/mcp/src/stubs.ts` with ``"worker"``.
47
47
  STDIO_TRANSPORT = "stdio"
48
48
 
49
49
  # Allowlisted directories (relative to consumer_root) where tool writes