@event4u/agent-config 5.6.1 → 5.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/cost-report.md +12 -7
- package/.agent-src/commands/prediction-pool.md +215 -0
- package/.agent-src/commands/set-cost-profile.md +8 -8
- package/.agent-src/commands/sync-agent-settings.md +2 -2
- package/.agent-src/presets/README.md +1 -1
- package/.agent-src/profiles/README.md +1 -1
- package/.agent-src/rules/non-destructive-by-default.md +2 -1
- package/.agent-src/skills/prediction-pool-optimizer/SKILL.md +196 -0
- package/.agent-src/skills/prediction-pool-optimizer/evals/triggers.json +18 -0
- package/.agent-src/skills/prediction-pool-optimizer/reference/ev-fixtures.md +80 -0
- package/.agent-src/templates/agent-settings.md +7 -7
- package/.agent-src/templates/agents/agent-project-settings.example.yml +2 -2
- package/.agent-src/templates/scripts/work_engine/_lib/agent_settings.py +2 -1
- package/.agent-src/templates/scripts/work_engine/hook_bootstrap.py +1 -1
- package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +9 -7
- package/.agent-src/templates/scripts/work_engine/hooks/settings.py +9 -10
- package/.agent-src/templates/scripts/work_engine/scoring/memory_visibility.py +17 -4
- package/.claude-plugin/marketplace.json +3 -1
- package/CHANGELOG.md +48 -0
- package/README.md +2 -2
- package/config/agent-settings.template.yml +11 -2
- package/config/discovery/packs.yml +11 -0
- package/config/discovery/workspaces.yml +1 -1
- package/config/profiles/balanced.ini +1 -1
- package/config/profiles/full.ini +1 -1
- package/config/profiles/minimal.ini +1 -1
- package/dist/discovery/deprecation-report.md +1 -1
- package/dist/discovery/discovery-manifest.json +80 -14
- package/dist/discovery/discovery-manifest.json.sha256 +1 -1
- package/dist/discovery/discovery-manifest.summary.md +3 -2
- package/dist/discovery/orphan-report.md +1 -1
- package/dist/discovery/packs.json +34 -3
- package/dist/discovery/trust-report.md +2 -2
- package/dist/discovery/workspaces.json +13 -4
- package/dist/mcp/registry-manifest.json +2 -2
- package/dist/server/io/substituteTemplate.js +3 -3
- package/dist/server/io/substituteTemplate.js.map +1 -1
- package/dist/server/routes/settings.js +2 -2
- package/dist/server/routes/settings.js.map +1 -1
- package/dist/server/schemas/settings.js +4 -2
- package/dist/server/schemas/settings.js.map +1 -1
- package/dist/ui/assets/{index-DVsyUMZe.js → index-5lFqAKL0.js} +2 -2
- package/dist/ui/assets/index-5lFqAKL0.js.map +1 -0
- package/dist/ui/index.html +1 -1
- package/docs/architecture/current-onboard-baseline.md +3 -3
- package/docs/architecture.md +2 -2
- package/docs/catalog.md +7 -5
- package/docs/contracts/adr-level-6-productization.md +1 -1
- package/docs/contracts/config-presets.md +2 -2
- package/docs/contracts/cost-profile-defaults.md +5 -5
- package/docs/contracts/discovery-manifest.schema.json +1 -1
- package/docs/contracts/explain-trace.schema.json +3 -3
- package/docs/contracts/memory-visibility-v1.md +15 -7
- package/docs/contracts/profile-system.md +2 -2
- package/docs/contracts/settings-api.md +3 -3
- package/docs/contracts/value-report-schema.md +14 -1
- package/docs/customization.md +21 -5
- package/docs/decisions/ADR-010-profile-pack-preset-boundary.md +11 -11
- package/docs/decisions/ADR-013-discovery-frontmatter-contract.md +16 -2
- package/docs/decisions/ADR-034-per-skill-model-recommendation-transport.md +1 -1
- package/docs/decisions/ADR-036-global-install-browser-wizard-handoff.md +106 -0
- package/docs/decisions/ADR-037-cost-profile-untangle.md +117 -0
- package/docs/decisions/ADR-rule-kernel-and-router.md +1 -1
- package/docs/decisions/INDEX.md +2 -0
- package/docs/getting-started.md +2 -2
- package/docs/guidelines/agent-infra/layered-settings.md +2 -2
- package/docs/installation.md +3 -3
- package/docs/setup/mcp-client-config.md +1 -1
- package/docs/value.md +9 -7
- package/docs/wizard.md +1 -1
- package/package.json +1 -1
- package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
- package/scripts/_cli/cmd_explain.py +1 -1
- package/scripts/_cli/explain_last/inputs.py +11 -8
- package/scripts/_cli/explain_last/sections/inputs.py +1 -1
- package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
- package/scripts/_lib/agent_settings.py +2 -1
- package/scripts/_lib/value_ladder.py +99 -2
- package/scripts/_lib/value_report.py +30 -16
- package/scripts/ai_council/modes.py +1 -1
- package/scripts/audit_initial_context.py +16 -0
- package/scripts/check_skill_requires.py +143 -0
- package/scripts/condense.py +13 -2
- package/scripts/first-run.sh +11 -11
- package/scripts/install +14 -1
- package/scripts/install.py +127 -428
- package/scripts/install_anthropic_key.sh +1 -1
- package/scripts/install_openai_key.sh +1 -1
- package/scripts/lint_discovery_vocabulary.py +5 -5
- package/scripts/lint_value_dashboard.py +1 -1
- package/scripts/prediction-pool/adapters/_schema.md +42 -0
- package/scripts/prediction-pool/adapters/kicktipp.yml +23 -0
- package/scripts/prediction-pool/poisson_sim.py +167 -0
- package/scripts/render_value_md.py +1 -0
- package/scripts/schemas/agent-settings.schema.json +77 -0
- package/scripts/schemas/skill.schema.json +7 -0
- package/scripts/smoke_quickstart.py +4 -4
- package/scripts/sync_agent_settings.py +4 -2
- package/scripts/validate_agent_settings.py +120 -0
- package/templates/minimal/.agent-settings.yml +1 -1
- package/dist/ui/assets/index-DVsyUMZe.js.map +0 -1
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
---
|
|
2
|
+
adr: 037
|
|
3
|
+
status: accepted
|
|
4
|
+
date: 2026-06-01
|
|
5
|
+
decision: cost-profile-untangle
|
|
6
|
+
supersedes: —
|
|
7
|
+
superseded_by: —
|
|
8
|
+
phase: cost-profile-untangle
|
|
9
|
+
type: structural
|
|
10
|
+
review_date: 2026-09-01
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# ADR-037 — Untangle `cost_profile` into single-purpose settings
|
|
14
|
+
|
|
15
|
+
## Status
|
|
16
|
+
|
|
17
|
+
**Accepted** · 2026-06-01. AI Council (anthropic/claude-sonnet-4-5 +
|
|
18
|
+
openai/gpt-4o, analysis lens, 3 rounds, 2026-06-01) converged on the
|
|
19
|
+
rename + keep-decoupled design below; findings were critically evaluated
|
|
20
|
+
against the codebase before adoption.
|
|
21
|
+
|
|
22
|
+
## Context
|
|
23
|
+
|
|
24
|
+
The `cost_profile` setting had accreted multiple responsibilities plus a
|
|
25
|
+
hard naming collision:
|
|
26
|
+
|
|
27
|
+
1. **Canonical meaning** — rule-tier loading (`minimal | balanced | full |
|
|
28
|
+
custom`), a token-footprint lever resolved from `dist/router.json`.
|
|
29
|
+
2. **Colliding second meaning** — a separate contract
|
|
30
|
+
(`memory-visibility-v1`) and the work-engine hook read the *same key*
|
|
31
|
+
`cost_profile` with an incompatible value set (`lean | standard |
|
|
32
|
+
verbose`) to gate the `🧠 Memory` visibility-line cadence. Because no
|
|
33
|
+
real install ever wrote `cost_profile: lean`, the suppress branch was
|
|
34
|
+
**unreachable dead code**.
|
|
35
|
+
3. **Default drift** — the default was declared in four places with three
|
|
36
|
+
different answers (`balanced`, `minimal`, `standard`).
|
|
37
|
+
4. **Migrated intent** — `cost_profile` was originally meant to gate
|
|
38
|
+
self-optimization; that capability had already moved to the independent
|
|
39
|
+
`pipelines.skill_improvement` setting + tier-2a rule loading.
|
|
40
|
+
5. **Naming confusion** — four "cost"-sounding concepts (`cost_profile`,
|
|
41
|
+
the memory cadence, `/cost:report` + `cost.budgets`, and
|
|
42
|
+
`model_tier`/`model.auto_switch`) competed for one mental slot; only the
|
|
43
|
+
first two were literally named `cost_profile`, and the second was the
|
|
44
|
+
broken one.
|
|
45
|
+
|
|
46
|
+
The root cause was the absence of a settings schema: nothing prevented one
|
|
47
|
+
key from carrying two value vocabularies.
|
|
48
|
+
|
|
49
|
+
## Decision
|
|
50
|
+
|
|
51
|
+
1. **One key, one job.** Rename the canonical setting `cost_profile` →
|
|
52
|
+
`rule_loading_tier` (values unchanged: `minimal | balanced | full |
|
|
53
|
+
custom`). The name now describes the mechanism, not a side effect, which
|
|
54
|
+
makes the `/cost:report` mismatch obvious ("you can't lower your rule
|
|
55
|
+
loading tier to save money without losing guardrails").
|
|
56
|
+
2. **Memory cadence owns its own key.** The visibility-line cadence moves
|
|
57
|
+
to `memory.cadence` (`auto | always | never`, default `always` —
|
|
58
|
+
behaviour-neutral; the previously-dead suppress path is now reachable as
|
|
59
|
+
`auto`). Named `memory.cadence`, not `memory_status`, to avoid a clash
|
|
60
|
+
with the existing `scripts/memory_status.py`.
|
|
61
|
+
3. **Self-optimization stays decoupled.** `pipelines.skill_improvement`
|
|
62
|
+
remains the independent lever; `rule_loading_tier` is *not* re-coupled to
|
|
63
|
+
learning behaviour. The council was unanimous that coupling token
|
|
64
|
+
footprint to learning behaviour is the wrong axis.
|
|
65
|
+
4. **Schema prevents recurrence.** A new
|
|
66
|
+
`scripts/schemas/agent-settings.schema.json` (+ CI validator) enum-
|
|
67
|
+
constrains the value-bearing keys so a value-vocabulary collision is a
|
|
68
|
+
hard CI failure, not silent dead code.
|
|
69
|
+
5. **Migration, not break.** `install.py`'s `LEGACY_RENAME_MAP` rewrites
|
|
70
|
+
`cost_profile` → `rule_loading_tier`; loaders read the legacy key as a
|
|
71
|
+
fallback during a grace period. The default is consolidated to one
|
|
72
|
+
source of truth (`balanced`).
|
|
73
|
+
|
|
74
|
+
### Deliberately *not* done (scope discipline)
|
|
75
|
+
|
|
76
|
+
- **Command + file names kept** (`/set-cost-profile`,
|
|
77
|
+
`cost-profile-defaults.md`) — renaming them cascades through the
|
|
78
|
+
ownership-matrix, command-surface, discovery manifest, and marketplace
|
|
79
|
+
(all CI-enforced/generated); the cost/benefit did not clear the bar.
|
|
80
|
+
Their *content* now uses `rule_loading_tier`.
|
|
81
|
+
- **`custom` left unchanged** — it is absent from `dist/router.json`
|
|
82
|
+
profiles (documented but not router-dispatched). Its implementation is a
|
|
83
|
+
pre-existing question, tracked as a follow-up, not part of this rename.
|
|
84
|
+
- **`dist/router.json` `profiles` key kept** — it is the tier-list
|
|
85
|
+
structure, not the setting name; consumers may parse it.
|
|
86
|
+
|
|
87
|
+
## Consequences
|
|
88
|
+
|
|
89
|
+
- The silent dead-code bug is gone; the memory cadence is reachable and
|
|
90
|
+
tested.
|
|
91
|
+
- A future settings-key collision fails CI immediately.
|
|
92
|
+
- Existing installs migrate automatically; no manual action.
|
|
93
|
+
- Internal naming is slightly inconsistent (command/file still say
|
|
94
|
+
"cost-profile" while the setting is `rule_loading_tier`) — an accepted
|
|
95
|
+
trade-off against a high-churn, low-value rename of generated surfaces.
|
|
96
|
+
- Breaking change to a public settings key → next major (see
|
|
97
|
+
`BREAKING_CHANGES.md`).
|
|
98
|
+
|
|
99
|
+
## Alternatives considered
|
|
100
|
+
|
|
101
|
+
- **Re-couple `cost_profile` to self-optimization** (the original intent) —
|
|
102
|
+
rejected: couples two orthogonal axes (token footprint + learning
|
|
103
|
+
behaviour); the council and the existing decoupled architecture both
|
|
104
|
+
argue against it.
|
|
105
|
+
- **Minimal fix — resolve only the collision, keep the `cost_profile`
|
|
106
|
+
name** — rejected: leaves the misleading "cost" name and so leaves part
|
|
107
|
+
of the four-concept confusion intact.
|
|
108
|
+
- **Rename the command + doc files too** — deferred: disproportionate
|
|
109
|
+
cascade through CI-enforced manifests for marginal benefit.
|
|
110
|
+
|
|
111
|
+
## References
|
|
112
|
+
|
|
113
|
+
- `BREAKING_CHANGES.md` (next-major entry).
|
|
114
|
+
- `docs/contracts/memory-visibility-v1.md` § Cadence interaction.
|
|
115
|
+
- `docs/contracts/cost-profile-defaults.md` (rule-loading defaults).
|
|
116
|
+
- `scripts/schemas/agent-settings.schema.json` + `scripts/validate_agent_settings.py`.
|
|
117
|
+
- ADR-010 (profile / pack / preset boundary) — `cost_profile` axis renamed.
|
|
@@ -74,7 +74,7 @@ size budgets.
|
|
|
74
74
|
| `balanced` *(default)* | kernel + tier-1 | Day-to-day engineering work; matches pre-roadmap behaviour superset |
|
|
75
75
|
| `full` | kernel + tier-1 + tier-2 | Architectural / cross-wing / governance sessions |
|
|
76
76
|
|
|
77
|
-
Consumer projects opt in via `personal.
|
|
77
|
+
Consumer projects opt in via `personal.rule_loading_tier` in
|
|
78
78
|
`.agent-settings.yml`. The install script keeps user-set values; only
|
|
79
79
|
the template default is `balanced`.
|
|
80
80
|
|
package/docs/decisions/INDEX.md
CHANGED
|
@@ -39,6 +39,8 @@ _Auto-generated by `scripts/adr/regenerate_index.py`. Do not edit._
|
|
|
39
39
|
| [ADR-033](ADR-033-distribution-identity-npm-primary.md) | Distribution Identity Npm Primary | accepted | 2026-05-29 | — |
|
|
40
40
|
| [ADR-034](ADR-034-per-skill-model-recommendation-transport.md) | Per Skill Model Recommendation Transport | superseded | 2026-05-30 | — |
|
|
41
41
|
| [ADR-035](ADR-035-model-capability-tiers.md) | Model Capability Tiers | accepted | 2026-05-30 | 034 |
|
|
42
|
+
| [ADR-036](ADR-036-global-install-browser-wizard-handoff.md) | Global Install Browser Wizard Handoff | accepted | 2026-06-01 | — |
|
|
43
|
+
| [ADR-037](ADR-037-cost-profile-untangle.md) | Cost Profile Untangle | accepted | 2026-06-01 | — |
|
|
42
44
|
|
|
43
45
|
## Unnumbered (legacy)
|
|
44
46
|
|
package/docs/getting-started.md
CHANGED
|
@@ -146,7 +146,7 @@ The system supports four configuration profiles:
|
|
|
146
146
|
Set your profile in `.agent-settings.yml`:
|
|
147
147
|
|
|
148
148
|
```yaml
|
|
149
|
-
|
|
149
|
+
rule_loading_tier: balanced
|
|
150
150
|
```
|
|
151
151
|
|
|
152
152
|
`balanced` is the default — kernel + tier-1 auto-rules. Rationale:
|
|
@@ -169,7 +169,7 @@ Your agent now understands slash commands:
|
|
|
169
169
|
| `/quality-fix` | Run and fix all quality checks |
|
|
170
170
|
| `/chat-history` | Inspect the persistent chat-history log (read-only `show`) |
|
|
171
171
|
|
|
172
|
-
→ [Browse all
|
|
172
|
+
→ [Browse all 146 active commands](../.agent-src/commands/)
|
|
173
173
|
|
|
174
174
|
---
|
|
175
175
|
|
|
@@ -18,7 +18,7 @@ on user request.
|
|
|
18
18
|
| File | Git | Scope | Owner | Example values |
|
|
19
19
|
|---|---|---|---|---|
|
|
20
20
|
| `.agent-project-settings.yml` | **committed** | team / repo | lead maintainer | `project.stack`, `quality.php.tools`, `memory.dogfood` |
|
|
21
|
-
| `~/.event4u/agent-config/agent-settings.yml` | **n/a** (outside repo) | individual developer · cross-project | individual | `name`, `ide`, `
|
|
21
|
+
| `~/.event4u/agent-config/agent-settings.yml` | **n/a** (outside repo) | individual developer · cross-project | individual | `name`, `ide`, `rule_loading_tier`, `personal.bot_icon`, `personal.autonomy`, `telegraph.speak_scope` (legacy `~/.config/agent-config/agent-settings.yml` read as fallback) |
|
|
22
22
|
| `.agent-settings.yml` | **gitignored** | individual developer · this project | individual | `personal.ide`, `personal.user_name`, `subagents.max_parallel`, `onboarding.onboarded` |
|
|
23
23
|
|
|
24
24
|
All three are YAML. Schemas:
|
|
@@ -52,7 +52,7 @@ intentionally tiny — adding a key requires an ADR.
|
|
|
52
52
|
```
|
|
53
53
|
name
|
|
54
54
|
ide
|
|
55
|
-
|
|
55
|
+
rule_loading_tier
|
|
56
56
|
personal.bot_icon
|
|
57
57
|
personal.autonomy
|
|
58
58
|
telegraph.speak_scope
|
package/docs/installation.md
CHANGED
|
@@ -303,7 +303,7 @@ explicitly by adding `agent_config_version: <semver>` to
|
|
|
303
303
|
The orchestrator chains payload sync and bridge generation:
|
|
304
304
|
|
|
305
305
|
```bash
|
|
306
|
-
bash scripts/install # defaults to
|
|
306
|
+
bash scripts/install # defaults to rule_loading_tier=balanced
|
|
307
307
|
bash scripts/install --profile=minimal
|
|
308
308
|
bash scripts/install --force # overwrite existing bridges
|
|
309
309
|
bash scripts/install --skip-bridges # payload only
|
|
@@ -359,7 +359,7 @@ regardless of which AI tool they use.** No per-developer plugin installation nee
|
|
|
359
359
|
After initial setup, commit these files:
|
|
360
360
|
|
|
361
361
|
```
|
|
362
|
-
.agent-settings.yml ← shared profile (e.g.,
|
|
362
|
+
.agent-settings.yml ← shared profile (e.g., rule_loading_tier: balanced)
|
|
363
363
|
agents/installed-tools.lock ← AI bill of materials (ADR-008, Phase 3)
|
|
364
364
|
.augment/ ← rules, skills, commands (symlinks)
|
|
365
365
|
.cursor/rules/ ← Cursor rules (symlinks)
|
|
@@ -589,7 +589,7 @@ The system works immediately with sensible defaults. Optionally, create `.agent-
|
|
|
589
589
|
to choose a profile:
|
|
590
590
|
|
|
591
591
|
```yaml
|
|
592
|
-
|
|
592
|
+
rule_loading_tier: balanced
|
|
593
593
|
```
|
|
594
594
|
|
|
595
595
|
| Profile | What's active | For whom |
|
|
@@ -33,7 +33,7 @@ look for MCP server config inside `.agent-settings.yml`.
|
|
|
33
33
|
| File | Where | Who reads it | Purpose |
|
|
34
34
|
|---|---|---|---|
|
|
35
35
|
| MCP client config (this page) | client-specific path per section above | the MCP client at startup | which MCP servers to talk to (name + URL / command) |
|
|
36
|
-
| `.agent-settings.yml` | consumer project root (`<repo>/.agent-settings.yml`) | the agent at runtime (Claude / Cursor / …) | per-developer preferences: `name`, `ide`, `
|
|
36
|
+
| `.agent-settings.yml` | consumer project root (`<repo>/.agent-settings.yml`) | the agent at runtime (Claude / Cursor / …) | per-developer preferences: `name`, `ide`, `rule_loading_tier`, `personal.autonomy`, `pipelines.skill_improvement`, `telegraph.speak_scope`, … |
|
|
37
37
|
|
|
38
38
|
The Worker is **stateless** and **project-agnostic** — it serves the
|
|
39
39
|
same skill / rule / command catalog to every client. Personalization
|
package/docs/value.md
CHANGED
|
@@ -23,15 +23,17 @@ Liest sich von oben nach unten. Positive Δ-Werte = das Paket *kostet* Tokens (R
|
|
|
23
23
|
| Stufe | Was sie tut | Δ Tokens | Kumulativ | Quelle |
|
|
24
24
|
|---|---|---:|---:|---|
|
|
25
25
|
| **Ohne Paket / Without package** | Baseline — der nackte Request ohne Paket-Regeln. | +0 | +0.00% | `n/a` · ✅ gemessen |
|
|
26
|
-
| Mit Paket (Regeln laden) / With package (rule load) | Die immer-aktiven Regeln landen im Kontext jedes Requests. ⚠️ erst teurer | +
|
|
27
|
-
| | _Fußnote:_
|
|
28
|
-
| +
|
|
26
|
+
| Mit Paket (Regeln laden) / With package (rule load) | Die immer-aktiven Regeln landen im Kontext jedes Requests. ⚠️ erst teurer | +59 359 | +741.99% | `internal/bench/reports/projection-cost.json` · ✅ gemessen |
|
|
27
|
+
| | _Fußnote:_ Eager-Default: alle 79 Rule-Files always-on im `.claude`-Projektionspfad (0B.6-bestätigt fürs primäre Tool). Nicht nur der Kernel — das ist die ehrliche Up-Front-Last; tokens ≈ chars / 4. | | | |
|
|
28
|
+
| + thin (Regeln als Pointer) / + thin (rules as pointers) | Nicht-Kernel-Regel-Bodies werden zu router-aufgelösten Pointern. | -45 857 | +741.99% | `internal/bench/reports/projection-cost.json` · 🔁 verfügbar (Default aus) |
|
|
29
|
+
| | _Fußnote:_ Verfügbar hinter `lean_projection.mode=thin` (Default `eager-all` — deshalb NICHT im Default-NETTO). Mit Thin aktiv: Rule-Layer 59359 → 13502 GPT tok (−45857, −77.3%). MUST-LOAD-Floor `task trigger-coverage` 26/26 grün; Live-A/B-Validierung ausstehend (Harness abgelehnt). Rollback = ein Flip. | | | |
|
|
30
|
+
| + condense (Regeln eindampfen) / + condense (rule shrink) | Build-Schritt schrumpft Regel-Dateien vor dem Ausliefern. | -186 | +739.66% | `internal/bench/reports/telegraph-v2.json` · ✅ gemessen |
|
|
29
31
|
| | _Fußnote:_ Aggregate across non-Thin-Root categories; Thin-Root files (AGENTS.md variants) net negative (~−4%) and are excluded from the rung — surfaced separately. | | | |
|
|
30
|
-
| + rtk (CLI-Output filtern) / + rtk (filter CLI output) | rtk schneidet verbose CLI-Ausgabe vor dem Modell-Input weg. | -
|
|
31
|
-
| + terse (Antworten knapper) / + terse (shorter replies) | Telegraph-Stil zielt auf knappere Modell-Antworten. | +56 | +
|
|
32
|
+
| + rtk (CLI-Output filtern) / + rtk (filter CLI output) | rtk schneidet verbose CLI-Ausgabe vor dem Modell-Input weg. | -585 | +732.35% | `internal/bench/reports/rtk/latest.json` · ✅ gemessen |
|
|
33
|
+
| + terse (Antworten knapper) / + terse (shorter replies) | Telegraph-Stil zielt auf knappere Modell-Antworten. | +56 | +733.05% | `internal/bench/reports/telegraph-v1.json` · ✅ gemessen |
|
|
32
34
|
| | _Fußnote:_ Honest: gemessener Median = -9.27% gegen 'sei knapp' — Telegraph liefert hier mehr Tokens, nicht weniger. Wir messen, wir verstecken nicht. | | | |
|
|
33
35
|
|
|
34
|
-
**NETTO: Mehrkosten** ⚠️ — **+
|
|
36
|
+
**NETTO: Mehrkosten** ⚠️ — **+58 644 Tokens / Request**, kumulativ **+733.05%** vs. Baseline.
|
|
35
37
|
|
|
36
38
|
## Panel B — Verhalten (mit vs. ohne)
|
|
37
39
|
|
|
@@ -81,4 +83,4 @@ Diese Seite ist eine **abgeleitete** Sicht — keine eigene Messung. Sie fasst d
|
|
|
81
83
|
- Pending rungs contribute 0 to the cumulative until measured.
|
|
82
84
|
- Reference scale: 1000 requests × 8000 input / 600 output tokens per request.
|
|
83
85
|
|
|
84
|
-
_Last rendered: `2026-
|
|
86
|
+
_Last rendered: `2026-06-01T12:10:59+00:00`_
|
package/docs/wizard.md
CHANGED
|
@@ -33,7 +33,7 @@ selects which surface renders.
|
|
|
33
33
|
|---|---|---|
|
|
34
34
|
| 1 | Identity | `personal.user_name`, `personal.ide` |
|
|
35
35
|
| 2 | Personality | `personal.minimal_output`, `personal.play_by_play`, `personal.open_edited_files` |
|
|
36
|
-
| 3 | Cost profile | `
|
|
36
|
+
| 3 | Cost profile | `rule_loading_tier` (minimal · balanced · full) |
|
|
37
37
|
| 4 | Roadmap quality | `roadmap.quality_floor`, `roadmap.run_tests_inline` |
|
|
38
38
|
| 5 | Memory | `memory.enabled`, MCP server presence |
|
|
39
39
|
| 6 | `.agent-user.md` | Optional long-form persona / preferences |
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@event4u/agent-config",
|
|
3
|
-
"version": "5.
|
|
3
|
+
"version": "5.7.0",
|
|
4
4
|
"description": "Universal AI Agent OS \u2014 audited skills, governance rules, commands, and templates for AI coding tools (Claude Code, Cursor, Windsurf, Copilot).",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"private": false,
|
|
Binary file
|
|
@@ -262,7 +262,7 @@ makes network calls. Output is the ExplainTrace v1 contract:
|
|
|
262
262
|
|
|
263
263
|
why-slots answered (Markdown sections; JSON keys in parens):
|
|
264
264
|
|
|
265
|
-
inputs — profile / preset /
|
|
265
|
+
inputs — profile / preset / rule_loading_tier with per-knob source
|
|
266
266
|
(pack | profile | preset | user | env | runtime |
|
|
267
267
|
default)
|
|
268
268
|
route — matched tier-1 rules · kernel rules · active persona
|
|
@@ -16,7 +16,7 @@ from scripts._cli.explain_last.scrubber import scrub_string
|
|
|
16
16
|
from scripts._lib.agent_settings import DEFAULT_PROJECT_FILE, load_agent_settings
|
|
17
17
|
from scripts.config import presets, profiles
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
_DEFAULT_RULE_LOADING_TIER = "balanced"
|
|
20
20
|
_SILENCED_LOGGERS = ("scripts.config.profiles", "scripts.config.presets")
|
|
21
21
|
|
|
22
22
|
|
|
@@ -65,19 +65,22 @@ def build(project_root: Path) -> dict[str, Any] | None:
|
|
|
65
65
|
)
|
|
66
66
|
except (profiles.ProfileError, presets.PresetError, OSError):
|
|
67
67
|
return None
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
68
|
+
rule_loading_tier = (
|
|
69
|
+
(settings.get("rule_loading_tier") or settings.get("cost_profile"))
|
|
70
|
+
if isinstance(settings, dict) else None
|
|
71
|
+
)
|
|
72
|
+
rule_loading_tier_source = "user" if rule_loading_tier else "default"
|
|
73
|
+
if not rule_loading_tier or rule_loading_tier == "__RULE_LOADING_TIER__":
|
|
74
|
+
rule_loading_tier = _DEFAULT_RULE_LOADING_TIER
|
|
75
|
+
rule_loading_tier_source = "default"
|
|
73
76
|
return {
|
|
74
77
|
"profile": scrub_string(resolved_profile.id),
|
|
75
78
|
"preset": scrub_string(resolved_preset.id),
|
|
76
|
-
"
|
|
79
|
+
"rule_loading_tier": scrub_string(str(rule_loading_tier)),
|
|
77
80
|
"source_per_knob": {
|
|
78
81
|
"profile": resolved_profile.source,
|
|
79
82
|
"preset": resolved_preset.source,
|
|
80
|
-
"
|
|
83
|
+
"rule_loading_tier": rule_loading_tier_source,
|
|
81
84
|
},
|
|
82
85
|
}
|
|
83
86
|
|
|
@@ -15,7 +15,7 @@ def render(trace: dict[str, Any]) -> str:
|
|
|
15
15
|
rows = [
|
|
16
16
|
("profile.id", inputs.get("profile"), sources.get("profile")),
|
|
17
17
|
("preset.id", inputs.get("preset"), sources.get("preset")),
|
|
18
|
-
("
|
|
18
|
+
("rule_loading_tier", inputs.get("rule_loading_tier"), sources.get("rule_loading_tier")),
|
|
19
19
|
]
|
|
20
20
|
out.append("| knob | value | source |")
|
|
21
21
|
out.append("|---|---|---|")
|
|
Binary file
|
|
Binary file
|
|
@@ -35,6 +35,13 @@ DEFAULT_REFERENCE_SCALE = {
|
|
|
35
35
|
"model_tier": "sonnet",
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
# Confidence levels that contribute to the cumulative / NETTO headline.
|
|
39
|
+
# `pending` (not yet measured) and `available` (measured but behind a
|
|
40
|
+
# default-off kill-switch, e.g. the thin projection) are shown with their
|
|
41
|
+
# token_delta but excluded from the default cumulative — the headline must
|
|
42
|
+
# reflect what actually ships by default.
|
|
43
|
+
_COUNTING_CONFIDENCES = ("measured", "estimated", "vendor-claim")
|
|
44
|
+
|
|
38
45
|
# ── Pricing ─────────────────────────────────────────────────────────────
|
|
39
46
|
|
|
40
47
|
|
|
@@ -228,6 +235,96 @@ def load_rung_from_frugality(
|
|
|
228
235
|
}
|
|
229
236
|
|
|
230
237
|
|
|
238
|
+
def load_rung_from_projection(
    projection: Optional[Dict[str, Any]],
    reference_scale: Dict[str, Any],
    pricing_row: Dict[str, Any],
    tool: str = ".claude",
) -> Optional[Dict[str, Any]]:
    """Build the load rung from the REAL eager always-on footprint.

    Phase 3.1 honesty fix: the older `load_rung_from_router` counts only the
    kernel + charter (~8.5k tok), modelling non-kernel rules as on-demand.
    But 0B.6 confirmed the primary tool **eager-loads every rule body**
    (~59k tok always-on). This rung reads that measured footprint from
    `internal/bench/reports/projection-cost.json::rule_footprint[<tool>]`
    so Panel A reflects what actually lands in context per request.

    Args:
        projection: Parsed projection-cost report, or None when it is missing.
        reference_scale: Reference scale forwarded to `price_input_delta_eur`.
        pricing_row: Pricing row forwarded to `price_input_delta_eur`.
        tool: Preferred key inside `rule_footprint`. When that key is absent
            or empty, the first reported footprint is used instead and the
            footnote names the key that was actually read.

    Returns:
        The rung dict, or None when the report lacks a usable footprint
        (missing/null/non-mapping `rule_footprint`, missing or non-numeric
        `tokens_gpt`), so the caller can fall back to the router/frugality
        rung.
    """
    footprints = (projection or {}).get("rule_footprint")
    if not isinstance(footprints, dict) or not footprints:
        return None

    # Prefer the requested tool; otherwise fall back to the first reported
    # footprint and remember its key so the footnote does not mislabel it.
    resolved_tool = tool
    entry = footprints.get(tool)
    if not entry:
        resolved_tool, entry = next(iter(footprints.items()))
    if not isinstance(entry, dict) or "tokens_gpt" not in entry:
        return None

    try:
        token_delta = int(entry["tokens_gpt"])
    except (TypeError, ValueError):
        # A malformed measurement is treated like a missing one.
        return None
    files = int(entry.get("files") or 0)

    return {
        "id": "load",
        "label": "Mit Paket (Regeln laden) / With package (rule load)",
        "what_it_does": "Die immer-aktiven Regeln landen im Kontext jedes Requests.",
        "token_delta": token_delta,
        "eur_delta": price_input_delta_eur(token_delta, reference_scale, pricing_row),
        # Placeholder value; the cumulative share is not computed here.
        "cumulative_pct": 0.0,
        "confidence": "measured",
        "source_report": "internal/bench/reports/projection-cost.json",
        "footnote": (
            f"Eager-Default: alle {files} Rule-Files always-on im "
            f"`{resolved_tool}`-Projektionspfad (0B.6-bestätigt fürs primäre Tool). "
            "Nicht nur der Kernel — das ist die ehrliche Up-Front-Last; "
            "tokens ≈ chars / 4."
        ),
    }
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def thin_rung_from_projection(
    projection: Optional[Dict[str, Any]],
    reference_scale: Dict[str, Any],
    pricing_row: Dict[str, Any],
) -> Dict[str, Any]:
    """Build the thin-projection rung (Phase 3.1 lever).

    Reads the `thin_projection` section of the projection-cost report. When
    that section is absent or carries no `saved_gpt`, a pending rung is
    returned instead. Otherwise the rung reports the measured saving as a
    negative token delta with `confidence: available`: the thin projection
    ships behind the `lean_projection.mode` kill-switch (default
    `eager-all`), so the delta is displayed but is not meant to enter the
    default cumulative. The footnote spells out the eager and thin totals
    and the validation state.
    """
    rung_label = "+ thin (Regeln als Pointer) / + thin (rules as pointers)"
    rung_summary = "Nicht-Kernel-Regel-Bodies werden zu router-aufgelösten Pointern."
    report_path = "internal/bench/reports/projection-cost.json"

    thin_stats = (projection or {}).get("thin_projection", {})
    has_measurement = bool(thin_stats) and "saved_gpt" in thin_stats
    if not has_measurement:
        # Nothing measured yet: emit the pending placeholder rung.
        return pending_rung(
            "thin",
            rung_label,
            rung_summary,
            report_path,
            footnote="Run scripts/project_thin_rules.py --measure to populate.",
        )

    saved_tokens = int(thin_stats["saved_gpt"])
    thin_tokens = int(thin_stats.get("thin_gpt", 0))
    eager_tokens = int(thin_stats.get("eager_gpt", 0))
    saved_share = thin_stats.get("saved_pct", 0)

    # A saving is a negative delta on the input side.
    delta = -saved_tokens
    note = (
        "Verfügbar hinter `lean_projection.mode=thin` (Default `eager-all` "
        "— deshalb NICHT im Default-NETTO). Mit Thin aktiv: Rule-Layer "
        f"{eager_tokens} → {thin_tokens} GPT tok (−{saved_tokens}, −{saved_share}%). "
        "MUST-LOAD-Floor `task trigger-coverage` 26/26 grün; "
        "Live-A/B-Validierung ausstehend (Harness abgelehnt). "
        "Rollback = ein Flip."
    )

    rung: Dict[str, Any] = {
        "id": "thin",
        "label": rung_label,
        "what_it_does": rung_summary,
        "token_delta": delta,
        "eur_delta": price_input_delta_eur(delta, reference_scale, pricing_row),
        "cumulative_pct": 0.0,
        "confidence": "available",
        "source_report": report_path,
        "footnote": note,
    }
    return rung
|
|
326
|
+
|
|
327
|
+
|
|
231
328
|
def condense_rung_from_telegraph_v2(
|
|
232
329
|
telegraph_v2: Optional[Dict[str, Any]],
|
|
233
330
|
baseline_input_tokens: int,
|
|
@@ -551,7 +648,7 @@ def assemble_ladder(
|
|
|
551
648
|
rung_copy = dict(rung)
|
|
552
649
|
delta = (
|
|
553
650
|
int(rung_copy.get("token_delta", 0))
|
|
554
|
-
if rung_copy.get("confidence")
|
|
651
|
+
if rung_copy.get("confidence") in _COUNTING_CONFIDENCES
|
|
555
652
|
else 0
|
|
556
653
|
)
|
|
557
654
|
running += delta
|
|
@@ -575,7 +672,7 @@ def compute_totals(
|
|
|
575
672
|
cumulative_token_delta = sum(
|
|
576
673
|
int(r.get("token_delta", 0))
|
|
577
674
|
for r in rungs
|
|
578
|
-
if r.get("confidence")
|
|
675
|
+
if r.get("confidence") in _COUNTING_CONFIDENCES
|
|
579
676
|
)
|
|
580
677
|
cumulative_pct = 0.0
|
|
581
678
|
if baseline_input_tokens > 0:
|
|
@@ -43,10 +43,12 @@ try:
|
|
|
43
43
|
condense_rung_from_telegraph_v2,
|
|
44
44
|
destructive_stops_metric,
|
|
45
45
|
load_rung_from_frugality,
|
|
46
|
+
load_rung_from_projection,
|
|
46
47
|
load_rung_from_router,
|
|
47
48
|
rtk_rung_from_report,
|
|
48
49
|
selection_metric_from_dev_reports,
|
|
49
50
|
terse_rung_from_telegraph_v1,
|
|
51
|
+
thin_rung_from_projection,
|
|
50
52
|
)
|
|
51
53
|
except ImportError:
|
|
52
54
|
from scripts._lib.value_ladder import ( # type: ignore[no-redef]
|
|
@@ -59,15 +61,18 @@ except ImportError:
|
|
|
59
61
|
condense_rung_from_telegraph_v2,
|
|
60
62
|
destructive_stops_metric,
|
|
61
63
|
load_rung_from_frugality,
|
|
64
|
+
load_rung_from_projection,
|
|
62
65
|
load_rung_from_router,
|
|
63
66
|
rtk_rung_from_report,
|
|
64
67
|
selection_metric_from_dev_reports,
|
|
65
68
|
terse_rung_from_telegraph_v1,
|
|
69
|
+
thin_rung_from_projection,
|
|
66
70
|
)
|
|
67
71
|
|
|
68
72
|
|
|
69
73
|
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
|
|
70
74
|
ROUTER_JSON = REPO_ROOT / "dist" / "router.json"
|
|
75
|
+
PROJECTION_COST = REPO_ROOT / "internal" / "bench" / "reports" / "projection-cost.json"
|
|
71
76
|
RULES_DIR = REPO_ROOT / ".agent-src" / "rules"
|
|
72
77
|
CHARTER_PATH = REPO_ROOT / ".agent-src" / "contexts" / "contracts" / "frugality-charter.md"
|
|
73
78
|
FRUGALITY_BASELINE = REPO_ROOT / "agents" / "runtime" / "frugality" / "baseline.jsonl"
|
|
@@ -240,28 +245,37 @@ def assemble_value_v1(
|
|
|
240
245
|
# Load rung — prefer the canonical kernel list from dist/router.json
|
|
241
246
|
# (real always-loaded footprint), fall back to the frugality canon
|
|
242
247
|
# baseline only when the router is missing on disk.
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
248
|
+
# Prefer the REAL eager footprint (projection-cost.json) — 0B.6 confirmed
|
|
249
|
+
# the primary tool eager-loads every rule body. Fall back to the
|
|
250
|
+
# kernel-only router rung, then the frugality canon, when the projection
|
|
251
|
+
# report is missing.
|
|
252
|
+
projection = safe_load_json(PROJECTION_COST)
|
|
253
|
+
load_rung = load_rung_from_projection(projection, ref, pricing_row)
|
|
254
|
+
if load_rung is None:
|
|
255
|
+
router = safe_load_json(ROUTER_JSON)
|
|
256
|
+
if router and "kernel" in router:
|
|
257
|
+
rule_chars = {
|
|
258
|
+
p.stem: len(p.read_text())
|
|
259
|
+
for p in RULES_DIR.glob("*.md")
|
|
260
|
+
} if RULES_DIR.exists() else {}
|
|
261
|
+
charter_chars = (
|
|
262
|
+
len(CHARTER_PATH.read_text()) if CHARTER_PATH.exists() else 0
|
|
263
|
+
)
|
|
264
|
+
load_rung = load_rung_from_router(
|
|
265
|
+
router, rule_chars, charter_chars, ref, pricing_row
|
|
266
|
+
)
|
|
267
|
+
else:
|
|
268
|
+
load_rung = load_rung_from_frugality(
|
|
269
|
+
latest_frugality_record(), ref, pricing_row
|
|
270
|
+
)
|
|
271
|
+
thin_rung = thin_rung_from_projection(projection, ref, pricing_row)
|
|
259
272
|
t2 = safe_load_json(TELEGRAPH_V2)
|
|
260
273
|
t1 = safe_load_json(TELEGRAPH_V1)
|
|
261
274
|
rtk = safe_load_json(RTK_LATEST)
|
|
262
275
|
ladder: List[Dict[str, Any]] = [
|
|
263
276
|
baseline_rung(ref),
|
|
264
277
|
load_rung,
|
|
278
|
+
thin_rung,
|
|
265
279
|
condense_rung_from_telegraph_v2(t2, baseline_input_tokens, ref, pricing_row),
|
|
266
280
|
rtk_rung_from_report(rtk, ref, pricing_row),
|
|
267
281
|
terse_rung_from_telegraph_v1(t1, ref, pricing_row),
|
|
@@ -17,7 +17,7 @@ Resolution precedence — first non-empty wins:
|
|
|
17
17
|
3. Global setting ``ai_council.mode``
|
|
18
18
|
4. Built-in default ``manual``
|
|
19
19
|
|
|
20
|
-
This mirrors how ``
|
|
20
|
+
This mirrors how ``rule_loading_tier`` resolves in
|
|
21
21
|
``.augment/guidelines/agent-infra/layered-settings.md``.
|
|
22
22
|
|
|
23
23
|
The resolver is pure — it never touches the filesystem or environment.
|
|
@@ -132,11 +132,27 @@ def longest_rules(top: int = 10) -> list[dict]:
|
|
|
132
132
|
return rows[:top]
|
|
133
133
|
|
|
134
134
|
|
|
135
|
+
def thin_projection() -> dict:
|
|
136
|
+
"""Eager-vs-thin rule-layer footprint (Phase 3.1 lever).
|
|
137
|
+
|
|
138
|
+
Reuses `scripts/project_thin_rules.py::measure` so the value dashboard can
|
|
139
|
+
cite a single persisted source for both the eager always-on cost and the
|
|
140
|
+
thin-projection saving. Returns an empty dict if the measurer is
|
|
141
|
+
unavailable, so the audit never hard-fails on it.
|
|
142
|
+
"""
|
|
143
|
+
try:
|
|
144
|
+
from project_thin_rules import measure as _measure # noqa: E402
|
|
145
|
+
return _measure()
|
|
146
|
+
except Exception: # pragma: no cover — best-effort enrichment
|
|
147
|
+
return {}
|
|
148
|
+
|
|
149
|
+
|
|
135
150
|
def build() -> dict:
|
|
136
151
|
return {
|
|
137
152
|
"generated": _dt.datetime.now(_dt.timezone.utc).isoformat(timespec="seconds"),
|
|
138
153
|
"token_method": token_count.method_note(),
|
|
139
154
|
"rule_footprint": rule_footprint(),
|
|
155
|
+
"thin_projection": thin_projection(),
|
|
140
156
|
"description_catalog": description_catalog(),
|
|
141
157
|
"longest_rules": longest_rules(),
|
|
142
158
|
}
|