@event4u/agent-config 5.6.0 → 5.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/cost-report.md +12 -7
- package/.agent-src/commands/prediction-pool.md +215 -0
- package/.agent-src/commands/set-cost-profile.md +8 -8
- package/.agent-src/commands/sync-agent-settings.md +2 -2
- package/.agent-src/presets/README.md +1 -1
- package/.agent-src/profiles/README.md +1 -1
- package/.agent-src/rules/non-destructive-by-default.md +2 -1
- package/.agent-src/skills/prediction-pool-optimizer/SKILL.md +196 -0
- package/.agent-src/skills/prediction-pool-optimizer/evals/triggers.json +18 -0
- package/.agent-src/skills/prediction-pool-optimizer/reference/ev-fixtures.md +80 -0
- package/.agent-src/templates/agent-settings.md +7 -7
- package/.agent-src/templates/agents/agent-project-settings.example.yml +2 -2
- package/.agent-src/templates/scripts/work_engine/_lib/agent_settings.py +2 -1
- package/.agent-src/templates/scripts/work_engine/hook_bootstrap.py +1 -1
- package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +9 -7
- package/.agent-src/templates/scripts/work_engine/hooks/settings.py +9 -10
- package/.agent-src/templates/scripts/work_engine/scoring/memory_visibility.py +17 -4
- package/.claude-plugin/marketplace.json +3 -1
- package/CHANGELOG.md +57 -0
- package/README.md +2 -2
- package/config/agent-settings.template.yml +11 -2
- package/config/discovery/packs.yml +11 -0
- package/config/discovery/workspaces.yml +1 -1
- package/config/profiles/balanced.ini +1 -1
- package/config/profiles/full.ini +1 -1
- package/config/profiles/minimal.ini +1 -1
- package/dist/discovery/deprecation-report.md +1 -1
- package/dist/discovery/discovery-manifest.json +80 -14
- package/dist/discovery/discovery-manifest.json.sha256 +1 -1
- package/dist/discovery/discovery-manifest.summary.md +3 -2
- package/dist/discovery/orphan-report.md +1 -1
- package/dist/discovery/packs.json +34 -3
- package/dist/discovery/trust-report.md +2 -2
- package/dist/discovery/workspaces.json +13 -4
- package/dist/mcp/registry-manifest.json +3 -3
- package/dist/server/io/substituteTemplate.js +3 -3
- package/dist/server/io/substituteTemplate.js.map +1 -1
- package/dist/server/routes/settings.js +2 -2
- package/dist/server/routes/settings.js.map +1 -1
- package/dist/server/schemas/settings.js +4 -2
- package/dist/server/schemas/settings.js.map +1 -1
- package/dist/ui/assets/{index-DVsyUMZe.js → index-5lFqAKL0.js} +2 -2
- package/dist/ui/assets/index-5lFqAKL0.js.map +1 -0
- package/dist/ui/index.html +1 -1
- package/docs/architecture/current-onboard-baseline.md +3 -3
- package/docs/architecture.md +2 -2
- package/docs/catalog.md +7 -5
- package/docs/contracts/adr-level-6-productization.md +1 -1
- package/docs/contracts/config-presets.md +2 -2
- package/docs/contracts/cost-profile-defaults.md +5 -5
- package/docs/contracts/discovery-manifest.schema.json +1 -1
- package/docs/contracts/explain-trace.schema.json +3 -3
- package/docs/contracts/memory-visibility-v1.md +15 -7
- package/docs/contracts/profile-system.md +2 -2
- package/docs/contracts/settings-api.md +3 -3
- package/docs/contracts/value-report-schema.md +14 -1
- package/docs/customization.md +21 -5
- package/docs/decisions/ADR-010-profile-pack-preset-boundary.md +11 -11
- package/docs/decisions/ADR-013-discovery-frontmatter-contract.md +16 -2
- package/docs/decisions/ADR-034-per-skill-model-recommendation-transport.md +1 -1
- package/docs/decisions/ADR-036-global-install-browser-wizard-handoff.md +106 -0
- package/docs/decisions/ADR-037-cost-profile-untangle.md +117 -0
- package/docs/decisions/ADR-rule-kernel-and-router.md +1 -1
- package/docs/decisions/INDEX.md +2 -0
- package/docs/getting-started.md +2 -2
- package/docs/guidelines/agent-infra/layered-settings.md +2 -2
- package/docs/installation.md +3 -3
- package/docs/setup/mcp-client-config.md +1 -1
- package/docs/value.md +9 -7
- package/docs/wizard.md +1 -1
- package/package.json +1 -1
- package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
- package/scripts/_cli/cmd_explain.py +1 -1
- package/scripts/_cli/explain_last/inputs.py +11 -8
- package/scripts/_cli/explain_last/sections/inputs.py +1 -1
- package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
- package/scripts/_lib/agent_settings.py +2 -1
- package/scripts/_lib/value_ladder.py +99 -2
- package/scripts/_lib/value_report.py +30 -16
- package/scripts/ai_council/modes.py +1 -1
- package/scripts/audit_initial_context.py +16 -0
- package/scripts/check_skill_requires.py +143 -0
- package/scripts/condense.py +13 -2
- package/scripts/first-run.sh +11 -11
- package/scripts/install +14 -1
- package/scripts/install.py +127 -428
- package/scripts/install_anthropic_key.sh +1 -1
- package/scripts/install_openai_key.sh +1 -1
- package/scripts/lint_discovery_vocabulary.py +5 -5
- package/scripts/lint_value_dashboard.py +1 -1
- package/scripts/pack_mcp_content.py +1 -1
- package/scripts/prediction-pool/adapters/_schema.md +42 -0
- package/scripts/prediction-pool/adapters/kicktipp.yml +23 -0
- package/scripts/prediction-pool/poisson_sim.py +167 -0
- package/scripts/render_value_md.py +1 -0
- package/scripts/schemas/agent-settings.schema.json +77 -0
- package/scripts/schemas/skill.schema.json +7 -0
- package/scripts/smoke_quickstart.py +4 -4
- package/scripts/sync_agent_settings.py +4 -2
- package/scripts/validate_agent_settings.py +120 -0
- package/templates/minimal/.agent-settings.yml +1 -1
- package/dist/ui/assets/index-DVsyUMZe.js.map +0 -1
|
@@ -56,15 +56,20 @@ profile recommendation per the table:
|
|
|
56
56
|
|---|---|---|
|
|
57
57
|
| 🟢 OK | < 50% | within budget — no action |
|
|
58
58
|
| 🟡 INFO | ≥ 50% | log notification, no UX disruption |
|
|
59
|
-
| 🟠 WARNING | ≥ 75% | suggest
|
|
60
|
-
| 🔴 CRITICAL | ≥ 90% | recommend model
|
|
59
|
+
| 🟠 WARNING | ≥ 75% | suggest dropping the **model tier** — `model.auto_switch: auto`, or pick a lighter `model_tier` for routine work (the dominant spend lever) |
|
|
60
|
+
| 🔴 CRITICAL | ≥ 90% | recommend a lighter model tier first; only as a last resort suggest [`/set-cost-profile minimal`](set-cost-profile.md) **with a capability-loss warning** |
|
|
61
61
|
| 🛑 HARD_STOP | ≥ 100% | halt non-essential work; review before continuing (`budget.mjs check` exits 1) |
|
|
62
62
|
|
|
63
|
-
|
|
64
|
-
is
|
|
63
|
+
**Lever order matters.** Per-turn spend is dominated by the **model** (a high→lite
|
|
64
|
+
tier swap is roughly a 10× cost delta) and by query/response volume — not by how many
|
|
65
|
+
rules load. `rule_loading_tier` loads its rule bodies roughly once per session (a small,
|
|
66
|
+
fixed cost), so lowering it saves little and *reduces the agent's guardrail coverage*.
|
|
67
|
+
Recommend the model lever first:
|
|
65
68
|
|
|
66
|
-
>
|
|
67
|
-
>
|
|
69
|
+
> Budget ≥ {level}. The biggest lever is the **model tier** — set
|
|
70
|
+
> `model.auto_switch: auto` (or pick a lighter `model_tier` for routine turns).
|
|
71
|
+
> Lowering `rule_loading_tier` saves far less and drops behavioural guardrails —
|
|
72
|
+
> use [`/set-cost-profile minimal`](set-cost-profile.md) only as a last resort.
|
|
68
73
|
|
|
69
74
|
### 4. First-run: prompt to set a budget
|
|
70
75
|
|
|
@@ -109,7 +114,7 @@ Allowed values: `today`, `week`, `month`, `all` (default).
|
|
|
109
114
|
|
|
110
115
|
## See also
|
|
111
116
|
|
|
112
|
-
- [`/set-cost-profile`](set-cost-profile.md) — change `
|
|
117
|
+
- [`/set-cost-profile`](set-cost-profile.md) — change `rule_loading_tier` in
|
|
113
118
|
`.agent-settings.yml`.
|
|
114
119
|
- [`/agent-status`](agent-status.md) — per-conversation token estimate
|
|
115
120
|
(different scope: in-flight estimate, not historical actuals).
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
---
|
|
2
|
+
model_tier: inherit
|
|
3
|
+
name: prediction-pool
|
|
4
|
+
tier: 2
|
|
5
|
+
description: "Fill a prediction pool (kicktipp, football/basketball WM): optimize expected points under the rules, enter tips via Playwright. Triggers 'Tippspiel', 'kicktipp', 'predict the pool'."
|
|
6
|
+
skills: [prediction-pool-optimizer]
|
|
7
|
+
suggestion:
|
|
8
|
+
eligible: true
|
|
9
|
+
trigger_description: "fill my Tippspiel, kicktipp tips, predict the pool, betting/prediction pool predictions, optimize my pool tips for an event"
|
|
10
|
+
trigger_context: "user wants tips for a prediction pool (kicktipp etc.) for an upcoming sports event and wants them computed and/or entered into the pool's web UI"
|
|
11
|
+
workspaces:
|
|
12
|
+
- small-business
|
|
13
|
+
packs:
|
|
14
|
+
- fun
|
|
15
|
+
lifecycle: experimental
|
|
16
|
+
trust:
|
|
17
|
+
level: experimental
|
|
18
|
+
install:
|
|
19
|
+
default: false
|
|
20
|
+
removable: true
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
# /prediction-pool
|
|
24
|
+
|
|
25
|
+
`/prediction-pool [<event>] [--pool-url <url>] [--council off|event|match] [--submit] [--fresh|--continue]`
|
|
26
|
+
|
|
27
|
+
Fills out a **prediction pool** (kicktipp-style company pools: football
|
|
28
|
+
World Cup (WM), basketball World Cup, …) by optimizing for the **maximum expected points
|
|
29
|
+
under the pool's actual scoring rules** — not for the most likely match
|
|
30
|
+
outcome. Researches market odds, computes expected-value tips, presents a
|
|
31
|
+
table for approval, then enters them into the pool's web UI via Playwright.
|
|
32
|
+
|
|
33
|
+
The cognition lives in the [`prediction-pool-optimizer`](../skills/prediction-pool-optimizer/SKILL.md)
|
|
34
|
+
skill (rules → market odds → expected value → participant field → tip).
|
|
35
|
+
This command is the orchestrator: event selection, the persisted analysis,
|
|
36
|
+
the optional AI-council pass, the Playwright entry, and the human gates.
|
|
37
|
+
|
|
38
|
+
**Hard gates (always):**
|
|
39
|
+
- **You log in.** The agent opens the page headful; it never reads,
|
|
40
|
+
stores, or types your credentials.
|
|
41
|
+
- **The agent never submits.** It fills the candidate tips and stops —
|
|
42
|
+
*you* submit, unless you passed `--submit` (or say so this turn).
|
|
43
|
+
- **Not betting or financial advice.** Decision support for a game; you
|
|
44
|
+
approve and submit.
|
|
45
|
+
|
|
46
|
+
**Block on ambiguity:** an unresolvable event, a missing pool URL with no
|
|
47
|
+
saved analysis, or contradictory or invalid flags (`--fresh` and `--continue`,
|
|
48
|
+
`--council` value not in `off|event|match`) halts with a precise message —
|
|
49
|
+
no silent best-guess.
|
|
50
|
+
|
|
51
|
+
## Inputs
|
|
52
|
+
|
|
53
|
+
| Input | Required | Meaning |
|
|
54
|
+
|---|---|---|
|
|
55
|
+
| `<event>` | no | The event to tip (e.g. "Football WM 2026"). Omitted → the command proposes 1–3 upcoming events + free input (Step 1). |
|
|
56
|
+
| `--pool-url <url>` | once per event | The pool's tip page. Saved into the per-event analysis; reused on later runs. |
|
|
57
|
+
| `--council off\|event\|match` | no | AI-council depth for the analysis. Default `off`; the command offers it at Step 4. `event` = one pass over the whole sheet; `match` = per-match (costlier). |
|
|
58
|
+
| `--submit` | no | Pre-authorize the agent to submit after entry. Default: never submit. |
|
|
59
|
+
| `--fresh` / `--continue` | no | Force a new analysis vs. build on the saved one (Step 2). Default: ask. |
|
|
60
|
+
|
|
61
|
+
## Steps
|
|
62
|
+
|
|
63
|
+
### 1. Resolve the event (block on ambiguity)
|
|
64
|
+
|
|
65
|
+
- `<event>` supplied → use it; derive a slug (`football-wm-2026`).
|
|
66
|
+
- **Omitted → propose, then stop and wait.** Name 1–3 *upcoming* events
|
|
67
|
+
(use the current date; research what is imminent) and offer free input:
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
> Which event should I generate tips for?
|
|
71
|
+
>
|
|
72
|
+
> 1. <imminent event A>
|
|
73
|
+
> 2. <imminent event B>
|
|
74
|
+
> 3. <imminent event C>
|
|
75
|
+
> 4. Something else — name it
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### 2. Resolve the per-event analysis (state ground-truth)
|
|
79
|
+
|
|
80
|
+
Look for `agents/tmp/prediction-pool/<slug>.md`.
|
|
81
|
+
|
|
82
|
+
- **None →** start a new analysis (created at Step 9, never before).
|
|
83
|
+
- **Exists →** read it. Then:
|
|
84
|
+
- It was **edited externally** since the last run (mtime newer than the
|
|
85
|
+
last `## Run <ts>` header, or it no longer parses) → surface that and
|
|
86
|
+
ask **merge / overwrite / abort** — never silently overwrite manual edits.
|
|
87
|
+
- Otherwise ask **build on it** (`--continue`) or **start fresh**
|
|
88
|
+
(`--fresh`); the flags pre-answer this for non-interactive runs.
|
|
89
|
+
|
|
90
|
+
The analysis holds the pool URL, the parsed rules, the current tip state,
|
|
91
|
+
and standing notes — so re-tuning later is fast.
|
|
92
|
+
|
|
93
|
+
### 3. Read the pool rules FIRST
|
|
94
|
+
|
|
95
|
+
Resolve the pool URL (`--pool-url` → saved analysis → ask). Then run the
|
|
96
|
+
optimizer skill's **rules pass** against the pool's rule page: exact result
|
|
97
|
+
vs. goal-difference vs. tendency points, bonus questions, joker rules,
|
|
98
|
+
quote/rarity scoring, special rules, deadlines, strategy limits. **No tips
|
|
99
|
+
before the rules are understood** — the whole strategy depends on them.
|
|
100
|
+
|
|
101
|
+
### 4. Run the optimizer + offer the AI-council pass
|
|
102
|
+
|
|
103
|
+
Run [`prediction-pool-optimizer`](../skills/prediction-pool-optimizer/SKILL.md),
|
|
104
|
+
adapted to the event's sport (football / basketball / …): market odds as
|
|
105
|
+
the primary signal → expected value under the Step-3 rules → participant
|
|
106
|
+
field → tip. Tournament/outright probabilities come from real outright
|
|
107
|
+
odds **or** the skill's executed Poisson helper — **never** a hallucinated
|
|
108
|
+
"I simulated 10,000 runs".
|
|
109
|
+
|
|
110
|
+
**Offer the AI-council pass (default off).** Unless `--council` is set,
|
|
111
|
+
ask once:
|
|
112
|
+
|
|
113
|
+
```
|
|
114
|
+
> Run the AI council over the analysis for a sharper second opinion?
|
|
115
|
+
>
|
|
116
|
+
> 1. No — use my analysis as is
|
|
117
|
+
> 2. Yes, per event — one council pass over the whole tip sheet
|
|
118
|
+
> 3. Yes, per match — each match judged separately (more accurate, costlier)
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
When on, run **graduated** to control cost: a cheap single-model pass
|
|
122
|
+
flags the riskiest matches first, then the full council reviews only the
|
|
123
|
+
flagged matches (`event`) or every match (`match`). Fold the council's
|
|
124
|
+
verdicts back into the table before Step 5. Council spend is always confirmed
|
|
125
|
+
first per [`ai-council`](../../../core/.agent-src.uncondensed/skills/ai-council/SKILL.md).
|
|
126
|
+
|
|
127
|
+
### 5. Output the approval table — ask whether & where to enter
|
|
128
|
+
|
|
129
|
+
Present the tips exactly as they would be entered, then **wait**:
|
|
130
|
+
|
|
131
|
+
| Match | Tip | Prob / EV | Risk | 1-line reason | Odds used |
|
|
132
|
+
|---|---|---|---|---|---|
|
|
133
|
+
|
|
134
|
+
Follow with group standings, the full bracket, and bonus-question answers
|
|
135
|
+
where the event has them. Then ask **whether to enter** and **into which
|
|
136
|
+
pool** (the saved URL, a different one, or none). Do **not** write the
|
|
137
|
+
analysis yet (Step 9) — tips are not yet confirmed.
|
|
138
|
+
|
|
139
|
+
### 6. Enter via Playwright (you log in)
|
|
140
|
+
|
|
141
|
+
Open the pool page **headful**. **You log in yourself** — the agent waits
|
|
142
|
+
and never touches credentials. Resolve the platform adapter:
|
|
143
|
+
|
|
144
|
+
- **Known platform** (e.g. kicktipp) → load the declarative selector map
|
|
145
|
+
`scripts/prediction-pool/adapters/<platform>.yml` (field → CSS selector). The
|
|
146
|
+
generic, trusted driver fills the inputs from the map — the adapter is
|
|
147
|
+
**data, not code**.
|
|
148
|
+
- **Unknown platform → vision-assisted synthesis.** Screenshot the page,
|
|
149
|
+
identify the tip inputs, **highlight them** and ask you to confirm the
|
|
150
|
+
mapping, then fill from the confirmed ephemeral map. No code is run from
|
|
151
|
+
an untrusted source.
|
|
152
|
+
|
|
153
|
+
### 7. Stop before submit
|
|
154
|
+
|
|
155
|
+
Fill the candidate results and **stop**. *You* press submit. The agent
|
|
156
|
+
submits only if `--submit` was passed or you authorize it **this turn**
|
|
157
|
+
(mirrors [`non-destructive-by-default`](../../../core/.agent-src.uncondensed/rules/non-destructive-by-default.md)).
|
|
158
|
+
|
|
159
|
+
### 8. Offer a second pool
|
|
160
|
+
|
|
161
|
+
Ask whether to also enter the same (or re-optimized) tips on another
|
|
162
|
+
pool / site. If yes, loop Steps 3–7 for that pool.
|
|
163
|
+
|
|
164
|
+
### 9. Persist / extend the analysis (only now)
|
|
165
|
+
|
|
166
|
+
Append a run-stamped section to `agents/tmp/prediction-pool/<slug>.md`: pool
|
|
167
|
+
URL(s), parsed rules, the entered tips with state `entered, not submitted`
|
|
168
|
+
(or `submitted` if Step 7 submitted), council verdicts if any, and
|
|
169
|
+
standing notes. Append-only — earlier runs stay as history. This is the
|
|
170
|
+
base the next run reads (Step 2).
|
|
171
|
+
|
|
172
|
+
### 10. New-platform adapter — offer to contribute (gated)
|
|
173
|
+
|
|
174
|
+
If Step 6 synthesized a new selector map, offer to (a) save it locally and
|
|
175
|
+
(b) open a **PR** adding `scripts/prediction-pool/adapters/<platform>.yml` so
|
|
176
|
+
coverage grows for everyone. The PR carries **declarative selector data
|
|
177
|
+
only** — never executable code — and only on explicit permission (no
|
|
178
|
+
auto-commit, no auto-push).
|
|
179
|
+
|
|
180
|
+
### 11. Report
|
|
181
|
+
|
|
182
|
+
Print: event + slug, pool URL(s), rules summary, council depth used,
|
|
183
|
+
matches tipped, entry result (`entered, not submitted` | `submitted`),
|
|
184
|
+
adapter (`<platform>.yml` | `vision-synthesized` | `pr-offered`), analysis
|
|
185
|
+
file path. No commit. No push.
|
|
186
|
+
|
|
187
|
+
## Rules
|
|
188
|
+
|
|
189
|
+
- **You log in; the agent never handles credentials.** Headful only.
|
|
190
|
+
- **The agent never submits** unless `--submit` or this-turn authorization.
|
|
191
|
+
- **Rules first.** No tips before the pool's scoring is parsed.
|
|
192
|
+
- **No hallucinated simulation.** Outright odds or executed Poisson code —
|
|
193
|
+
never a claimed-but-unrun Monte-Carlo.
|
|
194
|
+
- **Analysis is written only after tips are confirmed** (Step 9), never
|
|
195
|
+
before — and never silently over an externally edited file.
|
|
196
|
+
- **Adapters are declarative data, not code.** Unknown platforms use the
|
|
197
|
+
ephemeral vision path; contributed adapters are selector maps via PR.
|
|
198
|
+
- **AI council is opt-in, default off**, and always asks before spending.
|
|
199
|
+
- **Not betting or financial advice.** A fun tool; you decide and submit.
|
|
200
|
+
- **No commit, no push, no PR** without explicit permission (the adapter
|
|
201
|
+
PR offer is gated).
|
|
202
|
+
- **Kill-switch.** Ships `lifecycle: experimental` · `install.default:
|
|
203
|
+
false`. Disable = remove the command + `prediction-pool-optimizer` skill, then
|
|
204
|
+
regenerate the projected tool trees.
|
|
205
|
+
|
|
206
|
+
## See also
|
|
207
|
+
|
|
208
|
+
- [`prediction-pool-optimizer`](../skills/prediction-pool-optimizer/SKILL.md) — the
|
|
209
|
+
rules → odds → EV → field → tip cognition.
|
|
210
|
+
- [`scripts/prediction-pool/adapters/_schema.md`](../../../scripts/prediction-pool/adapters/_schema.md) —
|
|
211
|
+
the declarative adapter data contract (for PR contributions).
|
|
212
|
+
- [`ai-council`](../../../core/.agent-src.uncondensed/skills/ai-council/SKILL.md) —
|
|
213
|
+
the optional second-opinion pass (Step 4).
|
|
214
|
+
- [`playwright-architect`](../../../core/.agent-src.uncondensed/skills/playwright-architect/SKILL.md) —
|
|
215
|
+
browser-automation patterns for the entry step.
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
model_tier: inherit
|
|
3
3
|
name: set-cost-profile
|
|
4
4
|
tier: 1
|
|
5
|
-
description: Change the
|
|
5
|
+
description: Change the rule_loading_tier in .agent-settings.yml — shows each profile's meaning and applies the selection
|
|
6
6
|
skills: [file-editor]
|
|
7
7
|
suggestion:
|
|
8
8
|
eligible: false
|
|
@@ -17,7 +17,7 @@ packs:
|
|
|
17
17
|
|
|
18
18
|
# /set-cost-profile
|
|
19
19
|
|
|
20
|
-
Changes `
|
|
20
|
+
Changes `rule_loading_tier` in `.agent-settings.yml`. Four profiles are defined in
|
|
21
21
|
the [`agent-settings` template](../templates/agent-settings.md#cost-profiles):
|
|
22
22
|
|
|
23
23
|
- `minimal` · `balanced` · `full` · `custom`
|
|
@@ -32,7 +32,7 @@ the [`agent-settings` template](../templates/agent-settings.md#cost-profiles):
|
|
|
32
32
|
directly or ask the agent — the merge rules live in
|
|
33
33
|
[`layered-settings`](../docs/guidelines/agent-infra/layered-settings.md#section-aware-merge-rules).
|
|
34
34
|
- For role modes use [`/mode`](mode.md) — different concept (sets
|
|
35
|
-
`roles.active_role`, not `
|
|
35
|
+
`roles.active_role`, not `rule_loading_tier`).
|
|
36
36
|
|
|
37
37
|
## Steps
|
|
38
38
|
|
|
@@ -49,7 +49,7 @@ Profile names are case-insensitive on input; the file value stays lowercase.
|
|
|
49
49
|
Read `.agent-settings.yml`. If missing, tell the user to run
|
|
50
50
|
`scripts/install` first and stop — do not create the file here.
|
|
51
51
|
|
|
52
|
-
Extract the current `
|
|
52
|
+
Extract the current `rule_loading_tier` value.
|
|
53
53
|
|
|
54
54
|
### 3. Load profile descriptions
|
|
55
55
|
|
|
@@ -63,7 +63,7 @@ Render the current value and present numbered choices with the hint text
|
|
|
63
63
|
extracted in step 3:
|
|
64
64
|
|
|
65
65
|
```
|
|
66
|
-
> Current:
|
|
66
|
+
> Current: rule_loading_tier = {current}
|
|
67
67
|
>
|
|
68
68
|
> 1. minimal — {hint from template}
|
|
69
69
|
> 2. balanced — {hint from template}
|
|
@@ -77,7 +77,7 @@ value directly — still echo the old → new line in step 6.
|
|
|
77
77
|
|
|
78
78
|
### 5. Write the value
|
|
79
79
|
|
|
80
|
-
Update `
|
|
80
|
+
Update `rule_loading_tier` in `.agent-settings.yml` using the
|
|
81
81
|
[section-aware merge rules](../docs/guidelines/agent-infra/layered-settings.md#section-aware-merge-rules)
|
|
82
82
|
(preserve comments, preserve key order, touch only the changed field).
|
|
83
83
|
|
|
@@ -86,7 +86,7 @@ If the user picked "Keep current", do nothing and stop.
|
|
|
86
86
|
### 6. Confirm
|
|
87
87
|
|
|
88
88
|
```
|
|
89
|
-
>
|
|
89
|
+
> rule_loading_tier: {old} → {new}
|
|
90
90
|
```
|
|
91
91
|
|
|
92
92
|
If the new profile activates a surface the user hasn't used before
|
|
@@ -106,7 +106,7 @@ steps here, that's the docs' job.
|
|
|
106
106
|
## Cloud Behavior
|
|
107
107
|
|
|
108
108
|
On cloud surfaces (Claude.ai Web, Skills API) this command is **fully inert** —
|
|
109
|
-
there is no `.agent-settings.yml` to write and no `
|
|
109
|
+
there is no `.agent-settings.yml` to write and no `rule_loading_tier` toggle to
|
|
110
110
|
flip. Cost behaviour on those surfaces is governed by the platform itself.
|
|
111
111
|
|
|
112
112
|
## See also
|
|
@@ -41,7 +41,7 @@ Use when:
|
|
|
41
41
|
|
|
42
42
|
## When NOT to use
|
|
43
43
|
|
|
44
|
-
- To change a value (`ide`, `
|
|
44
|
+
- To change a value (`ide`, `rule_loading_tier`, `max_parallel`) → edit the
|
|
45
45
|
file directly or ask the agent; the sync only reconciles structure.
|
|
46
46
|
- To create `.agent-project-settings.yml` (team file) → that is a
|
|
47
47
|
separate concern; this command only touches the developer file.
|
|
@@ -95,7 +95,7 @@ Free-text replies (`"nö"`, `"leave it"`, unrecognized input) count as
|
|
|
95
95
|
|
|
96
96
|
### 4. Profile override
|
|
97
97
|
|
|
98
|
-
The script auto-detects the profile from the target's `
|
|
98
|
+
The script auto-detects the profile from the target's `rule_loading_tier`
|
|
99
99
|
key and falls back to `minimal`. To sync against a different profile
|
|
100
100
|
(e.g. during a profile change), pass `--profile balanced` or
|
|
101
101
|
`--profile full` — but ask the user first; changing the profile is a
|
|
@@ -4,7 +4,7 @@ Seed presets for the [preset system](../../docs/contracts/config-presets.md).
|
|
|
4
4
|
Each preset bundles governance knobs (autonomy / confidence / risk /
|
|
5
5
|
council / mcp / cost / notifications) so the user picks a stance, not
|
|
6
6
|
a dozen individual values. Boundary against `profile.id`, `pack.id`,
|
|
7
|
-
and `
|
|
7
|
+
and `rule_loading_tier` lives in
|
|
8
8
|
[ADR-010](../../docs/decisions/ADR-010-profile-pack-preset-boundary.md).
|
|
9
9
|
|
|
10
10
|
## Seed set (v2.x — fixed)
|
|
@@ -4,7 +4,7 @@ Seed profiles for the [profile system](../../docs/contracts/profile-system.md).
|
|
|
4
4
|
Each profile answers *who is the user?* — audience identity that
|
|
5
5
|
selects the default skill/command surface, README entry-paragraph,
|
|
6
6
|
and persona pre-selection. Boundary against `preset.id`, `pack.id`,
|
|
7
|
-
and `
|
|
7
|
+
and `rule_loading_tier` lives in
|
|
8
8
|
[ADR-010](../../docs/decisions/ADR-010-profile-pack-preset-boundary.md).
|
|
9
9
|
|
|
10
10
|
## Seed set (v2.x — fixed)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
type: "always"
|
|
3
3
|
tier: "safety-floor"
|
|
4
|
-
description: "
|
|
4
|
+
description: "Hard Floor: agent asks before prod-trunk commits/merges, deploys, pushes, prod data/infra, bulk deletions/infra commits; verify branch before each commit; no autonomy or roadmap bypass"
|
|
5
5
|
alwaysApply: true
|
|
6
6
|
load_context:
|
|
7
7
|
- ../contexts/authority/destructive-mechanics.md
|
|
@@ -28,6 +28,7 @@ Triggers below require explicit user confirmation **on this turn** — not from
|
|
|
28
28
|
| Trigger | Examples |
|
|
29
29
|
|---|---|
|
|
30
30
|
| **Production-branch merge** | `main`, `master`, `prod`, `production`, `release/*`, or any branch the project marks as deployment trunk |
|
|
31
|
+
| **Commit on a production branch** | any `git commit` while `HEAD` is on a prod trunk (the branch set listed above). **Verify branch before every commit** — `main` is opt-in only, never inferred from a prior turn or a merged PR that left the repo on `main` |
|
|
31
32
|
| **Deploy / release** | `terraform apply` on prod, `kubectl apply` on prod, deploy scripts, release commands, tag pushes that trigger CI deployment |
|
|
32
33
|
| **Push to remote** | any `git push` (also covered by [`scope-control`](scope-control.md), restated so the floor never weakens) |
|
|
33
34
|
| **Production data / infra** | prod DB writes / migrations, prod config, secrets rotation, IAM / role / policy, DNS, anything in a `prod`-scoped path or pipeline |
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
---
|
|
2
|
+
model_tier: high
|
|
3
|
+
name: prediction-pool-optimizer
|
|
4
|
+
description: "Optimize prediction-pool tips (kicktipp etc.): pool rules + market odds → the expected-points-maximizing tip per match. Triggers 'optimize my pool tips', 'best kicktipp picks', 'predict'."
|
|
5
|
+
domain: product
|
|
6
|
+
personas: []
|
|
7
|
+
workspaces:
|
|
8
|
+
- small-business
|
|
9
|
+
packs:
|
|
10
|
+
- fun
|
|
11
|
+
lifecycle: experimental
|
|
12
|
+
trust:
|
|
13
|
+
level: experimental
|
|
14
|
+
install:
|
|
15
|
+
default: false
|
|
16
|
+
removable: true
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
# prediction-pool-optimizer
|
|
20
|
+
|
|
21
|
+
> Turn a prediction pool's **scoring rules** plus **market odds** into the
|
|
22
|
+
> tip that maximizes **expected points** — not the most likely outcome.
|
|
23
|
+
> Sport-agnostic core with per-sport probability blocks. Consumed by
|
|
24
|
+
> [`/prediction-pool`](../../commands/prediction-pool.md). The optimization target is
|
|
25
|
+
> the pool's score, so the chain is always **rules → odds → expected value
|
|
26
|
+
> → participant field → tip**, never "who wins this match?".
|
|
27
|
+
|
|
28
|
+
## When to use
|
|
29
|
+
|
|
30
|
+
Use when someone wants the best tips for a prediction / betting pool
|
|
31
|
+
(kicktipp-style company pools — football World Cup, basketball World Cup, …) and the
|
|
32
|
+
target is **pool points**, not match truth. Triggered by the
|
|
33
|
+
[`/prediction-pool`](../../commands/prediction-pool.md) command (Steps 3–5) or directly
|
|
34
|
+
when a user asks to optimize / maximize their pool picks.
|
|
35
|
+
|
|
36
|
+
**The one idea that makes this skill correct:** the highest-probability
|
|
37
|
+
result is **not** the highest-expected-value tip. Under most pool rules a
|
|
38
|
+
2:1 or 1:0 scores the same partial points as the "obvious" pick but hits
|
|
39
|
+
more often; under quote/rarity rules a rare-but-plausible result is worth
|
|
40
|
+
more. **Always optimize the pool's points, never the truth of the match.**
|
|
41
|
+
|
|
42
|
+
## Hard rules
|
|
43
|
+
|
|
44
|
+
- **Rules before tips.** Never produce a tip before the pool's scoring is
|
|
45
|
+
parsed (Procedure step 1). Strategy is a function of the rules.
|
|
46
|
+
- **Odds are the primary signal.** Bookmaker / market probabilities already
|
|
47
|
+
fold in form, squad, injuries, travel, climate. Use them as the
|
|
48
|
+
calibration base; only override with *current* information (confirmed
|
|
49
|
+
lineups, late injuries, suspensions, manager change).
|
|
50
|
+
- **No invented numbers.** Emit no probability you cannot derive from odds
|
|
51
|
+
or from **actually executed** code. Tournament/outright numbers come from
|
|
52
|
+
real outright odds **or** the executed Poisson helper — never a claimed
|
|
53
|
+
"I ran 10,000 simulations".
|
|
54
|
+
- **One-sentence justification** per tip. Short.
|
|
55
|
+
|
|
56
|
+
## Procedure
|
|
57
|
+
|
|
58
|
+
### 1. Parse the pool rules
|
|
59
|
+
|
|
60
|
+
From the pool's rule page, extract and document:
|
|
61
|
+
|
|
62
|
+
- Points for **exact result** / **goal (point) difference** / **tendency**.
|
|
63
|
+
- **Bonus questions** (champion, top scorer, group winners …).
|
|
64
|
+
- **Joker / multiplier** rules.
|
|
65
|
+
- **Quote / rarity** scoring (rare correct tips score more)? — flips the
|
|
66
|
+
whole strategy toward contrarian (step 4).
|
|
67
|
+
- Special scorings, **deadlines**, and **strategy limits** (e.g. max N
|
|
68
|
+
identical tips).
|
|
69
|
+
- **The goal**: place well, or *win* a large pool? (changes variance — step 4.)
|
|
70
|
+
|
|
71
|
+
### 2. Build the data base
|
|
72
|
+
|
|
73
|
+
Primary: current bookmaker odds, aggregated market probabilities, model
|
|
74
|
+
forecasts (e.g. Opta), Elo/SPI ratings. Secondary (only when it adds signal
|
|
75
|
+
the odds have not yet absorbed): confirmed lineups, injuries, suspensions,
|
|
76
|
+
manager change, recent form, home advantage, head-to-head, rest/travel,
|
|
77
|
+
weather. De-vig the odds (remove the bookmaker margin) before treating them
|
|
78
|
+
as probabilities.
|
|
79
|
+
|
|
80
|
+
### 3. Per-match probabilities (sport block)
|
|
81
|
+
|
|
82
|
+
Compute, per match, the outcome distribution and the most plausible exact
|
|
83
|
+
results. Pick the block for the event's sport:
|
|
84
|
+
|
|
85
|
+
**Football / soccer**
|
|
86
|
+
- Model goals as **Poisson** per side from each team's expected goals;
|
|
87
|
+
draws are real (~22–28% baseline) — people under-tip them.
|
|
88
|
+
- Outcome split: home-win / draw / away-win; then the exact-score grid.
|
|
89
|
+
- Common EV-strong exact results: 1:0, 2:1, 1:1, 2:0.
|
|
90
|
+
|
|
91
|
+
**Basketball**
|
|
92
|
+
- **No draws.** Model the points margin as roughly **Gaussian** around the
|
|
93
|
+
market spread; pair with the moneyline for win probability and the
|
|
94
|
+
total (over/under) for the score level.
|
|
95
|
+
- Tendency = sign of the margin; "exact result" rules are rare — read step 1.
|
|
96
|
+
|
|
97
|
+
**Generic fallback (other sports)**
|
|
98
|
+
- Derive the outcome split straight from de-vigged moneyline odds; estimate
|
|
99
|
+
a plausible score from the market total. State the model used.
|
|
100
|
+
|
|
101
|
+
Cross-check the model against the market; on a large divergence, re-check
|
|
102
|
+
the data and explain the cause before trusting it.
|
|
103
|
+
|
|
104
|
+
### 4. Convert to the EV-maximizing tip
|
|
105
|
+
|
|
106
|
+
Map probabilities to the tip with the **highest expected points under the
|
|
107
|
+
step-1 rules** — not the prettiest match.
|
|
108
|
+
|
|
109
|
+
- **Standard fixed-point scoring + goal "place well"** → tip the EV-maximal
|
|
110
|
+
result per match. Favourites with modest scorelines dominate. **No
|
|
111
|
+
contrarian** — only your tip matters for your score, so deliberately
|
|
112
|
+
tipping "different" just burns EV.
|
|
113
|
+
- **Quote / rarity scoring** → weigh rarer-but-plausible results against
|
|
114
|
+
their higher payout; take rarity when `payout × probability` wins.
|
|
115
|
+
- **Goal = win a large pool** → on a *subset* of matches, take calculated
|
|
116
|
+
variance (plausible underdogs) to create upside, poker-tournament style.
|
|
117
|
+
|
|
118
|
+
**Participant-field thresholds** (when two tips are close, prefer the one
|
|
119
|
+
with the higher edge over the typical participant):
|
|
120
|
+
|
|
121
|
+
- Pool **N < 20** → maximize EV, ignore the field.
|
|
122
|
+
- **20 ≤ N < 100 and you are in the prize positions** → maximize EV.
|
|
123
|
+
- **N ≥ 100, or you are outside the top ~20%** → add field-relative
|
|
124
|
+
variance (move off the consensus on a subset; rough Kelly-fraction sizing).
|
|
125
|
+
|
|
126
|
+
Respect all strategy limits from step 1 (max identical tips, etc.).
|
|
127
|
+
|
|
128
|
+
### 5. Tournament & bonus questions (no hallucination)
|
|
129
|
+
|
|
130
|
+
For group winners, KO rounds, champion, and bonus questions, use **either**:
|
|
131
|
+
|
|
132
|
+
- real **outright market odds** ("to win group", "to reach final",
|
|
133
|
+
"outright winner"), **or**
|
|
134
|
+
- the executed Poisson tournament simulator:
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
python3 scripts/prediction-pool/poisson_sim.py <teams-xg.json> --runs 20000
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
It plays the bracket from per-team expected goals and prints empirical
|
|
141
|
+
advancement / title probabilities. **Run it — never report simulated
|
|
142
|
+
numbers you did not actually compute.**
|
|
143
|
+
|
|
144
|
+
Optimize bonus answers on the same expected-points basis. Re-run as late as
|
|
145
|
+
the deadline allows: re-check confirmed lineups, injuries, suspensions, and
|
|
146
|
+
odds movement, then adjust. The pool's per-match deadline is the only hard
|
|
147
|
+
constraint.
|
|
148
|
+
|
|
149
|
+
## Output format
|
|
150
|
+
|
|
151
|
+
1. **Approval table** — one row per match:
|
|
152
|
+
|
|
153
|
+
```
|
|
154
|
+
Match | Tip | Prob / EV | Risk (low/med/high) | 1-line reason | Odds used
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
2. **Group standings, the full bracket, and bonus-question answers** where
|
|
158
|
+
the event has them.
|
|
159
|
+
3. **Self-check note** — confirm the tips reconcile with
|
|
160
|
+
[`reference/ev-fixtures.md`](reference/ev-fixtures.md) (known pool rules +
|
|
161
|
+
market odds → a known-good EV tip). If your method disagrees with a
|
|
162
|
+
fixture, your method is wrong — find the error (usually a forgotten
|
|
163
|
+
partial-points term or un-de-vigged odds), don't ship the tip.
|
|
164
|
+
|
|
165
|
+
Handed back to [`/prediction-pool`](../../commands/prediction-pool.md) for the approval
|
|
166
|
+
gate — the skill never enters or submits anything.
|
|
167
|
+
|
|
168
|
+
## Gotcha
|
|
169
|
+
|
|
170
|
+
- **Tipping the modal result, not the EV-maximal one.** The single most
|
|
171
|
+
likely scoreline rarely maximizes partial points — compute EV across the
|
|
172
|
+
result grid, don't eyeball the favourite.
|
|
173
|
+
- **Forgetting to de-vig.** The inverses of raw bookmaker odds sum to >100%; treating them
|
|
174
|
+
as probabilities inflates the favourite. Remove the margin first.
|
|
175
|
+
- **Contrarian under fixed points.** Deviating "to stand out" only helps
|
|
176
|
+
under quote/rarity rules or a win-a-large-pool goal — otherwise it burns EV.
|
|
177
|
+
- **Claimed-but-unrun simulation.** Numbers like "I ran 10,000 tournaments"
|
|
178
|
+
without executing `poisson_sim.py` are hallucinated — run the code or use
|
|
179
|
+
outright odds.
|
|
180
|
+
|
|
181
|
+
## Do NOT
|
|
182
|
+
|
|
183
|
+
- Tip the most likely result instead of the EV-maximal one.
|
|
184
|
+
- Go contrarian under standard fixed-point scoring with a "place well" goal.
|
|
185
|
+
- Report Monte-Carlo numbers without running `poisson_sim.py`.
|
|
186
|
+
- Treat raw odds as probabilities without removing the vig.
|
|
187
|
+
- Give betting or financial advice — this optimizes a game; the human submits.
|
|
188
|
+
|
|
189
|
+
## See also
|
|
190
|
+
|
|
191
|
+
- [`/prediction-pool`](../../commands/prediction-pool.md) — the orchestrator (event,
|
|
192
|
+
persistence, Playwright entry, gates).
|
|
193
|
+
- [`reference/ev-fixtures.md`](reference/ev-fixtures.md) — known-good
|
|
194
|
+
rules+odds → EV examples.
|
|
195
|
+
- [`scripts/prediction-pool/poisson_sim.py`](../../../../scripts/prediction-pool/poisson_sim.py) —
|
|
196
|
+
the executed tournament simulator.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "prediction-pool-optimizer",
|
|
3
|
+
"description": "7 should-trigger + 5 should-not-trigger queries. Should-trigger covers DE + EN phrasings and the core intent (pool tips, kicktipp, expected-points optimization across sports). Should-not-trigger covers near-miss neighbours: regulated financial advice (finance pack), plain match-result prediction with no pool, generic web research, AI video, and real-money sportsbook betting (out of scope / refuse).",
|
|
4
|
+
"queries": [
|
|
5
|
+
{"q": "optimize my kicktipp tips for the football WM 2026", "trigger": true},
|
|
6
|
+
{"q": "fill my company Tippspiel for the basketball world cup", "trigger": true},
|
|
7
|
+
{"q": "welche Tipps maximieren meine Punkte im kicktipp-Tippspiel?", "trigger": true},
|
|
8
|
+
{"q": "best picks for our office prediction pool given the scoring rules", "trigger": true},
|
|
9
|
+
{"q": "maximiere meine erwarteten Punkte im Tippspiel, nicht nur wer gewinnt", "trigger": true},
|
|
10
|
+
{"q": "predict our office kicktipp pool for the WM", "trigger": true},
|
|
11
|
+
{"q": "mach mein Tippspiel für die WM", "trigger": true},
|
|
12
|
+
{"q": "should we invest in this startup based on a DCF?", "trigger": false, "note": "regulated financial valuation → dcf-modeling / finance pack"},
|
|
13
|
+
{"q": "who will win tonight's match?", "trigger": false, "note": "plain result prediction, no pool / no scoring rules to optimize"},
|
|
14
|
+
{"q": "research the best running shoes for me", "trigger": false, "note": "generic web research → research / deep-research"},
|
|
15
|
+
{"q": "make a hype video for the world cup final", "trigger": false, "note": "AI video pipeline → /video"},
|
|
16
|
+
{"q": "place a €50 bet on the favourite at my bookmaker", "trigger": false, "note": "real-money sportsbook wagering — out of scope, not what this fun pool tool does"}
|
|
17
|
+
]
|
|
18
|
+
}
|