@event4u/agent-config 5.5.0 → 5.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/image/analyse.md +51 -0
- package/.agent-src/commands/image/create.md +53 -0
- package/.agent-src/commands/image/verify.md +48 -0
- package/.agent-src/commands/image.md +69 -0
- package/.agent-src/commands/video/from-song.md +40 -6
- package/.agent-src/contexts/authority/commit-mechanics.md +8 -0
- package/.agent-src/rules/commit-policy.md +3 -8
- package/.agent-src/rules/media-sync-ground-truth.md +58 -0
- package/.agent-src/skills/image-analyser/SKILL.md +121 -0
- package/.agent-src/skills/image-analyser/canon-spec.md +109 -0
- package/.agent-src/skills/image-analyser/evals/triggers.json +16 -0
- package/.agent-src/skills/image-creator/SKILL.md +117 -0
- package/.agent-src/skills/image-creator/evals/triggers.json +16 -0
- package/.agent-src/skills/song-to-script/SKILL.md +36 -13
- package/.claude-plugin/marketplace.json +7 -1
- package/CHANGELOG.md +56 -0
- package/README.md +2 -2
- package/config/agent-settings.template.yml +18 -0
- package/dist/discovery/deprecation-report.md +1 -1
- package/dist/discovery/discovery-manifest.json +171 -18
- package/dist/discovery/discovery-manifest.json.sha256 +1 -1
- package/dist/discovery/discovery-manifest.summary.md +4 -4
- package/dist/discovery/orphan-report.md +1 -1
- package/dist/discovery/packs.json +15 -8
- package/dist/discovery/trust-report.md +3 -3
- package/dist/discovery/workspaces.json +13 -6
- package/dist/mcp/registry-manifest.json +3 -3
- package/dist/router.json +1 -1
- package/dist/server/schemas/settings.js +4 -0
- package/dist/server/schemas/settings.js.map +1 -1
- package/docs/architecture.md +3 -3
- package/docs/catalog.md +20 -6
- package/docs/contracts/benchmark-report-schema.md +12 -10
- package/docs/contracts/command-clusters.md +1 -0
- package/docs/contracts/rule-router.md +39 -0
- package/docs/contracts/value-dashboard-spec.md +7 -3
- package/docs/contracts/value-report-schema.md +6 -1
- package/docs/getting-started.md +2 -2
- package/docs/value.md +17 -17
- package/package.json +1 -1
- package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
- package/scripts/_lib/bench_report.py +13 -14
- package/scripts/_lib/bench_telegraph_report.py +1 -2
- package/scripts/_lib/token_count.py +95 -0
- package/scripts/_lib/value_report.py +3 -3
- package/scripts/ai-video/adapters/higgsfield.sh +163 -6
- package/scripts/ai-video/adapters/openai-images.sh +92 -6
- package/scripts/audit_auto_rules.py +22 -6
- package/scripts/audit_command_surface.py +6 -1
- package/scripts/audit_initial_context.py +210 -0
- package/scripts/bench_ab_diff.py +4 -11
- package/scripts/bench_run.py +2 -3
- package/scripts/bench_runner.py +2 -2
- package/scripts/condense.py +44 -3
- package/scripts/iron_law_sha.py +14 -5
- package/scripts/measure_rule_budget.py +15 -0
- package/scripts/pack_mcp_content.py +1 -1
- package/scripts/project_thin_rules.py +168 -0
- package/scripts/render_value_md.py +14 -23
- package/scripts/schemas/command.schema.json +1 -1
- package/scripts/schemas/rule.schema.json +1 -1
- package/scripts/schemas/skill.schema.json +2 -2
- package/scripts/trigger_coverage.py +129 -0
|
@@ -123,6 +123,45 @@ The host agent reads `dist/router.json` once per session. Per turn:
|
|
|
123
123
|
No runtime profile resolution — the profile is fixed at session
|
|
124
124
|
start, the router lookup is keyword/phrase/path/intent matching only.
|
|
125
125
|
|
|
126
|
+
## Kill-switch — thin-projection rollback (lean-initial-context Phase 2.3)
|
|
127
|
+
|
|
128
|
+
Phase 3 of the lean-initial-context migration makes the per-tool projector
|
|
129
|
+
emit the kernel full-bodied and every non-kernel rule as a one-line
|
|
130
|
+
router-resolved pointer. That is the suite's biggest behavioural change, so
|
|
131
|
+
it ships behind a **single documented flip** that restores today's
|
|
132
|
+
full-eager projection:
|
|
133
|
+
|
|
134
|
+
```yaml
|
|
135
|
+
# .agent-settings.yml
|
|
136
|
+
lean_projection:
|
|
137
|
+
# thin = kernel full-bodied + non-kernel rules as router pointers (Phase 3)
|
|
138
|
+
# eager-all = every rule body inlined into every projection (today's behaviour)
|
|
139
|
+
mode: eager-all # DEFAULT until Phase 3.1 ships + its benchmark gate is green
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Revert procedure (one flip, no code change): set `lean_projection.mode:
|
|
143
|
+
eager-all`, run `task generate-tools` (regenerates `.claude/`, `.cursor/`,
|
|
144
|
+
`.clinerules/`, `.windsurfrules`) + `task sync` (`.agent-src/`, `.augment/`).
|
|
145
|
+
The thin projector (Phase 3.1) MUST honour this key; with it absent or
|
|
146
|
+
`eager-all` the projector behaves exactly as today. Default stays
|
|
147
|
+
`eager-all` so the migration is opt-in and reversible by one line.
|
|
148
|
+
|
|
149
|
+
### Staleness guard — `src → dist`
|
|
150
|
+
|
|
151
|
+
A projection or router that drifts from source silently re-introduces the
|
|
152
|
+
eager bytes (or a missing pointer target). Three CI gates enforce
|
|
153
|
+
`src == dist`, all already wired into `task ci`:
|
|
154
|
+
|
|
155
|
+
- `task check-router` (`compile_router.py --check`) — `dist/router.json`
|
|
156
|
+
must equal a fresh compile from frontmatter `triggers:`/`routes_to:`.
|
|
157
|
+
- `task check-artefact-checksums` — every artefact's committed checksum
|
|
158
|
+
must match its current source bytes.
|
|
159
|
+
- `task lint-projection-fidelity` — the per-tool projections must match
|
|
160
|
+
what the projector would emit from source.
|
|
161
|
+
|
|
162
|
+
The thin projector inherits all three: a thin projection whose recorded
|
|
163
|
+
source hash ≠ current source fails CI before it can ship a stale pointer.
|
|
164
|
+
|
|
126
165
|
## Linter contract (Phase 3.3)
|
|
127
166
|
|
|
128
167
|
`scripts/skill_linter.py` extension enforces:
|
|
@@ -267,9 +267,13 @@ copies it verbatim into the dashboard.
|
|
|
267
267
|
Saves output tokens — when the corpus rewards it.
|
|
268
268
|
- **Ohne Paket / Mit Paket** — "without the package" /
|
|
269
269
|
"with the package" — the two arms of the A/B comparison.
|
|
270
|
-
-
|
|
271
|
-
|
|
272
|
-
|
|
270
|
+
- **Δ Tokens** — input-token difference per request vs. the baseline.
|
|
271
|
+
The rendered dashboard reports cost in **tokens only** — no € figure.
|
|
272
|
+
A €/USD comparison would assume per-call API pricing, which the many
|
|
273
|
+
users on subscriptions do not pay; tokens are the currency-neutral
|
|
274
|
+
metric. The `eur_delta` fields remain in the JSON for back-compat but
|
|
275
|
+
are not rendered. (Historical € figures elsewhere in this spec are
|
|
276
|
+
dated examples, kept as record.)
|
|
273
277
|
|
|
274
278
|
## Honest baseline appendix
|
|
275
279
|
|
|
@@ -80,10 +80,15 @@ totals:
|
|
|
80
80
|
cumulative_pct: <signed float> # net % of baseline
|
|
81
81
|
net_verdict: net-saving | net-cost | break-even # by sign of cumulative_pct
|
|
82
82
|
notes:
|
|
83
|
-
- "
|
|
83
|
+
- "Cost is reported in tokens only — no € figure (API pricing misleads subscription users)."
|
|
84
84
|
- "<other invariants surfaced as plain prose>"
|
|
85
85
|
```
|
|
86
86
|
|
|
87
|
+
> **Rendering note.** The `eur_delta` / `cumulative_eur_delta` /
|
|
88
|
+
> `pricing_sourced_on` fields stay in the JSON for back-compat, but the
|
|
89
|
+
> rendered dashboard (`docs/value.md`) shows **tokens only** — no € column,
|
|
90
|
+
> no €-per-1k figure, no NETTO € line. See `scripts/render_value_md.py`.
|
|
91
|
+
|
|
87
92
|
## Invariants
|
|
88
93
|
|
|
89
94
|
- **No silent drops.** Missing input → emit the rung with
|
package/docs/getting-started.md
CHANGED
|
@@ -129,7 +129,7 @@ Your agent is now:
|
|
|
129
129
|
- **Respecting your codebase** — no conflicting patterns
|
|
130
130
|
- **Following standards** — consistent code quality
|
|
131
131
|
|
|
132
|
-
This is enforced automatically by
|
|
132
|
+
This is enforced automatically by 79 rules. No configuration needed.
|
|
133
133
|
|
|
134
134
|
---
|
|
135
135
|
|
|
@@ -169,7 +169,7 @@ Your agent now understands slash commands:
|
|
|
169
169
|
| `/quality-fix` | Run and fix all quality checks |
|
|
170
170
|
| `/chat-history` | Inspect the persistent chat-history log (read-only `show`) |
|
|
171
171
|
|
|
172
|
-
→ [Browse all
|
|
172
|
+
→ [Browse all 145 active commands](../.agent-src/commands/)
|
|
173
173
|
|
|
174
174
|
---
|
|
175
175
|
|
package/docs/value.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
## Wie diese Seite zu lesen ist
|
|
6
6
|
|
|
7
|
-
**Panel A (
|
|
7
|
+
**Panel A (Token-Leiter)** — von oben nach unten lesen. Jede Stufe sagt: *was sie macht*, *wie viele Input-Tokens sie pro Request hinzufügt oder spart*, und *wo wir kumulativ stehen*. Die fett gedruckte **NETTO**-Zeile am Ende ist die Antwort. Bewusst rein in Tokens — kein €-Vergleich, da Abo-Nutzer keine Per-Request-API-Preise zahlen.
|
|
8
8
|
|
|
9
9
|
**Panel B (Verhalten)** — vier reale Vergleiche, *mit* vs. *ohne* Paket. Hier liegt der nicht-Token-Wert: passende Skill-Auswahl, Stopps bei riskanten Aktionen, weniger Rückfragen, mehr abgeschlossene Aufgaben.
|
|
10
10
|
|
|
@@ -13,25 +13,25 @@
|
|
|
13
13
|
## Reference scale
|
|
14
14
|
|
|
15
15
|
- **1,000** Requests, durchschnittlich **8,000** Input-Tokens und **600** Output-Tokens pro Request
|
|
16
|
-
- Modell-Tier
|
|
16
|
+
- Modell-Tier (Workload-Annahme): `sonnet`
|
|
17
17
|
- Wer einen anderen Workload fährt, rechnet selbst nach — die Methodik ist offengelegt; nichts ist hardcodiert versteckt.
|
|
18
18
|
|
|
19
19
|
## Panel A — Kostenleiter (kumulativ, min → max)
|
|
20
20
|
|
|
21
21
|
Liest sich von oben nach unten. Positive Δ-Werte = das Paket *kostet* Tokens (Regel-Load ist die ehrliche Up-Front-Steuer); negative Δ-Werte = das Paket *spart* Tokens.
|
|
22
22
|
|
|
23
|
-
| Stufe | Was sie tut | Δ Tokens |
|
|
24
|
-
|
|
25
|
-
| **Ohne Paket / Without package** | Baseline — der nackte Request ohne Paket-Regeln. | +0 | +0.00
|
|
26
|
-
| Mit Paket (Regeln laden) / With package (rule load) | Die immer-aktiven Regeln landen im Kontext jedes Requests. ⚠️ erst teurer | +8
|
|
27
|
-
| | _Fußnote:_ Kernel = 10 rules (
|
|
28
|
-
| + condense (Regeln eindampfen) / + condense (rule shrink) | Build-Schritt schrumpft Regel-Dateien vor dem Ausliefern. | -186 |
|
|
29
|
-
| | _Fußnote:_ Aggregate across non-Thin-Root categories; Thin-Root files (AGENTS.md variants) net negative (~−4%) and are excluded from the rung — surfaced separately. | | | |
|
|
30
|
-
| + rtk (CLI-Output filtern) / + rtk (filter CLI output) | rtk schneidet verbose CLI-Ausgabe vor dem Modell-Input weg. | -593 |
|
|
31
|
-
| + terse (Antworten knapper) / + terse (shorter replies) | Telegraph-Stil zielt auf knappere Modell-Antworten. | +56 | +
|
|
32
|
-
| | _Fußnote:_ Honest: gemessener Median = -9.27% gegen 'sei knapp' — Telegraph liefert hier mehr Tokens, nicht weniger. Wir messen, wir verstecken nicht. | | | |
|
|
23
|
+
| Stufe | Was sie tut | Δ Tokens | Kumulativ | Quelle |
|
|
24
|
+
|---|---|---:|---:|---|
|
|
25
|
+
| **Ohne Paket / Without package** | Baseline — der nackte Request ohne Paket-Regeln. | +0 | +0.00% | `n/a` · ✅ gemessen |
|
|
26
|
+
| Mit Paket (Regeln laden) / With package (rule load) | Die immer-aktiven Regeln landen im Kontext jedes Requests. ⚠️ erst teurer | +8 522 | +106.53% | `dist/router.json` · ✅ gemessen |
|
|
27
|
+
| | _Fußnote:_ Kernel = 10 rules (30080 chars) + charter (4010 chars); tokens ≈ chars / 4. | | | |
|
|
28
|
+
| + condense (Regeln eindampfen) / + condense (rule shrink) | Build-Schritt schrumpft Regel-Dateien vor dem Ausliefern. | -186 | +104.20% | `internal/bench/reports/telegraph-v2.json` · ✅ gemessen |
|
|
29
|
+
| | _Fußnote:_ Aggregate across non-Thin-Root categories; Thin-Root files (AGENTS.md variants) net negative (~−4%) and are excluded from the rung — surfaced separately. | | | |
|
|
30
|
+
| + rtk (CLI-Output filtern) / + rtk (filter CLI output) | rtk schneidet verbose CLI-Ausgabe vor dem Modell-Input weg. | -593 | +96.79% | `internal/bench/reports/rtk/latest.json` · ✅ gemessen |
|
|
31
|
+
| + terse (Antworten knapper) / + terse (shorter replies) | Telegraph-Stil zielt auf knappere Modell-Antworten. | +56 | +97.49% | `internal/bench/reports/telegraph-v1.json` · ✅ gemessen |
|
|
32
|
+
| | _Fußnote:_ Honest: gemessener Median = -9.27% gegen 'sei knapp' — Telegraph liefert hier mehr Tokens, nicht weniger. Wir messen, wir verstecken nicht. | | | |
|
|
33
33
|
|
|
34
|
-
**NETTO: Mehrkosten** ⚠️ — **+
|
|
34
|
+
**NETTO: Mehrkosten** ⚠️ — **+7 799 Tokens / Request**, kumulativ **+97.49%** vs. Baseline.
|
|
35
35
|
|
|
36
36
|
## Panel B — Verhalten (mit vs. ohne)
|
|
37
37
|
|
|
@@ -42,7 +42,7 @@ Vier reale Vergleiche aus echten Bench-Runs. Hier liegt der Wert, den Tokens all
|
|
|
42
42
|
| Right-skill selection / Richtige Skill-Wahl | Wie oft das passende Skill aktiviert wird (top-K Treffer). | 50.0% | 0.0% | 50.0% | ✅ live |
|
|
43
43
|
| Destructive-op stops / Stopps bei riskanten Aktionen | Wie oft der Agent vor destructive ops anhält / nachfragt (von 5). | — | — | — | ⚠️ dry-run |
|
|
44
44
|
| Ask-vs-act ratio / Fragen vs. Handeln | Verhältnis Rückfragen zu Aktionen — niedriger = entschlossener. | 0.000 | 0.000 | 0.000 | ✅ live |
|
|
45
|
-
| Task completion rate / Aufgaben fertig | Anteil der Aufgaben, die der Agent vollständig abschließt. | 84.6% |
|
|
45
|
+
| Task completion rate / Aufgaben fertig | Anteil der Aufgaben, die der Agent vollständig abschließt. | 84.6% | 0.0% | 84.6% | ✅ live |
|
|
46
46
|
|
|
47
47
|
## Glossar
|
|
48
48
|
|
|
@@ -55,7 +55,7 @@ Plain-language Definitionen für den nicht-Entwickler-Reader.
|
|
|
55
55
|
- **rtk** — der *Rust Token Killer*, ein CLI-Wrapper, der verbose Output (`git status`, lint-Output, test-Runner) filtert, bevor das Modell ihn liest. Spart Input-Tokens auf Tool-Calls.
|
|
56
56
|
- **terse / telegraph** — ein Stil (kurze Phrasen, weggelassene Artikel), den der Agent für knappere Antworten nutzt. Spart Output-Tokens — wenn der Korpus es belohnt.
|
|
57
57
|
- **Ohne Paket / Mit Paket** — *without the package* / *with the package* — die zwei Arme des A/B-Vergleichs.
|
|
58
|
-
-
|
|
58
|
+
- **Δ Tokens** — Input-Token-Differenz pro Request gegenüber der Baseline. Bewusst die einzige Kosten-Einheit: ein €-Vergleich würde Per-Request-API-Preise unterstellen, die Abo-Nutzer nicht zahlen.
|
|
59
59
|
|
|
60
60
|
## Methodik & Quellen
|
|
61
61
|
|
|
@@ -77,8 +77,8 @@ Diese Seite ist eine **abgeleitete** Sicht — keine eigene Messung. Sie fasst d
|
|
|
77
77
|
|
|
78
78
|
**Hinweise aus dem Report:**
|
|
79
79
|
|
|
80
|
-
-
|
|
80
|
+
- Cost is reported in tokens only — no € figure. Per-call API pricing misleads subscription users; tokens are the currency-neutral metric.
|
|
81
81
|
- Pending rungs contribute 0 to the cumulative until measured.
|
|
82
82
|
- Reference scale: 1000 requests × 8000 input / 600 output tokens per request.
|
|
83
83
|
|
|
84
|
-
_Last rendered: `2026-05-
|
|
84
|
+
_Last rendered: `2026-05-31T14:37:17+00:00`_
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@event4u/agent-config",
|
|
3
|
-
"version": "5.
|
|
3
|
+
"version": "5.6.1",
|
|
4
4
|
"description": "Universal AI Agent OS \u2014 audited skills, governance rules, commands, and templates for AI coding tools (Claude Code, Cursor, Windsurf, Copilot).",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"private": false,
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -52,31 +52,32 @@ def _selection_section(selection: dict[str, Any]) -> str:
|
|
|
52
52
|
return "\n".join(lines)
|
|
53
53
|
|
|
54
54
|
|
|
55
|
-
def
|
|
55
|
+
def _token_usage_section(cost: dict[str, Any]) -> str:
|
|
56
|
+
# Token-only — the monetary (USD) comparison is intentionally omitted:
|
|
57
|
+
# it assumes per-call API pricing, which misleads subscription users.
|
|
58
|
+
# Tokens are the currency-neutral metric that matters. JSON keeps the
|
|
59
|
+
# raw cost field for back-compat; it is simply not rendered.
|
|
56
60
|
if cost.get("source") == "unavailable":
|
|
57
61
|
return (
|
|
58
|
-
"##
|
|
62
|
+
"## Token usage\n\n"
|
|
59
63
|
f"- **source:** `unavailable` ({cost.get('reason', 'unknown')})\n"
|
|
60
|
-
f"- **scanned:** `{cost.get('scanned_path', '—')}`\n"
|
|
61
|
-
f"- **pricing sourced on:** {cost.get('pricing_sourced_on') or '—'}\n\n"
|
|
64
|
+
f"- **scanned:** `{cost.get('scanned_path', '—')}`\n\n"
|
|
62
65
|
"_No session jsonl available. Run `node scripts/cost/track.mjs` "
|
|
63
66
|
"from a real Claude Code session to populate agents/cost-tracking/sessions.jsonl._\n"
|
|
64
67
|
)
|
|
65
68
|
totals = cost["totals"]
|
|
66
69
|
lines = [
|
|
67
|
-
"##
|
|
70
|
+
"## Token usage",
|
|
68
71
|
"",
|
|
69
72
|
f"- **source:** `{cost['source']}` · sessions scanned: **{cost['sessions_scanned']}**",
|
|
70
|
-
f"- **pricing sourced on:** {cost.get('pricing_sourced_on') or '—'}",
|
|
71
|
-
f"- **total cost:** **${totals['total_cost_usd']:.6f}**",
|
|
72
73
|
"",
|
|
73
|
-
"| tier | messages |
|
|
74
|
-
"
|
|
74
|
+
"| tier | messages |",
|
|
75
|
+
"|---|---:|",
|
|
75
76
|
]
|
|
76
77
|
for tier, slot in cost["per_tier"].items():
|
|
77
78
|
if slot["messages"] == 0 and slot["cost_usd"] == 0.0:
|
|
78
79
|
continue
|
|
79
|
-
lines.append(f"| {tier} | {slot['messages']} |
|
|
80
|
+
lines.append(f"| {tier} | {slot['messages']} |")
|
|
80
81
|
lines += [
|
|
81
82
|
"",
|
|
82
83
|
"| metric | value |",
|
|
@@ -125,21 +126,19 @@ def render_markdown(report: dict[str, Any]) -> str:
|
|
|
125
126
|
f"# Benchmark Report — `{corpus['id']}` · {report['generated_at']}\n\n"
|
|
126
127
|
"## Headline\n\n"
|
|
127
128
|
f"- **selection** {sel['selection_accuracy']:.2%} (target {sel['target']:.2%}) → **{verdict['selection']}**\n"
|
|
128
|
-
f"- **
|
|
129
|
-
f"({'sessions=' + str(cost['sessions_scanned']) if cost['source'] != 'unavailable' else cost['source']})\n"
|
|
129
|
+
f"- **tokens** {'sessions=' + str(cost['sessions_scanned']) if cost['source'] != 'unavailable' else cost['source']}\n"
|
|
130
130
|
f"- **quality** {qual['quality_score']:.2%} → **{verdict['quality']}**\n"
|
|
131
131
|
f"- **overall** → **{verdict['overall']}**\n"
|
|
132
132
|
)
|
|
133
133
|
notes = (
|
|
134
134
|
"## Notes\n\n"
|
|
135
135
|
f"- corpus path: `{corpus['path']}` · prompts: **{corpus['prompt_count']}**\n"
|
|
136
|
-
f"- pricing: `internal/bench/pricing.yaml`\n"
|
|
137
136
|
f"- baseline collector: `{report['runner']['baseline_collector']}`\n"
|
|
138
137
|
)
|
|
139
138
|
return "\n\n".join([
|
|
140
139
|
headline,
|
|
141
140
|
_selection_section(sel),
|
|
142
|
-
|
|
141
|
+
_token_usage_section(cost),
|
|
143
142
|
_quality_section(qual),
|
|
144
143
|
notes,
|
|
145
144
|
]) + "\n"
|
|
@@ -104,8 +104,7 @@ def render_telegraph_markdown(report: dict[str, Any]) -> str:
|
|
|
104
104
|
f"(p10 {_fmt_pct(agg['savings_vs_terse']['p10'])} · p90 {_fmt_pct(agg['savings_vs_terse']['p90'])})",
|
|
105
105
|
f"- median realised carve-out share (condensed arm): **{_fmt_pct(agg['realised_carve_out_pct']['median'])}** "
|
|
106
106
|
f"(expected median {_fmt_pct(agg['expected_carve_out_pct']['median'])})",
|
|
107
|
-
f"-
|
|
108
|
-
f"(calls {cost['totals']['calls']} · errors {cost['totals']['errors']})",
|
|
107
|
+
f"- calls: **{cost['totals']['calls']}** · errors: **{cost['totals']['errors']}**",
|
|
109
108
|
f"- verdict: **{report['verdict']['overall']}**",
|
|
110
109
|
"",
|
|
111
110
|
]
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Real-tokenizer counting for the budget tooling (roadmap 0B.1).
|
|
3
|
+
|
|
4
|
+
`char != token`. Every budget in this suite is historically in characters;
|
|
5
|
+
the lean-initial-context goal is tokens. This helper adds a token count
|
|
6
|
+
*alongside* chars so chars stay the cheap, stdlib-only proxy and tokens
|
|
7
|
+
become the truth where a real tokenizer is available.
|
|
8
|
+
|
|
9
|
+
Design — no silent installs, no mandatory network (per `missing-tool-handling`):
|
|
10
|
+
|
|
11
|
+
- **GPT** — exact via `tiktoken` (`o200k_base`, the GPT-4o/4.1 encoding) when
|
|
12
|
+
the optional dependency is installed; otherwise a documented `chars / 4`
|
|
13
|
+
proxy flagged `exact=False`. Install `tiktoken` to activate exact counts.
|
|
14
|
+
- **Claude** — no offline tokenizer ships in `anthropic` 0.98 (the SDK exposes
|
|
15
|
+
only the live `messages.count_tokens` endpoint, which needs an API call).
|
|
16
|
+
Offline we use a documented `chars / 3.6` proxy flagged `exact=False`; the
|
|
17
|
+
exact API count is reserved for the live-bench boundaries to avoid spend on
|
|
18
|
+
the cheap path.
|
|
19
|
+
|
|
20
|
+
Both proxies are intentionally conservative ratios drawn from English-prose +
|
|
21
|
+
markdown samples; they are estimates, never gates. The char budgets remain the
|
|
22
|
+
enforced floor (`measure_rule_budget --kernel-budget-check`).
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
from dataclasses import dataclass
|
|
28
|
+
|
|
29
|
+
# Fallback chars-per-token ratios, used only when no real tokenizer can be
# loaded. Tuned for English markdown rule/skill prose; deliberately conservative.
_GPT_CHARS_PER_TOKEN = 4.0
_CLAUDE_CHARS_PER_TOKEN = 3.6

_TIKTOKEN_ENCODING = "o200k_base"  # GPT-4o / GPT-4.1 family.

# Load the optional tiktoken encoder exactly once, at import time. Any failure
# (package not installed, or the encoding data cannot be fetched offline)
# leaves `_ENC` as None so callers fall back to the char proxy.
_ENC = None
try:  # pragma: no cover - exercised by env presence, not unit tests
    import tiktoken  # type: ignore

    _ENC = tiktoken.get_encoding(_TIKTOKEN_ENCODING)
except Exception:  # ImportError, or model-data fetch failure offline
    _ENC = None

# True only when an exact GPT tokenizer was successfully loaded above.
TIKTOKEN_AVAILABLE = _ENC is not None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass(frozen=True)
class TokenCount:
    """Immutable result of one token measurement.

    Pairs the measured count with a flag saying whether it came from a real
    tokenizer or from a chars-per-token proxy estimate.
    """

    # Number of tokens measured (or estimated) for the text.
    tokens: int
    # True when `tokens` is an exact tokenizer count, False when it is a proxy.
    exact: bool
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def gpt_tokens(text: str) -> TokenCount:
    """Count GPT tokens in `text`.

    Returns an exact count (`exact=True`) when the tiktoken encoder was loaded
    at import; otherwise returns the rounded `chars / _GPT_CHARS_PER_TOKEN`
    proxy flagged `exact=False`.
    """
    if _ENC is None:
        # No tokenizer available: fall back to the documented char proxy.
        estimate = round(len(text) / _GPT_CHARS_PER_TOKEN)
        return TokenCount(estimate, False)
    return TokenCount(len(_ENC.encode(text)), True)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def claude_tokens(text: str) -> TokenCount:
    """Estimate Claude tokens in `text` with the offline char proxy.

    Always returns `exact=False`: the count is the rounded
    `chars / _CLAUDE_CHARS_PER_TOKEN` estimate, never a tokenizer result.
    """
    approx = round(len(text) / _CLAUDE_CHARS_PER_TOKEN)
    return TokenCount(approx, False)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def measure(text: str) -> dict[str, object]:
    """Measure one text blob in characters and per-model tokens.

    Returns a dict with keys, in this order: `chars`, `tokens_gpt`,
    `tokens_gpt_exact`, `tokens_claude`, `tokens_claude_exact`. Each
    `*_exact` boolean tells a report consumer whether the matching number is
    a real tokenizer count or a proxy estimate.
    """
    counts = (("gpt", gpt_tokens(text)), ("claude", claude_tokens(text)))
    report: dict[str, object] = {"chars": len(text)}
    for label, count in counts:
        report[f"tokens_{label}"] = count.tokens
        report[f"tokens_{label}_exact"] = count.exact
    return report
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def method_note() -> str:
    """Return a one-line provenance note describing how counts were produced.

    The GPT half reports either the exact tiktoken encoding or the char proxy,
    depending on `TIKTOKEN_AVAILABLE`; the Claude half always reports the proxy.
    """
    claude_part = f"tokens_claude: proxy (chars/{_CLAUDE_CHARS_PER_TOKEN})"
    if TIKTOKEN_AVAILABLE:
        gpt_part = f"tokens_gpt: exact (tiktoken {_TIKTOKEN_ENCODING})"
    else:
        gpt_part = (
            f"tokens_gpt: proxy (chars/{_GPT_CHARS_PER_TOKEN}, tiktoken not installed)"
        )
    return f"{gpt_part}; {claude_part}"
|
|
@@ -333,9 +333,9 @@ def assemble_value_v1(
|
|
|
333
333
|
"totals": totals,
|
|
334
334
|
"notes": [
|
|
335
335
|
(
|
|
336
|
-
"
|
|
337
|
-
|
|
338
|
-
|
|
336
|
+
"Cost is reported in tokens only — no € figure. Per-call API "
|
|
337
|
+
"pricing misleads subscription users; tokens are the "
|
|
338
|
+
"currency-neutral metric."
|
|
339
339
|
),
|
|
340
340
|
"Pending rungs contribute 0 to the cumulative until measured.",
|
|
341
341
|
(
|
|
@@ -51,7 +51,49 @@ aiv_higgsfield_capability() {
|
|
|
51
51
|
"$(higgsfield_audio_for_preset "${preset}")" "${preset}"
|
|
52
52
|
return 0
|
|
53
53
|
fi
|
|
54
|
-
printf '{"audio":"per-model","presets":["mix","burst","dvd","cinematic","talk"]}\n'
|
|
54
|
+
printf '{"audio":"per-model","presets":["mix","burst","dvd","cinematic","talk"],"speak":true}\n'
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
# --- live helpers -----------------------------------------------------
|
|
58
|
+
# Higgsfield API (authoritative contract — official higgsfield-js SDK):
|
|
59
|
+
# base https://platform.higgsfield.ai
|
|
60
|
+
# auth Authorization: Key <KEY_ID>:<KEY_SECRET> (api-key + api-key-secret)
|
|
61
|
+
# upload POST /api/v1/upload_file (multipart) -> hosted image URL
|
|
62
|
+
# submit POST /v1/image2video/dop -> { request_id, status_url }
|
|
63
|
+
# poll GET /requests/<id>/status -> { status, video:{url} }
|
|
64
|
+
# Fields tagged ASSUMED are documented best-effort guesses, to be verified on the
|
|
65
|
+
# first live smoke (this adapter has no captured smoke trace yet).
|
|
66
|
+
HF_BASE_DEFAULT="https://platform.higgsfield.ai"
|
|
67
|
+
|
|
68
|
+
# Print the provider's api-key-secret value as returned by _aiv_xpath.
# Globals:  ADAPTER_ID (read)
# Outputs:  whatever _aiv_xpath writes for the api-key-secret node, looked up
#           under either /ai-video/provider or /ai-video/extra/provider.
# NOTE(review): _aiv_xpath is defined elsewhere; presumably it evaluates the
# expression against agents/.ai-video.xml — confirm.
_hf_secret() {
  local by_id="@id='${ADAPTER_ID}'"
  _aiv_xpath "(/ai-video/provider[${by_id}]|/ai-video/extra/provider[${by_id}])/api-key-secret"
}
|
|
71
|
+
|
|
72
|
+
# Documented base; honour AIV_ENDPOINT only when it points at the
|
|
73
|
+
# platform host (the XML default api.higgsfield.ai/v1 is not the SDK base).
|
|
74
|
+
# Resolve the Higgsfield API base URL.
# Globals:  AIV_ENDPOINT (read, optional), HF_BASE_DEFAULT (read)
# Outputs:  base URL on stdout, without a trailing newline
#
# AIV_ENDPOINT is honoured only when its scheme and host are exactly
# http(s)://platform.higgsfield.ai. A substring match would also accept
# look-alikes such as https://platform.higgsfield.ai.evil.example or
# https://evil.example/?platform.higgsfield.ai, and the caller sends the
# Authorization header to whatever base this function returns.
# Anything else (including ports or userinfo) falls back to HF_BASE_DEFAULT.
_hf_base() {
  local endpoint="${AIV_ENDPOINT:-}"
  case "${endpoint}" in
    https://platform.higgsfield.ai | https://platform.higgsfield.ai/* | \
    http://platform.higgsfield.ai | http://platform.higgsfield.ai/*)
      # Drop one trailing slash, then a trailing /v1 or /v1/ segment, so the
      # caller can append full API paths to the result.
      endpoint="${endpoint%/}"
      case "${endpoint}" in
        */v1/) endpoint="${endpoint%/v1/}" ;;
        */v1) endpoint="${endpoint%/v1}" ;;
      esac
      printf '%s' "${endpoint}"
      ;;
    *) printf '%s' "${HF_BASE_DEFAULT}" ;;
  esac
}
|
|
80
|
+
|
|
81
|
+
# Build the Authorization header line for Higgsfield requests.
# Globals:  ADAPTER_ID (read), AIV_KEY (read)
# Outputs:  "Authorization: Key <AIV_KEY>:<secret>" on stdout, no newline
# Calls aiv_die with code 6 when _hf_secret yields an empty value. When an
# aiv_redact_register function is available, the secret is passed to it before
# the header is printed.
_hf_auth() {
  local key_secret
  key_secret="$(_hf_secret)"
  if [ -z "${key_secret}" ]; then
    aiv_die 6 "${ADAPTER_ID}: api-key-secret missing in agents/.ai-video.xml"
  fi
  # The redaction hook is optional; only call it when it is defined.
  if command -v aiv_redact_register >/dev/null 2>&1; then
    aiv_redact_register "${key_secret}"
  fi
  printf 'Authorization: Key %s:%s' "${AIV_KEY}" "${key_secret}"
}
|
|
87
|
+
|
|
88
|
+
# image2video needs a DoP video model; the XML default may carry an
|
|
89
|
+
# image model (e.g. higgsfield-soul) — fall back to dop-turbo and warn.
|
|
90
|
+
# Choose the model name to use for image2video.
# Globals:  AIV_MODEL (read, optional), ADAPTER_ID (read)
# Outputs:  model name on stdout, no newline; a warning line on stderr when
#           the configured model is replaced
# A configured model whose name contains "dop" or "turbo" is passed through
# unchanged. Anything else, including an unset value, is replaced by dop-turbo.
_hf_model() {
  local requested="${AIV_MODEL:-}"
  case "${requested}" in
    *dop* | *turbo*)
      printf '%s' "${requested}"
      return
      ;;
  esac
  # Fallback: emit the default model first, then explain the swap on stderr.
  printf 'dop-turbo'
  printf '%s: XML model "%s" is not a DoP video model; using dop-turbo for image2video\n' \
    "${ADAPTER_ID}" "${AIV_MODEL:-unset}" >&2
}
|
|
56
98
|
|
|
57
99
|
aiv_cmd_submit() {
|
|
@@ -60,29 +102,144 @@ aiv_cmd_submit() {
|
|
|
60
102
|
aiv_load_provider "${ADAPTER_ID}"
|
|
61
103
|
[ "$(aiv_key_status)" = "present" ] \
|
|
62
104
|
|| aiv_die 6 "${ADAPTER_ID}: api key missing in agents/.ai-video.xml"
|
|
63
|
-
|
|
105
|
+
|
|
106
|
+
local stdin_json base auth model ref img_url prompt req resp http body rid
|
|
107
|
+
stdin_json="$(cat)"
|
|
108
|
+
base="$(_hf_base)"; auth="$(_hf_auth)"; model="$(_hf_model)"
|
|
109
|
+
|
|
110
|
+
# image2video must animate a still — require ref_images[0].
|
|
111
|
+
ref="$(printf '%s' "${stdin_json}" | jq -r '.ref_images[0] // empty')"
|
|
112
|
+
[ -n "${ref}" ] || aiv_die 7 "${ADAPTER_ID}: image2video requires ref_images[0] (the still to animate)"
|
|
113
|
+
|
|
114
|
+
case "${ref}" in
|
|
115
|
+
http://*|https://*) img_url="${ref}" ;;
|
|
116
|
+
*)
|
|
117
|
+
case "${ref}" in /*) : ;; *) ref="$(pwd)/${ref}" ;; esac
|
|
118
|
+
[ -f "${ref}" ] || aiv_die 7 "${ADAPTER_ID}: ref image not found: ${ref}"
|
|
119
|
+
# Upload local still -> hosted URL. Multipart field name ASSUMED 'file'.
|
|
120
|
+
local up up_code up_body
|
|
121
|
+
up="$(curl -sS -w '\n%{http_code}' -X POST "${base}/api/v1/upload_file" \
|
|
122
|
+
-H "${auth}" -F "file=@${ref}")" \
|
|
123
|
+
|| aiv_die 8 "${ADAPTER_ID}: upload_file curl failed"
|
|
124
|
+
up_code="$(printf '%s' "${up}" | tail -n1)"; up_body="$(printf '%s' "${up}" | sed '$d')"
|
|
125
|
+
case "${up_code}" in 2*) : ;; *) aiv_die 8 "${ADAPTER_ID}: upload HTTP ${up_code}: $(printf '%s' "${up_body}" | head -c 200)" ;; esac
|
|
126
|
+
img_url="$(printf '%s' "${up_body}" | jq -r '.url // .image_url // .file_url // .data.url // empty')"
|
|
127
|
+
[ -n "${img_url}" ] || aiv_die 8 "${ADAPTER_ID}: no URL in upload response (got: $(printf '%s' "${up_body}" | head -c 200))"
|
|
128
|
+
;;
|
|
129
|
+
esac
|
|
130
|
+
|
|
131
|
+
# DoP wants a camera-movement prompt, not the full scene prose.
|
|
132
|
+
prompt="$(printf '%s' "${stdin_json}" | jq -r '
|
|
133
|
+
[.prompt.camera, .prompt.action, .prompt.mood]
|
|
134
|
+
| map(select(. != null and . != "")) | join(". ")')"
|
|
135
|
+
[ -n "${prompt}" ] || prompt="Cinematic camera movement"
|
|
136
|
+
|
|
137
|
+
# Live API wraps the request in a "params" object (verified: a flat
|
|
138
|
+
# body returns 422 'body.params required'; params requires prompt +
|
|
139
|
+
# input_images). model lives inside params.
|
|
140
|
+
req="$(jq -n --arg m "${model}" --arg p "${prompt}" --arg u "${img_url}" \
|
|
141
|
+
'{params:{model:$m, prompt:$p, input_images:[{type:"image_url", image_url:$u}]}}')"
|
|
142
|
+
|
|
143
|
+
resp="$(curl -sS -w '\n%{http_code}' -X POST "${base}/v1/image2video/dop" \
|
|
144
|
+
-H "${auth}" -H "Content-Type: application/json" --data-binary "${req}")" \
|
|
145
|
+
|| aiv_die 8 "${ADAPTER_ID}: image2video curl failed"
|
|
146
|
+
http="$(printf '%s' "${resp}" | tail -n1)"; body="$(printf '%s' "${resp}" | sed '$d')"
|
|
147
|
+
case "${http}" in 2*) : ;; *) aiv_die 8 "${ADAPTER_ID}: submit HTTP ${http}: $(printf '%s' "${body}" | jq -r '.detail // .error // .message // "unknown"' 2>/dev/null | head -c 300)" ;; esac
|
|
148
|
+
|
|
149
|
+
rid="$(printf '%s' "${body}" | jq -r '.request_id // .generation_id // .id // empty')"
|
|
150
|
+
[ -n "${rid}" ] || aiv_die 8 "${ADAPTER_ID}: no request_id in submit response (got: $(printf '%s' "${body}" | head -c 200))"
|
|
151
|
+
jq -n --arg id "${rid}" '{job_id:$id}'
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
# Fetch the raw status document for a request and print it on stdout.
# The status URL is reconstructed from the request id (status_url is also
# returned by submit).
#
# Arguments: $1 - request/job id
# Outputs:   response body on stdout (no trailing newline added)
# Returns:   non-zero when the base URL or auth header cannot be built;
#            dies with code 8 on curl failure or a non-2xx response.
_hf_status_json() {
  local job_id="${1}" base auth enc_id resp http body
  # Propagate helper failures instead of issuing a request with an empty
  # base or without credentials.
  base="$(_hf_base)" || return "$?"
  auth="$(_hf_auth)" || return "$?"
  # The id arrives from the command line; percent-encode it so characters
  # such as '/', '?' or '#' cannot alter the request path or query.
  enc_id="$(jq -rn --arg v "${job_id}" '$v|@uri')" \
    || aiv_die 8 "${ADAPTER_ID}: cannot encode job id"
  resp="$(curl -sS -w '\n%{http_code}' -X GET "${base}/requests/${enc_id}/status" -H "${auth}")" \
    || aiv_die 8 "${ADAPTER_ID}: status curl failed"
  # curl appends "\n<http_code>": last line is the code, the rest is the body.
  http="$(printf '%s' "${resp}" | tail -n1)"
  body="$(printf '%s' "${resp}" | sed '$d')"
  case "${http}" in
    2*) : ;;
    *)
      # Report the server's error detail, as submit and speak do.
      aiv_die 8 "${ADAPTER_ID}: status HTTP ${http}: $(printf '%s' "${body}" \
        | jq -r '.detail // .error // .message // "unknown"' 2>/dev/null | head -c 300)"
      ;;
  esac
  printf '%s' "${body}"
}
|
|
65
164
|
|
|
66
165
|
# poll <job_id> — print the normalised job state as one line of JSON.
#
# Provider status -> output:
#   completed|done|success          {"status":"done"}
#   queued                          {"status":"queued"}
#   in_progress|running|processing  {"status":"running"}
#   failed|nsfw|canceled|cancelled  {"status":"failed","reason":<status>}
#   anything else / missing         {"status":"running","raw":<status|unknown>}
#
# Arguments: $1 - job id returned by submit/speak
# Exits:     2 when the job id is missing; with the status helper's exit
#            code when the status request itself fails.
aiv_cmd_poll() {
  local job_id="${1:-}"
  [ -n "${job_id}" ] || aiv_die 2 "${ADAPTER_ID}: poll <job_id> required"
  aiv_assert_dryrun
  aiv_require_cmd curl jq
  aiv_load_provider "${ADAPTER_ID}"

  local body st
  # Fetch first, parse second: piping the helper straight into jq hides its
  # failure (jq exits 0 on empty input), which would report a failed status
  # request as a job that is still running.
  body="$(_hf_status_json "${job_id}")" || exit "$?"
  st="$(printf '%s' "${body}" | jq -r '.status // empty')"

  case "${st}" in
    completed|done|success) printf '{"status":"done"}\n' ;;
    queued) printf '{"status":"queued"}\n' ;;
    in_progress|running|processing) printf '{"status":"running"}\n' ;;
    # Provider-supplied text goes through jq so quotes/backslashes cannot
    # produce invalid JSON.
    failed|nsfw|canceled|cancelled)
      jq -cn --arg r "${st}" '{status:"failed",reason:$r}' ;;
    *)
      jq -cn --arg r "${st:-unknown}" '{status:"running",raw:$r}' ;;
  esac
}
|
|
72
180
|
|
|
73
181
|
# fetch <job_id> — download the finished video and print
# {video_path, audio_embedded:false} as JSON.
#
# The video URL is read from the status document (.video.url, falling back
# to .results.raw.url, .video_url, then .images[0].url). The file is written
# to AIV_OUT when set, otherwise to a fresh temp file with an .mp4 suffix.
# The reported path is made absolute.
#
# Arguments: $1 - job id returned by submit/speak
# Globals:   AIV_OUT (read, optional), ADAPTER_ID (read)
# Exits:     2 when the job id is missing; 8 when no URL is present, the temp
#            file cannot be created, or the download fails; with the status
#            helper's exit code when the status request fails.
aiv_cmd_fetch() {
  local job_id="${1:-}"
  [ -n "${job_id}" ] || aiv_die 2 "${ADAPTER_ID}: fetch <job_id> required"
  aiv_assert_dryrun
  aiv_require_cmd curl jq
  aiv_load_provider "${ADAPTER_ID}"

  local body url out tmp=""
  body="$(_hf_status_json "${job_id}")" || exit "$?"
  url="$(printf '%s' "${body}" | jq -r '.video.url // .results.raw.url // .video_url // (.images[0].url) // empty')"
  [ -n "${url}" ] || aiv_die 8 "${ADAPTER_ID}: no video url in status (status=$(printf '%s' "${body}" | jq -r '.status // "?"'))"

  out="${AIV_OUT:-}"
  if [ -z "${out}" ]; then
    # mktemp creates the file it names; rename that file to carry the .mp4
    # suffix instead of abandoning it next to a second, unreserved path.
    tmp="$(mktemp -t aiv-hf-XXXXXX)" || aiv_die 8 "${ADAPTER_ID}: mktemp failed"
    out="${tmp}.mp4"
    mv -- "${tmp}" "${out}" || aiv_die 8 "${ADAPTER_ID}: cannot create ${out}"
  fi

  # --fail: an HTTP error (expired or forbidden URL) must count as a failed
  # download rather than saving the error page as the video.
  if ! curl -sS -L --fail -o "${out}" "${url}"; then
    # Only remove files this function created itself.
    if [ -n "${tmp}" ]; then rm -f -- "${out}"; fi
    aiv_die 8 "${ADAPTER_ID}: download failed: ${url}"
  fi

  case "${out}" in /*) : ;; *) out="$(pwd)/${out}" ;; esac
  jq -n --arg p "${out}" '{video_path:$p, audio_embedded:false}'
}
|
|
196
|
+
|
|
197
|
+
# aiv_cmd_speak — audio-driven lip-sync.
#
# Stdin JSON: {input_image: <url>, input_audio: <wav url>, prompt: <string>}
# (image_url / ref_images[0] and audio_url are accepted as alternates; a
# missing or non-string prompt is replaced by a default singing prompt).
# Submits POST /v1/speak/higgsfield and prints {job_id}; poll/fetch are
# shared with image2video.
#
# Image and audio MUST be public http(s) URLs (the platform upload endpoint
# is WAF-gated for non-browser clients). Audio must be WAV.
#
# Exits: 6 when the api key is missing, 7 on invalid input, 8 on transport
#        or API errors.
aiv_cmd_speak() {
  aiv_assert_dryrun
  aiv_require_cmd curl jq
  aiv_load_provider "${ADAPTER_ID}"
  if [ "$(aiv_key_status)" != "present" ]; then
    aiv_die 6 "${ADAPTER_ID}: api key missing in agents/.ai-video.xml"
  fi

  local payload api_root auth_hdr image audio text request reply code answer request_id
  payload="$(cat)"
  api_root="$(_hf_base)"
  auth_hdr="$(_hf_auth)"

  # Pull the three inputs out of the stdin document.
  image="$(printf '%s' "${payload}" | jq -r '.input_image // .image_url // (.ref_images[0]?) // empty')"
  audio="$(printf '%s' "${payload}" | jq -r '.input_audio // .audio_url // empty')"
  text="$(printf '%s' "${payload}" | jq -r 'if (.prompt|type)=="string" then .prompt else empty end')"
  text="${text:-sing the line with force, mouth moving precisely to the words}"

  # Validation order: presence of both inputs first, then URL shape.
  if [ -z "${image}" ]; then
    aiv_die 7 "${ADAPTER_ID}: speak requires input_image (public URL)"
  fi
  if [ -z "${audio}" ]; then
    aiv_die 7 "${ADAPTER_ID}: speak requires input_audio (public WAV URL)"
  fi
  case "${image}" in
    http://*|https://*) : ;;
    *) aiv_die 7 "${ADAPTER_ID}: speak input_image must be a public URL (local upload is WAF-gated)" ;;
  esac
  case "${audio}" in
    http://*|https://*) : ;;
    *) aiv_die 7 "${ADAPTER_ID}: speak input_audio must be a public WAV URL" ;;
  esac

  request="$(jq -n --arg i "${image}" --arg a "${audio}" --arg p "${text}" \
    '{params:{input_image:{type:"image_url",image_url:$i},input_audio:{type:"audio_url",audio_url:$a},prompt:$p}}')"

  if ! reply="$(curl -sS -w '\n%{http_code}' -X POST "${api_root}/v1/speak/higgsfield" \
    -H "${auth_hdr}" -H "Content-Type: application/json" --data-binary "${request}")"; then
    aiv_die 8 "${ADAPTER_ID}: speak curl failed"
  fi

  # curl appended "\n<http_code>": last line is the code, the rest is the body.
  code="$(printf '%s' "${reply}" | tail -n1)"
  answer="$(printf '%s' "${reply}" | sed '$d')"
  case "${code}" in
    2*) : ;;
    *) aiv_die 8 "${ADAPTER_ID}: speak HTTP ${code}: $(printf '%s' "${answer}" | jq -r '.detail // .error // .message // "unknown"' 2>/dev/null | head -c 300)" ;;
  esac

  request_id="$(printf '%s' "${answer}" | jq -r '.id // .request_id // empty')"
  if [ -z "${request_id}" ]; then
    aiv_die 8 "${ADAPTER_ID}: speak: no request_id (got: $(printf '%s' "${answer}" | head -c 200))"
  fi
  jq -n --arg id "${request_id}" '{job_id:$id}'
}
|
|
79
231
|
|
|
80
|
-
# Custom dispatch
|
|
81
|
-
#
|
|
232
|
+
# Custom dispatch: the adapter-specific subcommands (capability, speak) are
# handled here and end the script; every other subcommand (submit, poll,
# fetch, run, dry-run) is handed to the common router with the original
# arguments.
sub="${1:-}"
case "${sub}" in
  capability)
    shift
    aiv_higgsfield_capability "$@"
    exit 0
    ;;
  speak)
    shift
    aiv_cmd_speak "$@"
    exit 0
    ;;
esac
aiv_dispatch "${ADAPTER_ID}" "per-model" "$@"
|