@event4u/agent-config 4.9.0 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/implement-ticket.md +5 -4
- package/.agent-src/contexts/execution/roadmap-process-loop.md +30 -4
- package/.agent-src/rules/language-and-tone.md +4 -10
- package/.agent-src/rules/linked-projects-onboarding-gate.md +82 -0
- package/.agent-src/rules/roadmap-progress-sync.md +39 -5
- package/.agent-src/scripts/update_roadmap_progress.py +63 -7
- package/.agent-src/skills/command-routing/SKILL.md +5 -4
- package/.agent-src/skills/roadmap-management/SKILL.md +121 -21
- package/.agent-src/skills/roadmap-writing/SKILL.md +63 -0
- package/.agent-src/templates/agent-settings.md +16 -0
- package/.agent-src/templates/roadmaps.md +22 -1
- package/.agent-src/templates/scripts/work_engine/_lib/agent_settings.py +20 -3
- package/.claude-plugin/marketplace.json +1 -1
- package/CHANGELOG.md +106 -0
- package/CONTRIBUTING.md +19 -0
- package/README.md +12 -1
- package/dist/cli/registry.js +0 -2
- package/dist/cli/registry.js.map +1 -1
- package/dist/discovery/deprecation-report.md +1 -1
- package/dist/discovery/discovery-manifest.json +36 -14
- package/dist/discovery/discovery-manifest.json.sha256 +1 -1
- package/dist/discovery/discovery-manifest.summary.md +3 -3
- package/dist/discovery/orphan-report.md +1 -1
- package/dist/discovery/packs.json +6 -5
- package/dist/discovery/trust-report.md +3 -3
- package/dist/discovery/workspaces.json +5 -4
- package/dist/mcp/registry-manifest.json +3 -3
- package/dist/router.json +1 -1671
- package/docs/architecture.md +1 -1
- package/docs/benchmark.md +20 -8
- package/docs/benchmarks.md +11 -0
- package/docs/catalog.md +3 -2
- package/docs/contracts/benchmark-corpus-spec.md +31 -3
- package/docs/contracts/command-surface-tiers.md +1 -1
- package/docs/contracts/hook-architecture-v1.md +33 -0
- package/docs/contracts/migrate-command.md +197 -0
- package/docs/contracts/settings-api.md +2 -1
- package/docs/contracts/value-dashboard-spec.md +374 -0
- package/docs/contracts/value-report-schema.md +150 -0
- package/docs/decisions/ADR-031-validation-severity-tiers-and-projection-roundtrip.md +97 -0
- package/docs/decisions/ADR-032-linked-projects-scope.md +118 -0
- package/docs/decisions/INDEX.md +2 -0
- package/docs/getting-started.md +1 -1
- package/docs/guidelines/agent-infra/installed-tools-manifest.md +6 -3
- package/docs/guidelines/agent-infra/language-and-tone-examples.md +35 -0
- package/docs/guides/cross-repo-linked-projects.md +86 -0
- package/docs/migration/v1-to-v2.md +40 -27
- package/docs/value.md +84 -0
- package/package.json +8 -8
- package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
- package/scripts/_cli/cmd_migrate.py +264 -102
- package/scripts/_cli/cmd_settings_migrate.py +2 -1
- package/scripts/_dispatch.bash +147 -49
- package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
- package/scripts/_lib/agent_settings.py +20 -3
- package/scripts/_lib/install_regenerator.py +129 -0
- package/scripts/_lib/linked_projects.py +238 -0
- package/scripts/_lib/value_ladder.py +599 -0
- package/scripts/_lib/value_report.py +441 -0
- package/scripts/bench_rtk_savings.py +320 -0
- package/scripts/check_no_local_settings_committed.py +51 -0
- package/scripts/compile_router.py +19 -5
- package/scripts/expected_perms.json +1 -1
- package/scripts/first_run_gate_hook.py +178 -0
- package/scripts/hook_manifest.yaml +16 -7
- package/scripts/hooks/dispatch_hook.py +27 -0
- package/scripts/hooks/dispatch_issues.py +136 -0
- package/scripts/hooks_doctor.py +40 -1
- package/scripts/install.py +25 -21
- package/scripts/lint_agents_layout.py +5 -4
- package/scripts/lint_bench_corpus.py +86 -4
- package/scripts/lint_global_paths.py +4 -3
- package/scripts/lint_marketplace_install_completeness.py +188 -0
- package/scripts/lint_value_dashboard.py +218 -0
- package/scripts/render_benchmark_md.py +6 -2
- package/scripts/render_value_md.py +355 -0
- package/scripts/repro/repro_marketplace_install_gap.sh +161 -0
- package/scripts/roadmap_progress_hook.py +23 -0
- package/scripts/router_telemetry.py +470 -0
- package/scripts/validate_frontmatter.py +23 -9
- package/scripts/_cli/cmd_migrate_to_global.py +0 -415
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Render `docs/value.md` from the latest `value-v1` JSON report.
|
|
3
|
+
|
|
4
|
+
Phase 4 Step 1 of `agents/roadmaps/road-to-readable-value-dashboard.md`.
|
|
5
|
+
|
|
6
|
+
This renderer is **deterministic** — it does not run any bench, only
|
|
7
|
+
formats existing reports. Mirrors `render_benchmark_md.py`'s placeholder
|
|
8
|
+
discipline: when the report is missing, write a placeholder document
|
|
9
|
+
explaining how to produce one. Never errors.
|
|
10
|
+
|
|
11
|
+
The dashboard has two panels:
|
|
12
|
+
- Panel A — cost ladder (cumulative, min → max)
|
|
13
|
+
- Panel B — behaviour (with vs. without)
|
|
14
|
+
|
|
15
|
+
Each panel uses plain language, prints `confidence` markers inline,
|
|
16
|
+
and ends with a bold NETTO line that lifts the totals out of the
|
|
17
|
+
table.
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import json
|
|
23
|
+
import sys
|
|
24
|
+
from datetime import datetime, timezone
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any, Dict, Optional
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Two directory levels above this file.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Input: directory of value reports and the report this renderer reads.
VALUE_REPORTS_DIR = REPO_ROOT.joinpath("internal", "bench", "reports", "value")
LATEST = VALUE_REPORTS_DIR.joinpath("latest.json")
# Output: the Markdown dashboard written by `render()`.
OUT_PATH = REPO_ROOT.joinpath("docs", "value.md")

# Heading/marker fragments of the rendered dashboard.
# NOTE(review): not referenced in the visible part of this file —
# presumably consumed by a linter or test; confirm before changing.
REQUIRED_SECTIONS = tuple(
    [
        "## Reference scale",
        "## Panel A",
        "## Panel B",
        "## Glossar",
        "**NETTO",
    ]
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def utc_iso() -> str:
    """Return the current UTC time as an ISO-8601 string, truncated to seconds."""
    now = datetime.now(tz=timezone.utc)
    return now.isoformat(timespec="seconds")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def safe_load(path: Path) -> Optional[Dict[str, Any]]:
    """Load a JSON report from *path*, or return ``None`` if it is unusable.

    The renderer's contract is to never error, so every "no usable report"
    condition maps to ``None``: a missing or unreadable file, bytes that are
    not valid UTF-8, malformed JSON, and a JSON document whose root is not
    an object.

    Args:
        path: Location of the JSON report.

    Returns:
        The decoded JSON object, or ``None`` when no usable report exists.
    """
    try:
        # Explicit UTF-8: the report carries non-ASCII text and must not
        # depend on the platform's locale encoding.
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing file, directory, permission problem.
        # ValueError: json.JSONDecodeError and UnicodeDecodeError.
        return None
    # Callers use `.get(...)` on the result, so only a JSON object is valid.
    return payload if isinstance(payload, dict) else None
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def fmt_signed_int(value: int) -> str:
    """Format *value* with an explicit sign and spaces as thousands separators."""
    grouped = format(value, "+,")
    return grouped.replace(",", " ")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def fmt_eur(value: float) -> str:
    """Format *value* as a signed euro amount with two decimals."""
    amount = format(value, "+.2f")
    return amount + " €"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def fmt_pct(value: float) -> str:
    """Format *value* as a signed percentage with two decimals.

    The value is printed as given; it is not multiplied by 100.
    """
    number = format(value, "+.2f")
    return number + "%"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def confidence_badge(level: str) -> str:
    """Map a confidence level to its display badge.

    Levels without a badge are returned unchanged.
    """
    known = (
        ("measured", "✅ gemessen"),
        ("estimated", "≈ geschätzt"),
        ("vendor-claim", "⚠️ vendor-claim"),
        ("pending", "⏳ pending"),
    )
    return dict(known).get(level, level)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def mode_badge(mode: str) -> str:
    """Map a bench mode to its display badge.

    Only ``"live"`` and ``"dry-run"`` get a badge; any other value is
    returned unchanged.
    """
    if mode == "dry-run":
        return "⚠️ dry-run"
    return "✅ live" if mode == "live" else mode
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def render_intro(report: Dict[str, Any]) -> str:
    """Render the dashboard title, reading guide and reference-scale section.

    Values come from ``report["reference_scale"]``; each missing key falls
    back to the default used below (1000 requests, 8000 input tokens,
    600 output tokens, tier ``sonnet``, pricing date ``—``).

    Args:
        report: Decoded value report.

    Returns:
        Markdown text ending with a newline.
    """
    scale = report.get("reference_scale", {})
    requests = scale.get("requests", 1000)
    avg_in = scale.get("avg_input_tokens", 8000)
    avg_out = scale.get("avg_output_tokens", 600)
    tier = scale.get("model_tier", "sonnet")
    sourced = scale.get("pricing_sourced_on", "—")

    # Request count with thousands separators; used in two places.
    requests_txt = f"{requests:,}"

    title = "# Value Dashboard — was kostet das Paket, was bringt es?\n"
    lead = (
        "> Diese Seite beantwortet **eine** Frage in echten Zahlen: "
        "*Wie viel mehr Tokens kostet mich das Paket, und wie viel "
        "spart es danach wieder ein?* Generiert von "
        "`scripts/render_value_md.py` aus dem letzten `value-v1` Report; "
        "Quelle: `internal/bench/reports/value/latest.json`.\n"
    )
    guide_heading = "## Wie diese Seite zu lesen ist\n"
    guide_panel_a = (
        "**Panel A (Kostenleiter)** — von oben nach unten lesen. Jede "
        "Stufe sagt: *was sie macht*, *wie viele Input-Tokens sie pro "
        "Request hinzufügt oder spart*, *was das in € auf "
        + requests_txt
        + " Requests kostet*, und *wo wir kumulativ stehen*. "
        "Die fett gedruckte **NETTO**-Zeile am Ende ist die Antwort.\n"
    )
    guide_panel_b = (
        "**Panel B (Verhalten)** — vier reale Vergleiche, *mit* vs. "
        "*ohne* Paket. Hier liegt der nicht-Token-Wert: passende Skill-"
        "Auswahl, Stopps bei riskanten Aktionen, weniger Rückfragen, "
        "mehr abgeschlossene Aufgaben.\n"
    )
    guide_confidence = (
        "**Confidence-Marker** an jeder Stufe: `✅ gemessen` = echter "
        "Wert aus einem Report im Repo · `⏳ pending` = noch nicht "
        "gemessen, Stufe trägt 0 zur Summe bei · `⚠️ vendor-claim` = "
        "Behauptung eines Herstellers, nicht selbst gemessen.\n"
    )
    scale_heading = "## Reference scale\n"
    scale_bullets = (
        "- **" + requests_txt + "** Requests, durchschnittlich "
        f"**{avg_in:,}** Input-Tokens und **{avg_out:,}** Output-Tokens "
        "pro Request\n"
        f"- Modell-Tier: `{tier}` · "
        f"Preisstand `{sourced}` (Quelle: `internal/bench/pricing.yaml`)\n"
        "- Wer einen anderen Workload fährt, rechnet selbst nach — die "
        "Methodik ist offengelegt; nichts ist hardcodiert versteckt.\n"
    )

    # Every section already ends in a newline; joining with one more
    # newline yields the blank line between Markdown blocks.
    return "\n".join(
        (
            title,
            lead,
            guide_heading,
            guide_panel_a,
            guide_panel_b,
            guide_confidence,
            scale_heading,
            scale_bullets,
        )
    )
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def render_panel_a(report: Dict[str, Any]) -> str:
    """Render Panel A, the cumulative cost ladder, as a Markdown section.

    One table row is emitted per entry of ``report["cost_ladder"]``, with
    an extra footnote row when the rung carries a non-empty ``footnote``.
    A bold NETTO summary line built from ``report["totals"]`` follows the
    table.

    Optional fields that are absent *or* explicitly ``null`` in the JSON
    fall back to neutral defaults (0, empty text, ``pending``), so an
    unmeasured rung renders instead of raising.

    Args:
        report: Decoded value report.

    Returns:
        Markdown text ending with a newline.

    Raises:
        KeyError: If a rung lacks the ``id`` or ``label`` key.
    """
    lines = [
        "## Panel A — Kostenleiter (kumulativ, min → max)\n",
        "Liest sich von oben nach unten. Positive Δ-Werte = das Paket "
        "*kostet* Tokens (Regel-Load ist die ehrliche Up-Front-Steuer); "
        "negative Δ-Werte = das Paket *spart* Tokens.\n",
        "| Stufe | Was sie tut | Δ Tokens | Δ € (1k Req) | Kumulativ | Quelle |",
        "|---|---|---:|---:|---:|---|",
    ]
    for rung in report.get("cost_ladder") or []:
        rung_id = rung["id"]
        # The baseline rung is set in bold.
        if rung_id == "baseline":
            label_cell = f"**{rung['label']}**"
        else:
            label_cell = rung["label"]
        # `or <default>` rather than `.get(key, default)`: the default must
        # also apply when the key is present with a JSON null value.
        what = rung.get("what_it_does") or ""
        token_delta = int(rung.get("token_delta") or 0)
        eur_delta = float(rung.get("eur_delta") or 0.0)
        cum = float(rung.get("cumulative_pct") or 0.0)
        conf = confidence_badge(rung.get("confidence") or "pending")
        source = rung.get("source_report") or ""
        # Honesty stamp: an `up-front-cost` note on the load rung.
        if rung_id == "load" and token_delta > 0:
            what = f"{what} ⚠️ erst teurer"
        lines.append(
            f"| {label_cell} | {what} | "
            f"{fmt_signed_int(token_delta)} | {fmt_eur(eur_delta)} | "
            f"{fmt_pct(cum)} | `{source}` · {conf} |"
        )
        if rung.get("footnote"):
            lines.append(
                f"| | _Fußnote:_ {rung['footnote']} | | | | |"
            )

    totals = report.get("totals") or {}
    cum_tokens = int(totals.get("cumulative_token_delta") or 0)
    cum_eur = float(totals.get("cumulative_eur_delta") or 0.0)
    cum_pct = float(totals.get("cumulative_pct") or 0.0)
    verdict = totals.get("net_verdict") or "—"
    # Unknown verdict strings are shown verbatim after "NETTO:".
    verdict_label = {
        "net-saving": "**NETTO: Ersparnis** ✅",
        "net-cost": "**NETTO: Mehrkosten** ⚠️",
        "break-even": "**NETTO: Break-Even** ⚖️",
    }.get(verdict, f"**NETTO: {verdict}**")
    requests = (report.get("reference_scale") or {}).get("requests", 1000)
    lines.extend(
        [
            "",
            f"{verdict_label} — "
            f"**{fmt_signed_int(cum_tokens)} Tokens / Request**, "
            f"**{fmt_eur(cum_eur)}** auf "
            f"{requests:,} Requests, "
            f"kumulativ **{fmt_pct(cum_pct)}** vs. Baseline.\n",
        ]
    )
    return "\n".join(lines)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def render_panel_b(report: Dict[str, Any]) -> str:
    """Render Panel B, the with/without behaviour comparison, as Markdown.

    One table row is emitted per entry of ``report["behaviour"]``. The
    ``with``, ``without`` and ``delta`` cells are formatted according to
    the metric's ``unit``.

    Args:
        report: Decoded value report.

    Returns:
        Markdown text ending with a newline.

    Raises:
        KeyError: If a metric lacks the ``label`` key.
    """

    def _fmt(v: Any, unit: str) -> str:
        """Format one cell value for *unit*; ``None`` renders as a dash."""
        if v is None:
            return "—"
        # Unit-specific formatting applies to numbers only; anything else
        # (e.g. a textual "n/a") is shown verbatim instead of raising.
        if isinstance(v, (int, float)):
            if unit == "pct":
                # The value is scaled by 100 for display.
                return f"{float(v) * 100:.1f}%"
            if unit == "count":
                return str(int(v))
            if unit == "ratio":
                return f"{float(v):.3f}"
            if unit == "seconds":
                return f"{float(v):.1f}s"
        return str(v)

    lines = [
        "## Panel B — Verhalten (mit vs. ohne)\n",
        "Vier reale Vergleiche aus echten Bench-Runs. Hier liegt der "
        "Wert, den Tokens allein nicht messen: ob der Agent das "
        "richtige Skill wählt, bei riskanten Aktionen stoppt, weniger "
        "rückfragt und mehr Aufgaben abschließt.\n",
        "| Metrik | Was es bedeutet | Mit Paket | Ohne Paket | Δ | Mode |",
        "|---|---|---:|---:|---:|---|",
    ]
    for metric in report.get("behaviour", []):
        label = metric["label"]
        what = metric.get("what_this_means", "")
        unit = metric.get("unit", "")
        # A metric without an explicit mode is shown as a dry run.
        mode = mode_badge(metric.get("mode", "dry-run"))
        with_v = _fmt(metric.get("with"), unit)
        without_v = _fmt(metric.get("without"), unit)
        delta_v = _fmt(metric.get("delta"), unit)
        lines.append(
            f"| {label} | {what} | {with_v} | {without_v} | {delta_v} | {mode} |"
        )
    return "\n".join(lines) + "\n"
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def render_glossary() -> str:
    """Render the static glossary section of the dashboard.

    Returns:
        Markdown text: the heading, a one-line introduction and one bullet
        per term, ending with a newline.
    """
    # (term, definition) pairs, rendered as "- **term** — definition".
    entries = (
        (
            "Token",
            "die Einheit, in der ein Sprachmodell abrechnet. "
            "Faustregel: ein Token ≈ 4 Zeichen deutsch/englischer Prosa. "
            "1.000 Tokens ≈ 750 Wörter.",
        ),
        (
            "Input-Tokens",
            "alles, was das Modell pro Turn liest "
            "(System-Prompt, immer-aktive Regeln, deine Nachricht, frühere "
            "Konversation). Das Paket fügt hier Regeln hinzu — Installation "
            "kostet Input-Tokens.",
        ),
        (
            "Output-Tokens",
            "was das Modell zurückschreibt. Meist "
            "weniger als Input. Pro Token teurer als Input.",
        ),
        (
            "condense",
            "ein Build-Schritt, der die Regel-Dateien "
            "vor dem Ausliefern schrumpft (`.agent-src.uncondensed` → "
            "`.agent-src`). Spart Input-Tokens bei jedem Request.",
        ),
        (
            "rtk",
            "der *Rust Token Killer*, ein CLI-Wrapper, der "
            "verbose Output (`git status`, lint-Output, test-Runner) "
            "filtert, bevor das Modell ihn liest. Spart Input-Tokens auf "
            "Tool-Calls.",
        ),
        (
            "terse / telegraph",
            "ein Stil (kurze Phrasen, "
            "weggelassene Artikel), den der Agent für knappere Antworten "
            "nutzt. Spart Output-Tokens — wenn der Korpus es belohnt.",
        ),
        (
            "Ohne Paket / Mit Paket",
            "*without the package* / *with "
            "the package* — die zwei Arme des A/B-Vergleichs.",
        ),
        (
            "€-per-1k-requests",
            "Token-Kosten auf der "
            "Referenz-Skala (1.000 Requests durchschnittlicher Größe, "
            "gepreist mit den aktuellen Sonnet-Raten aus "
            "`internal/bench/pricing.yaml`).",
        ),
    )
    preamble = (
        "## Glossar\n"
        "\n"
        "Plain-language Definitionen für den nicht-Entwickler-Reader.\n"
        "\n"
    )
    bullets = [f"- **{term}** — {definition}\n" for term, definition in entries]
    return preamble + "".join(bullets)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def render_methodology(report: Dict[str, Any]) -> str:
    """Render the methodology and sources section.

    The section lists the raw reports, links to the benchmark appendix,
    appends any entries of ``report["notes"]`` as bullets, and closes with
    a "Last rendered" timestamp taken from ``utc_iso()``.

    Args:
        report: Decoded value report.

    Returns:
        Markdown text ending with a newline.
    """
    report_notes = report.get("notes", [])

    out = ["## Methodik & Quellen\n"]
    out.append(
        "Diese Seite ist eine **abgeleitete** Sicht — keine eigene "
        "Messung. Sie fasst drei bestehende Bench-Surfaces zusammen "
        "(siehe Spalte 'Quelle' in Panel A). Die maschinen-lesbaren "
        "Roh-Reports bleiben die Source-of-Truth:\n"
    )
    # One bullet per raw report source.
    out += [
        "- `internal/bench/reports/telegraph-v1.json` / `telegraph-v2.json` "
        "— Telegraph/Condense-Messungen.\n",
        "- `agents/runtime/frugality/baseline.jsonl` — der Paket-Load "
        "(Metric A footprint).\n",
        "- `internal/bench/reports/rtk/latest.json` — die rtk-Messung "
        "(neu, Phase 2).\n",
        "- `internal/bench/reports/ab/*-ab-trackb-{with,without}.json` "
        "— A/B Track B (Verhalten).\n",
        "- `internal/bench/reports/*-dev.json` — Dev-Korpus Selection-"
        "Accuracy.\n",
    ]
    out.append("")
    out.append(
        "**A/B-technischer Anhang:** [`docs/benchmark.md`](benchmark.md) "
        "trägt die Cache-Key-, Integrity- und Methodik-Details des "
        "A/B-Benches — wer den Variant-Axis-Beweis sehen will, liest "
        "dort weiter.\n"
    )
    out.append("")

    # The notes block is emitted only when the report has notes.
    if report_notes:
        out.append("**Hinweise aus dem Report:**\n")
        out.extend(f"- {entry}" for entry in report_notes)
        out.append("")

    out.append(f"_Last rendered: `{utc_iso()}`_\n")
    return "\n".join(out)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def render_placeholder() -> str:
    """Render the document written when no value report is available.

    It explains how to produce a report (``task value``), points to the
    dashboard spec and roadmap, and ends with a "Last rendered" timestamp
    taken from ``utc_iso()``.

    Returns:
        Markdown text ending with a newline.
    """
    paragraphs = [
        "# Value Dashboard — Platzhalter\n",
        "_Es liegt noch kein `value-v1` Report unter "
        "`internal/bench/reports/value/latest.json` vor._\n",
        "Einen erzeugen mit:\n",
        "```sh\n"
        "task value\n"
        "```\n",
        "Die Methodik dieses Dashboards ist beschrieben in "
        "`docs/contracts/value-dashboard-spec.md` und der zugehörigen "
        "Roadmap `agents/roadmaps/road-to-readable-value-dashboard.md`.\n",
    ]
    paragraphs.append(f"_Last rendered: {utc_iso()}_\n")
    # Each paragraph ends in a newline; the joiner adds the blank line.
    return "\n".join(paragraphs)
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def render(quiet: bool = False) -> int:
    """Write ``docs/value.md`` from the latest value report.

    When no usable report is available (``safe_load`` returns ``None``, an
    empty object, or anything that is not a JSON object) a placeholder
    document is written instead. The output is always written as UTF-8:
    the dashboard contains emoji and other non-ASCII characters that a
    locale-dependent default encoding may be unable to encode.

    Args:
        quiet: Suppress the one-line status message on stdout.

    Returns:
        Always ``0``.
    """
    report = safe_load(LATEST)
    OUT_PATH.parent.mkdir(parents=True, exist_ok=True)

    # Guard on the type as well as on emptiness: the panel renderers call
    # `.get(...)` on the report.
    if not isinstance(report, dict) or not report:
        OUT_PATH.write_text(render_placeholder(), encoding="utf-8")
        if not quiet:
            sys.stdout.write(
                f"render_value_md: no report — wrote placeholder to "
                f"{OUT_PATH.relative_to(REPO_ROOT)}\n"
            )
        return 0

    parts = [
        render_intro(report),
        render_panel_a(report),
        render_panel_b(report),
        render_glossary(),
        render_methodology(report),
    ]
    OUT_PATH.write_text("\n".join(parts), encoding="utf-8")
    if not quiet:
        sys.stdout.write(
            f"render_value_md: wrote {OUT_PATH.relative_to(REPO_ROOT)}\n"
        )
    return 0
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the command-line arguments in *argv*.

    The only option is ``--quiet``, a flag that suppresses stdout output.
    """
    cli = argparse.ArgumentParser(
        description="Render docs/value.md from the latest value-v1 report.",
    )
    cli.add_argument(
        "--quiet",
        help="Suppress stdout.",
        action="store_true",
    )
    namespace = cli.parse_args(argv)
    return namespace
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Arguments to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The exit code returned by ``render``.
    """
    if argv is None:
        argv = sys.argv[1:]
    options = parse_args(argv)
    return render(quiet=options.quiet)
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
# Run the renderer when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Reproduce the silent marketplace-install gap that this roadmap fixes.
#
# Phase 0 of `road-to-hooks-actually-fire-in-consumers`.
#
# Simulates a consumer project that ran `/plugin install
# agent-config@event4u-agent-config` but NEVER ran `agent-config init`.
# The plugin's hooks.json fires under Claude's lifecycle, but every
# resolved command (`"$CLAUDE_PROJECT_DIR"/agent-config dispatch:hook
# …`) errors out silently because the prerequisites do not exist.
#
# Expected output (the bug):
#   - Dispatcher exits 0 (never-block contract)
#   - NO `agents/roadmaps-progress.md` written
#   - NO state file under `agents/runtime/state/`
#   - Hook tried, hook failed, no trace left behind
#
# Once Phases 1-4 land, the same script should produce a
# `dispatch-issues.jsonl` entry naming the missing artefact.

set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
TMPDIR_BASE="${TMPDIR:-/tmp}"
CONSUMER_ROOT="$(mktemp -d "$TMPDIR_BASE/marketplace-install-gap-XXXXXX")"
EVIDENCE_FILE="$REPO_ROOT/agents/evidence/analysis/hooks-marketplace-gap-2026-05-29.md"
# Declared before the trap so cleanup stays valid under `set -u` even if
# the script aborts before the stderr capture file is created.
DISPATCH_STDERR=""

# Remove every temporary artefact, on success and on any early exit.
cleanup() {
  rm -rf "$CONSUMER_ROOT" 2>/dev/null || true
  if [ -n "$DISPATCH_STDERR" ]; then
    rm -f "$DISPATCH_STDERR" 2>/dev/null || true
  fi
}
trap cleanup EXIT

echo "==> Setting up synthetic marketplace-install consumer at: $CONSUMER_ROOT"

# 1. Write only the marketplace-install end-state (.claude/settings.json
#    with the plugin enabled). Nothing else — no symlink, no regenerator,
#    no .augment/, no agents/runtime/state/.
mkdir -p "$CONSUMER_ROOT/.claude"
cat > "$CONSUMER_ROOT/.claude/settings.json" <<'JSON'
{
  "enabledPlugins": {
    "agent-config@event4u-agent-config": true
  }
}
JSON

# 2. Fake roadmap so a hook on path_prefix `agents/roadmaps/` has a
#    target to react to. (For Phase 0 we don't actually run a hook
#    that depends on this — but it documents the file layout.)
mkdir -p "$CONSUMER_ROOT/agents/roadmaps"
cat > "$CONSUMER_ROOT/agents/roadmaps/road-to-fake.md" <<'MD'
---
complexity: lightweight
---
# Roadmap: fake

## Phase 1

- [ ] **Step 1:** placeholder
MD

# 3. Capture the missing-artefact inventory BEFORE we try anything.
#    Entry format: "<relative path>" optionally followed by " (<note>)".
#    Only the parenthetical is stripped before the existence check, so any
#    annotation MUST be written in parentheses — otherwise it becomes part
#    of the path and the entry is always reported as MISSING.
echo
echo "==> Missing-artefact inventory:"
INVENTORY=""
for artefact in \
  ".claude/settings.json (plugin enabled)" \
  "agent-config (symlink)" \
  ".augment/scripts/update_roadmap_progress.py" \
  ".agent-src/scripts/update_roadmap_progress.py" \
  ".agent-src.uncondensed/scripts/update_roadmap_progress.py" \
  ".git/hooks/pre-commit" \
  "agents/runtime/state/"
do
  # Strip parenthetical for the existence check.
  path_only="${artefact% (*}"
  # `-L` additionally catches a dangling symlink, which `-e` reports as absent.
  if [ -e "$CONSUMER_ROOT/$path_only" ] || [ -L "$CONSUMER_ROOT/$path_only" ]; then
    status="present"
  else
    status="MISSING"
  fi
  line=" $status: $artefact"
  INVENTORY="$INVENTORY$line"$'\n'
  echo "$line"
done

# 4. Emit a synthetic PostToolUse JSON envelope on stdin to the
#    dispatch hook, simulating what Claude Code would send when an
#    agent writes to the fake roadmap.
#    NOTE: $CONSUMER_ROOT is interpolated into the JSON unescaped; a TMPDIR
#    containing `"` or `\` would produce an invalid envelope.
echo
echo "==> Invoking dispatch hook with synthetic PostToolUse envelope..."
ENVELOPE=$(cat <<JSON
{
  "session_id": "repro-marketplace-gap",
  "transcript_path": "/dev/null",
  "cwd": "$CONSUMER_ROOT",
  "hook_event_name": "PostToolUse",
  "tool_name": "Write",
  "tool_input": {
    "file_path": "$CONSUMER_ROOT/agents/roadmaps/road-to-fake.md"
  },
  "tool_response": {}
}
JSON
)

DISPATCH_STDERR="$(mktemp "$TMPDIR_BASE/dispatch-stderr-XXXXXX")"
DISPATCH_EXIT=0
# `|| DISPATCH_EXIT=$?` records a non-zero exit instead of letting
# `set -e` abort the script.
printf '%s\n' "$ENVELOPE" | (
  cd "$CONSUMER_ROOT"
  CLAUDE_PROJECT_DIR="$CONSUMER_ROOT" python3 "$REPO_ROOT/scripts/hooks/dispatch_hook.py" \
    --platform claude --event post_tool_use --native-event PostToolUse \
    2>"$DISPATCH_STDERR"
) || DISPATCH_EXIT=$?

echo " dispatcher exit code: $DISPATCH_EXIT"
echo " dispatcher stderr:"
# Truncate first, then indent: with `pipefail`, `sed … | head` would abort
# the script via SIGPIPE if `head` exited while `sed` was still writing.
head -20 "$DISPATCH_STDERR" | sed 's/^/ /'

# 5. Confirm the silent-no-op shape.
echo
echo "==> Verifying the silent no-op:"

DASHBOARD_EXISTS="no"
if [ -e "$CONSUMER_ROOT/agents/roadmaps-progress.md" ]; then
  DASHBOARD_EXISTS="yes"
fi
echo " agents/roadmaps-progress.md written: $DASHBOARD_EXISTS (expected: no)"

STATE_FILES=0
if [ -d "$CONSUMER_ROOT/agents/runtime/state" ]; then
  STATE_FILES=$(find "$CONSUMER_ROOT/agents/runtime/state" -type f 2>/dev/null | wc -l | tr -d ' ')
fi
echo " state files under agents/runtime/state/: $STATE_FILES (expected: 0)"

# Phase-1-aware check: after that phase lands, dispatch-issues.jsonl
# should exist with at least one entry. Today (pre-Phase-1) it does
# not. The script reports both shapes.
DISPATCH_ISSUES="no"
if [ -e "$CONSUMER_ROOT/agents/runtime/state/dispatch-issues.jsonl" ]; then
  DISPATCH_ISSUES="yes"
fi
echo " agents/runtime/state/dispatch-issues.jsonl: $DISPATCH_ISSUES (pre-Phase-1: no; post-Phase-1: yes)"

rm -f "$DISPATCH_STDERR"

# 6. Append evidence.
mkdir -p "$(dirname "$EVIDENCE_FILE")"
{
  printf '## %s — repro run\n\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  printf 'Tmp consumer root: `%s`\n\n' "$CONSUMER_ROOT"
  printf 'Inventory:\n\n'
  printf '```\n%s```\n\n' "$INVENTORY"
  printf 'Dispatcher exit: `%s`\n' "$DISPATCH_EXIT"
  printf 'Dashboard written: `%s`\n' "$DASHBOARD_EXISTS"
  printf 'State files: `%s`\n' "$STATE_FILES"
  printf 'dispatch-issues.jsonl: `%s`\n\n' "$DISPATCH_ISSUES"
} >> "$EVIDENCE_FILE"

echo
# The prefix is quoted so it is removed literally, not matched as a glob.
echo "==> Evidence appended to: ${EVIDENCE_FILE#"$REPO_ROOT"/}"
|
|
@@ -132,6 +132,29 @@ def run(stdin_text: str, *, consumer_root: Path, verbose: bool = False) -> int:
|
|
|
132
132
|
|
|
133
133
|
script = _resolve_regenerator(consumer_root)
|
|
134
134
|
if script is None:
|
|
135
|
+
# Phase 1 of road-to-hooks-actually-fire-in-consumers: log
|
|
136
|
+
# dispatch issue directly (this hook runs as a subprocess from
|
|
137
|
+
# the universal dispatcher; routing through the dispatcher
|
|
138
|
+
# would add latency for no benefit).
|
|
139
|
+
try:
|
|
140
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent / "hooks"))
|
|
141
|
+
from dispatch_issues import log_dispatch_issue # noqa: PLC0415
|
|
142
|
+
log_dispatch_issue(
|
|
143
|
+
workspace_root=consumer_root,
|
|
144
|
+
hook="roadmap-progress",
|
|
145
|
+
issue="prerequisite_missing",
|
|
146
|
+
detail=(
|
|
147
|
+
"update_roadmap_progress.py not found at any of: "
|
|
148
|
+
".augment/scripts/, .agent-src/scripts/, "
|
|
149
|
+
".agent-src.uncondensed/scripts/"
|
|
150
|
+
),
|
|
151
|
+
resolution=(
|
|
152
|
+
"./agent-config hooks:install --regen "
|
|
153
|
+
"(or ./agent-config init)"
|
|
154
|
+
),
|
|
155
|
+
)
|
|
156
|
+
except (ImportError, OSError):
|
|
157
|
+
pass # observability never breaks the hook
|
|
135
158
|
if verbose:
|
|
136
159
|
print("roadmap-progress-hook: regenerator not found, skipping",
|
|
137
160
|
file=sys.stderr)
|