npm - @event4u/agent-config - Versions diffs - 5.5.0 → 5.6.1 - Mend

@event4u/agent-config 5.5.0 → 5.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

package/.agent-src/commands/image/analyse.md +51 -0
package/.agent-src/commands/image/create.md +53 -0
package/.agent-src/commands/image/verify.md +48 -0
package/.agent-src/commands/image.md +69 -0
package/.agent-src/commands/video/from-song.md +40 -6
package/.agent-src/contexts/authority/commit-mechanics.md +8 -0
package/.agent-src/rules/commit-policy.md +3 -8
package/.agent-src/rules/media-sync-ground-truth.md +58 -0
package/.agent-src/skills/image-analyser/SKILL.md +121 -0
package/.agent-src/skills/image-analyser/canon-spec.md +109 -0
package/.agent-src/skills/image-analyser/evals/triggers.json +16 -0
package/.agent-src/skills/image-creator/SKILL.md +117 -0
package/.agent-src/skills/image-creator/evals/triggers.json +16 -0
package/.agent-src/skills/song-to-script/SKILL.md +36 -13
package/.claude-plugin/marketplace.json +7 -1
package/CHANGELOG.md +56 -0
package/README.md +2 -2
package/config/agent-settings.template.yml +18 -0
package/dist/discovery/deprecation-report.md +1 -1
package/dist/discovery/discovery-manifest.json +171 -18
package/dist/discovery/discovery-manifest.json.sha256 +1 -1
package/dist/discovery/discovery-manifest.summary.md +4 -4
package/dist/discovery/orphan-report.md +1 -1
package/dist/discovery/packs.json +15 -8
package/dist/discovery/trust-report.md +3 -3
package/dist/discovery/workspaces.json +13 -6
package/dist/mcp/registry-manifest.json +3 -3
package/dist/router.json +1 -1
package/dist/server/schemas/settings.js +4 -0
package/dist/server/schemas/settings.js.map +1 -1
package/docs/architecture.md +3 -3
package/docs/catalog.md +20 -6
package/docs/contracts/benchmark-report-schema.md +12 -10
package/docs/contracts/command-clusters.md +1 -0
package/docs/contracts/rule-router.md +39 -0
package/docs/contracts/value-dashboard-spec.md +7 -3
package/docs/contracts/value-report-schema.md +6 -1
package/docs/getting-started.md +2 -2
package/docs/value.md +17 -17
package/package.json +1 -1
package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
package/scripts/_lib/bench_report.py +13 -14
package/scripts/_lib/bench_telegraph_report.py +1 -2
package/scripts/_lib/token_count.py +95 -0
package/scripts/_lib/value_report.py +3 -3
package/scripts/ai-video/adapters/higgsfield.sh +163 -6
package/scripts/ai-video/adapters/openai-images.sh +92 -6
package/scripts/audit_auto_rules.py +22 -6
package/scripts/audit_command_surface.py +6 -1
package/scripts/audit_initial_context.py +210 -0
package/scripts/bench_ab_diff.py +4 -11
package/scripts/bench_run.py +2 -3
package/scripts/bench_runner.py +2 -2
package/scripts/condense.py +44 -3
package/scripts/iron_law_sha.py +14 -5
package/scripts/measure_rule_budget.py +15 -0
package/scripts/pack_mcp_content.py +1 -1
package/scripts/project_thin_rules.py +168 -0
package/scripts/render_value_md.py +14 -23
package/scripts/schemas/command.schema.json +1 -1
package/scripts/schemas/rule.schema.json +1 -1
package/scripts/schemas/skill.schema.json +2 -2
package/scripts/trigger_coverage.py +129 -0

package/scripts/project_thin_rules.py ADDED Viewed

@@ -0,0 +1,168 @@
+#!/usr/bin/env python3
+"""Thin-projection of the rule layer (lean-initial-context build-out, Phase 3.1).
+The dominant always-on cost is rule BODIES (~58k GPT tok; kernel only ~6.5k).
+0B.6 verdict: demote every non-kernel rule body to a progressive-disclosure
+pointer the agent resolves on trigger-match (the one mechanism 0B.5 confirmed
+works for the primary tool — like skills). The kernel stays full-bodied.
+A **thin** rule entry keeps the matching signal (frontmatter `description` +
+`triggers`) so the router still selects it, and replaces the body with a
+one-line pointer to the full text. The agent loads the body on match.
+This module is the mechanism + a measurement harness. It writes to a target
+dir of your choosing — it never overwrites the live `.claude/` / `.augment/`
+projections. condense.py reads `lean_projection.mode` (default `eager-all`)
+to decide whether the real generate-tools path calls in here; until that flag
+is flipped + live-A/B-validated, the default projection is unchanged.
+Usage:
+    python3 scripts/project_thin_rules.py --measure          # measure delta, no write
+    python3 scripts/project_thin_rules.py --out <dir>        # write thin rules to <dir>
+"""
+from __future__ import annotations
+import argparse
+import json
+import re
+import sys
+from pathlib import Path
+REPO_ROOT = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(REPO_ROOT / "scripts"))
+from _lib import token_count  # noqa: E402
+RULES_SOURCE = REPO_ROOT / ".agent-src" / "rules"
+ROUTER = REPO_ROOT / "dist" / "router.json"
+def kernel_ids() -> set[str]:
+    """The always-full-bodied set — authoritative kernel list from the router."""
+    return set(json.loads(ROUTER.read_text(encoding="utf-8")).get("kernel", []))
+def split_frontmatter(text: str) -> tuple[str, str]:
+    """Return (frontmatter_including_fences, body). Empty fm if none."""
+    if text.startswith("---\n"):
+        end = text.find("\n---\n", 4)
+        if end != -1:
+            return text[: end + 5], text[end + 5 :]
+    return "", text
+def _description(fm: str) -> str:
+    m = re.search(r'^description:\s*"?(.+?)"?\s*$', fm, re.MULTILINE)
+    return m.group(1).strip() if m else ""
+# How many trigger keywords/phrases to surface as the always-on match hint.
+# The full trigger set lives in dist/router.json (compiled from source) — the
+# projected entry only needs enough signal for the agent to recognise a match
+# and load the body. The router, not this list, drives actual selection.
+_TRIGGER_HINT_LIMIT = 6
+def _trigger_hint(fm: str) -> str:
+    """A short, comma-joined sample of the rule's trigger keywords/phrases."""
+    hits: list[str] = []
+    for m in re.finditer(r'^\s*-\s*(?:keyword|phrase|intent):\s*"?(.+?)"?\s*$', fm, re.MULTILINE):
+        hits.append(m.group(1).strip())
+        if len(hits) >= _TRIGGER_HINT_LIMIT:
+            break
+    return ", ".join(hits)
+def thin_entry(rule_id: str, text: str) -> str:
+    """Build the minimal progressive-disclosure pointer for a non-kernel rule.
+    The always-on layer keeps only the match signal (description + a short
+    trigger hint) and a pointer to the full body — NOT the full frontmatter.
+    The router (dist/router.json, compiled from source) holds the complete
+    `triggers:` / `routes_to:`; selection is unchanged. Dropping the inlined
+    frontmatter is where the bulk of the token saving comes from.
+    """
+    fm, _body = split_frontmatter(text)
+    desc = _description(fm)
+    hint = _trigger_hint(fm)
+    title = rule_id.replace("-", " ").title()
+    fires = f" Fires on: {hint}." if hint else ""
+    return (
+        f"## {title}\n"
+        f"> Routed rule — load the body on trigger-match.{fires} {desc} "
+        f"Body: [`{rule_id}`](../../.agent-src.uncondensed/rules/{rule_id}.md)\n"
+    )
+def build_thin(rules_dir: Path = RULES_SOURCE) -> dict[str, str]:
+    """Map {filename: thin_or_full_text} for every rule. Kernel stays full."""
+    kernel = kernel_ids()
+    out: dict[str, str] = {}
+    for p in sorted(rules_dir.glob("*.md")):
+        text = p.read_text(encoding="utf-8")
+        out[p.name] = text if p.stem in kernel else thin_entry(p.stem, text)
+    return out
+def measure(rules_dir: Path = RULES_SOURCE) -> dict:
+    """Eager vs thin token footprint for the rule layer."""
+    kernel = kernel_ids()
+    eager_blob = "".join(
+        p.read_text(encoding="utf-8") for p in sorted(rules_dir.glob("*.md"))
+    )
+    thin_blob = "".join(build_thin(rules_dir).values())
+    eager = token_count.measure(eager_blob)
+    thin = token_count.measure(thin_blob)
+    n = len(list(rules_dir.glob("*.md")))
+    return {
+        "rules_total": n,
+        "kernel_full": len(kernel & {p.stem for p in rules_dir.glob("*.md")}),
+        "non_kernel_thinned": n - len(kernel & {p.stem for p in rules_dir.glob("*.md")}),
+        "eager_gpt": eager["tokens_gpt"],
+        "thin_gpt": thin["tokens_gpt"],
+        "saved_gpt": eager["tokens_gpt"] - thin["tokens_gpt"],
+        "saved_pct": round(
+            100 * (eager["tokens_gpt"] - thin["tokens_gpt"]) / eager["tokens_gpt"], 1
+        )
+        if eager["tokens_gpt"]
+        else 0.0,
+        "eager_chars": eager["chars"],
+        "thin_chars": thin["chars"],
+        "token_method": token_count.method_note(),
+    }
+def write_thin(out_dir: Path, rules_dir: Path = RULES_SOURCE) -> int:
+    out_dir.mkdir(parents=True, exist_ok=True)
+    files = build_thin(rules_dir)
+    for name, text in files.items():
+        (out_dir / name).write_text(text, encoding="utf-8")
+    return len(files)
+def main(argv: list[str] | None = None) -> int:
+    ap = argparse.ArgumentParser(description=__doc__.splitlines()[0])
+    ap.add_argument("--measure", action="store_true", help="print the eager-vs-thin token delta")
+    ap.add_argument("--out", type=Path, help="write thin rule files to this dir")
+    ap.add_argument("--json", action="store_true")
+    args = ap.parse_args(argv)
+    if args.out:
+        n = write_thin(args.out)
+        print(f"wrote {n} thin rule files → {args.out}")
+        return 0
+    m = measure()
+    if args.json:
+        print(json.dumps(m, indent=2, sort_keys=True))
+    else:
+        print(f"Rule-layer thin projection (kernel full-bodied + {m['non_kernel_thinned']} non-kernel pointers):")
+        print(f"  eager: {m['eager_gpt']:>6} GPT tok ({m['eager_chars']:,} chars)")
+        print(f"  thin:  {m['thin_gpt']:>6} GPT tok ({m['thin_chars']:,} chars)")
+        print(f"  saved: {m['saved_gpt']:>6} GPT tok  ({m['saved_pct']}% of the rule layer)")
+        print(f"  method: {m['token_method']}")
+    return 0
+if __name__ == "__main__":
+    sys.exit(main())

package/scripts/render_value_md.py CHANGED Viewed

@@ -57,10 +57,6 @@ def fmt_signed_int(value: int) -> str:
     return f"{value:+,}".replace(",", " ")
-def fmt_eur(value: float) -> str:
-    return f"{value:+.2f} €"
 def fmt_pct(value: float) -> str:
     return f"{value:+.2f}%"
@@ -89,7 +85,6 @@ def render_intro(report: Dict[str, Any]) -> str:
     avg_in = ref.get("avg_input_tokens", 8000)
     avg_out = ref.get("avg_output_tokens", 600)
     tier = ref.get("model_tier", "sonnet")
-    sourced = ref.get("pricing_sourced_on", "—")
     return (
         f"# Value Dashboard — was kostet das Paket, was bringt es?\n"
         "\n"
@@ -101,11 +96,12 @@ def render_intro(report: Dict[str, Any]) -> str:
         "\n"
         "## Wie diese Seite zu lesen ist\n"
         "\n"
-        "**Panel A (Kostenleiter)** — von oben nach unten lesen. Jede "
+        "**Panel A (Token-Leiter)** — von oben nach unten lesen. Jede "
         "Stufe sagt: *was sie macht*, *wie viele Input-Tokens sie pro "
-        "Request hinzufügt oder spart*, *was das in € auf "
-        f"{requests:,} Requests kostet*, und *wo wir kumulativ stehen*. "
-        "Die fett gedruckte **NETTO**-Zeile am Ende ist die Antwort.\n"
+        "Request hinzufügt oder spart*, und *wo wir kumulativ stehen*. "
+        "Die fett gedruckte **NETTO**-Zeile am Ende ist die Antwort. "
+        "Bewusst rein in Tokens — kein €-Vergleich, da Abo-Nutzer keine "
+        "Per-Request-API-Preise zahlen.\n"
         "\n"
         "**Panel B (Verhalten)** — vier reale Vergleiche, *mit* vs. "
         "*ohne* Paket. Hier liegt der nicht-Token-Wert: passende Skill-"
@@ -122,8 +118,7 @@ def render_intro(report: Dict[str, Any]) -> str:
         f"- **{requests:,}** Requests, durchschnittlich "
         f"**{avg_in:,}** Input-Tokens und **{avg_out:,}** Output-Tokens "
         "pro Request\n"
-        f"- Modell-Tier: `{tier}` · "
-        f"Preisstand `{sourced}` (Quelle: `internal/bench/pricing.yaml`)\n"
+        f"- Modell-Tier (Workload-Annahme): `{tier}`\n"
         "- Wer einen anderen Workload fährt, rechnet selbst nach — die "
         "Methodik ist offengelegt; nichts ist hardcodiert versteckt.\n"
     )
@@ -135,8 +130,8 @@ def render_panel_a(report: Dict[str, Any]) -> str:
         "Liest sich von oben nach unten. Positive Δ-Werte = das Paket "
         "*kostet* Tokens (Regel-Load ist die ehrliche Up-Front-Steuer); "
         "negative Δ-Werte = das Paket *spart* Tokens.\n",
-        "| Stufe | Was sie tut | Δ Tokens | Δ € (1k Req) | Kumulativ | Quelle |",
-        "|---|---|---:|---:|---:|---|",
+        "| Stufe | Was sie tut | Δ Tokens | Kumulativ | Quelle |",
+        "|---|---|---:|---:|---|",
     ]
     for rung in report.get("cost_ladder", []):
         if rung["id"] == "baseline":
@@ -145,7 +140,6 @@ def render_panel_a(report: Dict[str, Any]) -> str:
             label_cell = rung["label"]
         what = rung.get("what_it_does", "")
         token_delta = int(rung.get("token_delta", 0))
-        eur_delta = float(rung.get("eur_delta", 0.0))
         cum = float(rung.get("cumulative_pct", 0.0))
         conf = confidence_badge(rung.get("confidence", "pending"))
         source = rung.get("source_report", "")
@@ -154,17 +148,16 @@ def render_panel_a(report: Dict[str, Any]) -> str:
             what = f"{what} ⚠️ erst teurer"
         lines.append(
             f"| {label_cell} | {what} | "
-            f"{fmt_signed_int(token_delta)} | {fmt_eur(eur_delta)} | "
+            f"{fmt_signed_int(token_delta)} | "
             f"{fmt_pct(cum)} | `{source}` · {conf} |"
         )
         if rung.get("footnote"):
             lines.append(
-                f"| | _Fußnote:_ {rung['footnote']} | | | | |"
+                f"| | _Fußnote:_ {rung['footnote']} | | | |"
             )
     totals = report.get("totals", {})
     cum_tokens = int(totals.get("cumulative_token_delta", 0))
-    cum_eur = float(totals.get("cumulative_eur_delta", 0.0))
     cum_pct = float(totals.get("cumulative_pct", 0.0))
     verdict = totals.get("net_verdict", "—")
     verdict_label = {
@@ -177,8 +170,6 @@ def render_panel_a(report: Dict[str, Any]) -> str:
             "",
             f"{verdict_label} — "
             f"**{fmt_signed_int(cum_tokens)} Tokens / Request**, "
-            f"**{fmt_eur(cum_eur)}** auf "
-            f"{report.get('reference_scale', {}).get('requests', 1000):,} Requests, "
             f"kumulativ **{fmt_pct(cum_pct)}** vs. Baseline.\n",
         ]
     )
@@ -250,10 +241,10 @@ def render_glossary() -> str:
         "nutzt. Spart Output-Tokens — wenn der Korpus es belohnt.\n"
         "- **Ohne Paket / Mit Paket** — *without the package* / *with "
         "the package* — die zwei Arme des A/B-Vergleichs.\n"
-        "- **€-per-1k-requests** — Token-Kosten auf der "
-        "Referenz-Skala (1.000 Requests durchschnittlicher Größe, "
-        "gepreist mit den aktuellen Sonnet-Raten aus "
-        "`internal/bench/pricing.yaml`).\n"
+        "- **Δ Tokens** — Input-Token-Differenz pro Request gegenüber der "
+        "Baseline. Bewusst die einzige Kosten-Einheit: ein €-Vergleich "
+        "würde Per-Request-API-Preise unterstellen, die Abo-Nutzer nicht "
+        "zahlen.\n"
     )

package/scripts/schemas/command.schema.json CHANGED Viewed

@@ -20,7 +20,7 @@
     "description": {
       "type": "string",
       "minLength": 1,
-      "maxLength": 500
+      "maxLength": 200
     },
     "disable-model-invocation": {
       "type": "boolean",

package/scripts/schemas/rule.schema.json CHANGED Viewed

@@ -20,7 +20,7 @@
     "description": {
       "type": "string",
       "minLength": 1,
-      "maxLength": 500
+      "maxLength": 190
     },
     "alwaysApply": {
       "type": "boolean",

package/scripts/schemas/skill.schema.json CHANGED Viewed

@@ -15,8 +15,8 @@
     "description": {
       "type": "string",
       "minLength": 1,
-      "maxLength": 300,
-      "description": "Trigger phrase; ≤ 200 chars recommended, 300 is the hard ceiling."
+      "maxLength": 220,
+      "description": "Trigger phrase; ≤ 200 chars recommended, 220 is the ceiling (lean-initial-context: descriptions load eagerly via progressive disclosure). Over-cap is a soft warning, not a hard fail — a warning window so authors adapt."
     },
     "source": {
       "type": "string",

package/scripts/trigger_coverage.py ADDED Viewed

@@ -0,0 +1,129 @@
+#!/usr/bin/env python3
+"""Trigger-coverage suite (roadmap Phase 2.1 / 2.2).
+The deterministic *must-load* floor for the lean-initial-context migration.
+Before any auto-tier rule body is demoted to a router-resolved pointer
+(Phase 3), this suite proves the router still fires that rule on
+representative task phrasings — so a needed rule can never silently fail
+to surface.
+Cases live in `tests/eval/trigger-coverage.yaml` and have the shape:
+    - id: secrets-edit
+      prompt: "add a webhook secret to the billing service auth flow"
+      expect: [security-sensitive-stop]   # MUST be in the fired set
+Matching is deterministic against `dist/router.json` (NOT the semantic
+production router — this is a reproducible floor that catches a removed
+trigger in CI):
+- kernel rules always fire (always-on layer).
+- a tier rule fires iff any of its triggers matches the prompt:
+  - `keyword` → case-insensitive substring.
+  - `intent`  → every alpha word (len>2) of the intent phrase appears as a
+    token in the prompt (so "structural decision" fires on a prompt that
+    contains both "structural" and "decision").
+A case fails when an expected rule is NOT in the fired set. Exit 1 on any
+miss → the merge that would have shrunk the rule is blocked (2.2).
+Usage:
+    python3 scripts/trigger_coverage.py            # run, human report
+    python3 scripts/trigger_coverage.py --json
+"""
+from __future__ import annotations
+import argparse
+import json
+import re
+import sys
+from pathlib import Path
+REPO_ROOT = Path(__file__).resolve().parent.parent
+ROUTER = REPO_ROOT / "dist" / "router.json"
+CORPUS = REPO_ROOT / "tests" / "eval" / "trigger-coverage.yaml"
+try:
+    import yaml
+except ImportError:  # pragma: no cover
+    sys.stderr.write("error: PyYAML required (pip install pyyaml)\n")
+    sys.exit(2)
+_WORD = re.compile(r"[a-z][a-z0-9_]+")
+def _tokens(text: str) -> set[str]:
+    return {w for w in _WORD.findall(text.lower()) if len(w) > 2}
+def load_router() -> dict:
+    return json.loads(ROUTER.read_text(encoding="utf-8"))
+def fired_rules(prompt: str, router: dict) -> set[str]:
+    """Return every rule id the router would surface for `prompt`."""
+    low = prompt.lower()
+    toks = _tokens(prompt)
+    fired: set[str] = set(router.get("kernel", []))
+    for tier in ("tier_1", "tier_2"):
+        for entry in router.get(tier, []):
+            for trig in entry.get("triggers", []):
+                if "keyword" in trig:
+                    if trig["keyword"].lower() in low:
+                        fired.add(entry["id"])
+                        break
+                elif "intent" in trig:
+                    words = _tokens(trig["intent"])
+                    if words and words <= toks:
+                        fired.add(entry["id"])
+                        break
+    return fired
+def run(corpus: list[dict], router: dict) -> tuple[list[dict], int]:
+    results = []
+    misses = 0
+    for case in corpus:
+        fired = fired_rules(case["prompt"], router)
+        expected = case.get("expect", [])
+        missing = [r for r in expected if r not in fired]
+        ok = not missing
+        if not ok:
+            misses += 1
+        results.append({"id": case["id"], "ok": ok, "missing": missing,
+                        "expect": expected})
+    return results, misses
+def main(argv: list[str] | None = None) -> int:
+    ap = argparse.ArgumentParser(description=__doc__.splitlines()[0])
+    ap.add_argument("--json", action="store_true")
+    args = ap.parse_args(argv)
+    if not ROUTER.is_file():
+        sys.stderr.write(f"error: {ROUTER} missing — run compile_router first\n")
+        return 2
+    corpus = yaml.safe_load(CORPUS.read_text(encoding="utf-8")) or []
+    router = load_router()
+    results, misses = run(corpus, router)
+    if args.json:
+        print(json.dumps({"cases": len(results), "misses": misses,
+                          "results": results}, indent=2, sort_keys=True))
+    else:
+        for r in results:
+            mark = "✅" if r["ok"] else "❌"
+            detail = "" if r["ok"] else f"  MISSING: {', '.join(r['missing'])}"
+            print(f"  {mark}  {r['id']}{detail}")
+        print()
+        if misses:
+            print(f"❌  trigger-coverage: {misses}/{len(results)} case(s) failed — "
+                  "a required rule does not fire. Blocking.")
+        else:
+            print(f"✅  trigger-coverage: {len(results)}/{len(results)} pass")
+    return 1 if misses else 0
+if __name__ == "__main__":
+    sys.exit(main())