npm - @event4u/agent-config - Versions diffs - 1.18.0 → 1.19.0 - Mend

@event4u/agent-config 1.18.0 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (126) hide show

package/.agent-src/commands/council/default.md +74 -76
package/.agent-src/commands/feature/roadmap.md +22 -0
package/.agent-src/commands/roadmap/create.md +38 -6
package/.agent-src/commands/roadmap/execute.md +36 -9
package/.agent-src/rules/agent-authority.md +1 -0
package/.agent-src/rules/agent-docs.md +1 -0
package/.agent-src/rules/analysis-skill-routing.md +1 -0
package/.agent-src/rules/architecture.md +1 -0
package/.agent-src/rules/artifact-drafting-protocol.md +1 -0
package/.agent-src/rules/artifact-engagement-recording.md +1 -0
package/.agent-src/rules/ask-when-uncertain.md +1 -0
package/.agent-src/rules/augment-portability.md +1 -0
package/.agent-src/rules/augment-source-of-truth.md +1 -0
package/.agent-src/rules/autonomous-execution.md +1 -0
package/.agent-src/rules/capture-learnings.md +1 -0
package/.agent-src/rules/chat-history-cadence.md +34 -0
package/.agent-src/rules/chat-history-ownership.md +1 -0
package/.agent-src/rules/chat-history-visibility.md +1 -0
package/.agent-src/rules/cli-output-handling.md +2 -2
package/.agent-src/rules/command-suggestion-policy.md +1 -0
package/.agent-src/rules/commit-conventions.md +1 -0
package/.agent-src/rules/commit-policy.md +1 -0
package/.agent-src/rules/context-hygiene.md +22 -0
package/.agent-src/rules/direct-answers.md +1 -0
package/.agent-src/rules/docker-commands.md +1 -0
package/.agent-src/rules/docs-sync.md +1 -0
package/.agent-src/rules/downstream-changes.md +1 -0
package/.agent-src/rules/e2e-testing.md +1 -0
package/.agent-src/rules/guidelines.md +1 -0
package/.agent-src/rules/improve-before-implement.md +1 -0
package/.agent-src/rules/language-and-tone.md +1 -0
package/.agent-src/rules/laravel-translations.md +1 -0
package/.agent-src/rules/markdown-safe-codeblocks.md +1 -0
package/.agent-src/rules/minimal-safe-diff.md +1 -0
package/.agent-src/rules/missing-tool-handling.md +1 -0
package/.agent-src/rules/model-recommendation.md +1 -0
package/.agent-src/rules/no-cheap-questions.md +1 -0
package/.agent-src/rules/no-roadmap-references.md +1 -0
package/.agent-src/rules/non-destructive-by-default.md +1 -0
package/.agent-src/rules/onboarding-gate.md +26 -0
package/.agent-src/rules/package-ci-checks.md +1 -0
package/.agent-src/rules/php-coding.md +1 -0
package/.agent-src/rules/preservation-guard.md +1 -0
package/.agent-src/rules/review-routing-awareness.md +1 -0
package/.agent-src/rules/reviewer-awareness.md +1 -0
package/.agent-src/rules/roadmap-progress-sync.md +22 -0
package/.agent-src/rules/role-mode-adherence.md +2 -2
package/.agent-src/rules/rule-type-governance.md +1 -0
package/.agent-src/rules/runtime-safety.md +1 -0
package/.agent-src/rules/scope-control.md +1 -0
package/.agent-src/rules/security-sensitive-stop.md +1 -0
package/.agent-src/rules/size-enforcement.md +1 -0
package/.agent-src/rules/skill-improvement-trigger.md +1 -0
package/.agent-src/rules/skill-quality.md +1 -0
package/.agent-src/rules/slash-command-routing-policy.md +39 -0
package/.agent-src/rules/think-before-action.md +1 -0
package/.agent-src/rules/token-efficiency.md +1 -0
package/.agent-src/rules/tool-safety.md +1 -0
package/.agent-src/rules/ui-audit-gate.md +1 -0
package/.agent-src/rules/upstream-proposal.md +1 -0
package/.agent-src/rules/user-interaction.md +1 -0
package/.agent-src/rules/verify-before-complete.md +1 -0
package/.agent-src/skills/roadmap-management/SKILL.md +29 -4
package/.agent-src/skills/verify-completion-evidence/SKILL.md +8 -1
package/.agent-src/templates/agent-settings.md +16 -0
package/.agent-src/templates/roadmaps.md +8 -3
package/.agent-src/templates/scripts/work_engine/hook_bootstrap.py +9 -0
package/.agent-src/templates/scripts/work_engine/hooks/__init__.py +4 -0
package/.agent-src/templates/scripts/work_engine/hooks/builtin/__init__.py +4 -0
package/.agent-src/templates/scripts/work_engine/hooks/builtin/decision_trace.py +163 -0
package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +111 -0
package/.agent-src/templates/scripts/work_engine/hooks/settings.py +36 -0
package/.agent-src/templates/scripts/work_engine/scoring/decision_trace.py +141 -0
package/.agent-src/templates/scripts/work_engine/scoring/memory_visibility.py +125 -0
package/.claude-plugin/marketplace.json +1 -1
package/CHANGELOG.md +62 -0
package/README.md +19 -19
package/config/agent-settings.template.yml +23 -0
package/docs/catalog.md +5 -2
package/docs/contracts/adr-settings-sync-engine.md +127 -0
package/docs/contracts/decision-trace-v1.md +146 -0
package/docs/contracts/file-ownership-matrix.json +7 -0
package/docs/contracts/hook-architecture-v1.md +213 -0
package/docs/contracts/memory-visibility-v1.md +138 -0
package/docs/contracts/one-off-script-lifecycle.md +109 -0
package/docs/contracts/rule-interactions.yml +22 -0
package/docs/customization.md +1 -0
package/docs/development.md +4 -1
package/docs/guidelines/agent-infra/layered-settings.md +32 -13
package/package.json +1 -1
package/scripts/agent-config +44 -0
package/scripts/ai_council/bundler.py +3 -3
package/scripts/ai_council/clients.py +24 -8
package/scripts/ai_council/one_off_archive/2026-05/README.md +22 -0
package/scripts/ai_council/one_off_archive/2026-05/_one_off_roundtrip.py +13 -8
package/scripts/ai_council/one_off_archive/2026-05/_one_off_tier_retrofit.py +180 -0
package/scripts/ai_council/session.py +92 -0
package/scripts/capture_showcase_session.py +361 -0
package/scripts/chat_history.py +11 -1
package/scripts/check_always_budget.py +7 -2
package/scripts/context_hygiene_hook.py +14 -6
package/scripts/council_cli.py +357 -0
package/scripts/hook_manifest.yaml +184 -0
package/scripts/hooks/__init__.py +1 -0
package/scripts/hooks/augment-dispatcher.sh +72 -0
package/scripts/hooks/cline-dispatcher.sh +86 -0
package/scripts/hooks/cursor-dispatcher.sh +76 -0
package/scripts/hooks/dispatch_hook.py +348 -0
package/scripts/hooks/envelope.py +98 -0
package/scripts/hooks/gemini-dispatcher.sh +117 -0
package/scripts/hooks/state_io.py +122 -0
package/scripts/hooks/windsurf-dispatcher.sh +123 -0
package/scripts/hooks_status.py +146 -0
package/scripts/install.py +725 -87
package/scripts/install.sh +1 -1
package/scripts/lint_hook_manifest.py +216 -0
package/scripts/lint_one_off_age.py +184 -0
package/scripts/lint_rule_tiers.py +78 -0
package/scripts/lint_showcase_sessions.py +148 -0
package/scripts/minimal_safe_diff_hook.py +245 -0
package/scripts/onboarding_gate_hook.py +13 -8
package/scripts/readme_linter.py +12 -3
package/scripts/roadmap_progress_hook.py +5 -0
package/scripts/sync_agent_settings.py +32 -129
package/scripts/sync_yaml_rt.py +734 -0
package/scripts/verify_before_complete_hook.py +216 -0

package/scripts/ai_council/one_off_archive/2026-05/_one_off_tier_retrofit.py ADDED Viewed

@@ -0,0 +1,180 @@
+#!/usr/bin/env python3
+"""One-off — tier-bulk-retrofit (Phase 2.1 + 2.2 of road-to-feedback-consolidation).
+Parses agents/contexts/rule-trigger-matrix.md, emits tmp/tier-classification.md,
+and inserts a `tier:` frontmatter key into every rule under
+.agent-src.uncompressed/rules/. Idempotent — re-runs are a no-op when a rule
+already declares the same tier value.
+Lifecycle: scripts/_one_off/2026-05/. Purge eligible after 2026-08-04 per
+docs/contracts/one-off-script-lifecycle.md.
+"""
+from __future__ import annotations
+import re
+import sys
+from pathlib import Path
+REPO = Path(__file__).resolve().parents[3]
+MATRIX = REPO / "agents" / "contexts" / "rule-trigger-matrix.md"
+RULES_DIR = REPO / ".agent-src.uncompressed" / "rules"
+COMPRESSED_RULES_DIR = REPO / ".agent-src" / "rules"
+SPREADSHEET = REPO / "tmp" / "tier-classification.md"
+VALID_TIERS = {"1", "2a", "2b", "3", "safety-floor", "mechanical-already"}
+def parse_matrix() -> dict[str, tuple[str, str]]:
+    """Return {rule_filename: (tier, notes)} from the matrix table."""
+    out: dict[str, tuple[str, str]] = {}
+    full = MATRIX.read_text(encoding="utf-8")
+    # Slice between '## Matrix' and the next '## ' heading.
+    start = full.find("\n## Matrix\n")
+    if start == -1:
+        sys.exit("matrix: '## Matrix' section not found")
+    end = full.find("\n## ", start + 1)
+    text = full[start:end] if end != -1 else full[start:]
+    # Table rows look like: | `agent-authority.md` | always | 1468 | … | 3 | no | Priority index, … |
+    row_re = re.compile(
+        r"^\|\s*`([a-z0-9-]+\.md)`\s*\|"      # rule filename
+        r"[^|]*\|"                              # type
+        r"[^|]*\|"                              # raw
+        r"[^|]*\|"                              # ext
+        r"[^|]*\|"                              # trigger
+        r"[^|]*\|"                              # obs
+        r"[^|]*\|"                              # enforce
+        r"[^|]*\|"                              # hook-cost
+        r"\s*([^|]+?)\s*\|"                     # tier
+        r"[^|]*\|"                              # dormant?
+        r"\s*(.*?)\s*\|\s*$",                  # notes
+        re.MULTILINE,
+    )
+    for m in row_re.finditer(text):
+        name, tier, notes = m.group(1), m.group(2).strip(), m.group(3).strip()
+        if tier not in VALID_TIERS:
+            sys.exit(f"unknown tier '{tier}' for {name}")
+        out[name] = (tier, notes)
+    return out
+def write_spreadsheet(classifications: dict[str, tuple[str, str]]) -> None:
+    SPREADSHEET.parent.mkdir(parents=True, exist_ok=True)
+    lines = [
+        "# Tier classification — Phase 2.1 of road-to-feedback-consolidation",
+        "",
+        "Source: `agents/contexts/rule-trigger-matrix.md` (manual classifications",
+        "in `scripts/build_rule_trigger_matrix.py`'s `CLASSIFICATION` table).",
+        "Generated by `scripts/_one_off/2026-05/_one_off_tier-retrofit.py`.",
+        "",
+        "Tier rubric: see `agents/contexts/hardening-pattern.md`.",
+        "",
+        f"Total: {len(classifications)} rules.",
+        "",
+        "| Rule | Tier | Rationale |",
+        "|---|---|---|",
+    ]
+    for name in sorted(classifications):
+        tier, notes = classifications[name]
+        lines.append(f"| `{name}` | `{tier}` | {notes} |")
+    SPREADSHEET.write_text("\n".join(lines) + "\n", encoding="utf-8")
+def parse_frontmatter(text: str) -> tuple[dict[str, str], str, str]:
+    """Return (kv, raw_block, body). raw_block excludes the --- fences."""
+    if not text.startswith("---\n"):
+        return {}, "", text
+    end = text.find("\n---\n", 4)
+    if end == -1:
+        return {}, "", text
+    raw = text[4:end]
+    body = text[end + 5 :]
+    kv: dict[str, str] = {}
+    for line in raw.splitlines():
+        if ":" in line:
+            k, _, v = line.partition(":")
+            kv[k.strip()] = v.strip()
+    return kv, raw, body
+def apply_tier(rule_path: Path, tier: str) -> str:
+    """Return one of: 'unchanged', 'inserted', 'updated'.
+    Tier is always written as a quoted string in YAML (`tier: "<value>"`) so the
+    schema enum check (string-only) holds for numeric tiers like `1` and `3`.
+    """
+    text = rule_path.read_text(encoding="utf-8")
+    kv, raw, body = parse_frontmatter(text)
+    if not raw:
+        sys.exit(f"{rule_path}: no frontmatter found")
+    existing_raw = kv.get("tier")
+    existing = existing_raw.strip('"').strip("'") if existing_raw else None
+    quoted = f'"{tier}"'
+    target_line = f"tier: {quoted}"
+    if existing == tier and existing_raw == quoted:
+        return "unchanged"
+    new_lines: list[str] = []
+    inserted = False
+    for line in raw.splitlines():
+        new_lines.append(line)
+        if not inserted and line.startswith("type:"):
+            new_lines.append(target_line)
+            inserted = True
+    if existing is not None:
+        new_lines = [
+            l if not l.lstrip().startswith("tier:") else target_line
+            for l in new_lines
+        ]
+        seen_tier = False
+        deduped: list[str] = []
+        for l in new_lines:
+            if l == target_line:
+                if seen_tier:
+                    continue
+                seen_tier = True
+            deduped.append(l)
+        new_lines = deduped
+        result = "updated" if existing != tier or existing_raw != quoted else "unchanged"
+    else:
+        if not inserted:
+            new_lines.insert(0, target_line)
+        result = "inserted"
+    new_raw = "\n".join(new_lines)
+    rule_path.write_text(f"---\n{new_raw}\n---\n{body}", encoding="utf-8")
+    return result
+def main() -> int:
+    classifications = parse_matrix()
+    if len(classifications) != 58:
+        sys.exit(f"expected 58 rules in matrix, got {len(classifications)}")
+    on_disk = {p.name for p in RULES_DIR.glob("*.md")}
+    missing = on_disk - classifications.keys()
+    extra = classifications.keys() - on_disk
+    if missing or extra:
+        sys.exit(f"matrix/disk mismatch: missing={missing} extra={extra}")
+    write_spreadsheet(classifications)
+    counts: dict[str, int] = {"unchanged": 0, "inserted": 0, "updated": 0}
+    mirror_counts = {"unchanged": 0, "inserted": 0, "updated": 0, "skipped": 0}
+    for name, (tier, _) in classifications.items():
+        result = apply_tier(RULES_DIR / name, tier)
+        counts[result] += 1
+        compressed = COMPRESSED_RULES_DIR / name
+        if compressed.exists():
+            mirror_counts[apply_tier(compressed, tier)] += 1
+        else:
+            mirror_counts["skipped"] += 1
+    print(
+        f"tier-retrofit: spreadsheet={SPREADSHEET.relative_to(REPO)} "
+        f"src(unchanged={counts['unchanged']} inserted={counts['inserted']} "
+        f"updated={counts['updated']}) "
+        f"mirror(unchanged={mirror_counts['unchanged']} inserted={mirror_counts['inserted']} "
+        f"updated={mirror_counts['updated']} skipped={mirror_counts['skipped']})"
+    )
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())

package/scripts/ai_council/session.py CHANGED Viewed

@@ -23,6 +23,8 @@ from __future__ import annotations
 import datetime as _dt
 import json
+import re
+import shutil
 import sys
 from dataclasses import dataclass, field
 from pathlib import Path
@@ -33,6 +35,10 @@ from scripts.ai_council.orchestrator import render
 REPO_ROOT = Path(__file__).resolve().parents[2]
 SESSIONS_DIR = REPO_ROOT / "agents" / "council-sessions"
+SETTINGS_FILE = REPO_ROOT / ".agent-settings.yml"
+DEFAULT_RETENTION_DAYS = 14
+_TS_RE = re.compile(r"^(\d{4})-(\d{2})-(\d{2})T(\d{2})-(\d{2})-(\d{2})Z$")
 @dataclass
@@ -69,12 +75,90 @@ def _serialise_response(r: CouncilResponse) -> dict[str, object]:
     }
+def _load_retention_days(settings_path: Path | None = None) -> int:
+    """Read `ai_council.session_retention_days` from `.agent-settings.yml`.
+    Returns `DEFAULT_RETENTION_DAYS` on any read/parse failure (missing
+    file, invalid YAML, missing key, non-int value). Pruning never
+    blocks the council on a settings error.
+    """
+    path = settings_path or SETTINGS_FILE
+    if not path.exists():
+        return DEFAULT_RETENTION_DAYS
+    try:
+        import yaml  # type: ignore[import-not-found]
+        data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
+    except Exception:  # noqa: BLE001 - never block on settings parse
+        return DEFAULT_RETENTION_DAYS
+    ai = data.get("ai_council") if isinstance(data, dict) else None
+    if not isinstance(ai, dict):
+        return DEFAULT_RETENTION_DAYS
+    raw = ai.get("session_retention_days", DEFAULT_RETENTION_DAYS)
+    try:
+        return int(raw)
+    except (TypeError, ValueError):
+        return DEFAULT_RETENTION_DAYS
+def _parse_session_timestamp(name: str) -> _dt.datetime | None:
+    """Parse `YYYY-MM-DDTHH-MM-SSZ` directory name to a UTC datetime."""
+    m = _TS_RE.match(name)
+    if not m:
+        return None
+    try:
+        y, mo, d, h, mi, s = (int(g) for g in m.groups())
+        return _dt.datetime(y, mo, d, h, mi, s, tzinfo=_dt.timezone.utc)
+    except ValueError:
+        return None
+def prune_old_sessions(
+    sessions_dir: Path,
+    retention_days: int,
+    *,
+    now: _dt.datetime | None = None,
+) -> list[Path]:
+    """Delete session subdirectories older than `retention_days`.
+    A session is "old" when its directory-name timestamp predates
+    `now - retention_days`. Non-matching names (e.g. JSON reports at
+    the root, custom folders) are skipped. Never raises — disk
+    failures are logged to stderr.
+    Returns the list of deleted directories. `retention_days <= 0`
+    disables pruning and returns an empty list.
+    """
+    if retention_days <= 0 or not sessions_dir.exists():
+        return []
+    cutoff = (now or _dt.datetime.now(_dt.timezone.utc)) - _dt.timedelta(days=retention_days)
+    removed: list[Path] = []
+    try:
+        entries = list(sessions_dir.iterdir())
+    except OSError as exc:  # noqa: BLE001 - never block the report
+        print(f"[council:session] prune iterdir failed: {exc}", file=sys.stderr)
+        return removed
+    for entry in entries:
+        if not entry.is_dir():
+            continue
+        ts = _parse_session_timestamp(entry.name)
+        if ts is None or ts >= cutoff:
+            continue
+        try:
+            shutil.rmtree(entry)
+            removed.append(entry)
+        except OSError as exc:  # noqa: BLE001 - never block the report
+            print(f"[council:session] prune rmtree failed for {entry}: {exc}",
+                  file=sys.stderr)
+    return removed
 def save(
     *,
     manifest: SessionManifest,
     responses: list[CouncilResponse] | Iterable[list[CouncilResponse]],
     sessions_dir: Path | None = None,
     timestamp: str | None = None,
+    retention_days: int | None = None,
 ) -> Path:
     """Persist a council call. Returns the session directory.
@@ -83,6 +167,11 @@ def save(
     - `Iterable[list[CouncilResponse]]` — multi-round, one list per
       round in execution order.
+    `retention_days` controls auto-pruning of older sibling sessions
+    after the new one is written. `None` reads the value from
+    `.agent-settings.yml` (`ai_council.session_retention_days`,
+    default `14`); `0` disables pruning.
     Disk-write failures are surfaced via a stderr line but do not
     raise; the caller's text report is the source of truth.
     """
@@ -141,4 +230,7 @@ def save(
     except OSError as exc:  # noqa: BLE001 - never block the report
         print(f"[council:session] write failed: {exc}", file=sys.stderr)
+    days = _load_retention_days() if retention_days is None else retention_days
+    prune_old_sessions(base, days)
     return session_dir

package/scripts/capture_showcase_session.py ADDED Viewed

@@ -0,0 +1,361 @@
+#!/usr/bin/env python3
+"""capture_showcase_session.py — wrap and measure showcase sessions.
+Phase 1.2 deliverable for `road-to-feedback-consolidation.md`.
+Two subcommands:
+  capture   Read a raw chat-log (file or stdin) and write a session under
+            `docs/showcase/sessions/<slug>.log` with a YAML frontmatter
+            block (commit_sha, host_agent, model, started, ended,
+            task_class, metrics).
+  metrics   Compute one or all of the four outcome metrics defined in
+            `agents/contexts/outcome-baseline.md` from a captured session
+            file. Output as text table or JSON.
+The four metrics:
+  (a) tool-call-count        — number of <tool_use ...> blocks in body
+  (b) reply-chars            — mean chars of agent replies (excl. fences)
+  (c) memory-hit-ratio       — hits / (hits + misses) from memory traces
+  (d) verify-pass-rate       — first-try done-claims / total done-claims
+Exit codes: 0 success, 1 user error (bad args, missing file), 2 metric
+gate not yet wired (downstream phase pending).
+"""
+from __future__ import annotations
+import argparse
+import datetime as _dt
+import json
+import re
+import subprocess
+import sys
+from dataclasses import dataclass, asdict
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+ROOT = Path(__file__).resolve().parent.parent
+SESSIONS_DIR = ROOT / "docs" / "showcase" / "sessions"
+# Tool-call markers across host agents (Augment, Claude Code, Cursor, …).
+# Union, not branch — a session log may carry multiple shapes.
+TOOL_USE_PATTERNS = [
+    re.compile(r"<tool_use[\s>]"),
+    re.compile(r"<function_calls>"),
+    re.compile(r"<invoke\b"),
+]
+# Memory-retrieve trace shape, per memory-visibility-v1.md (Phase 4.1).
+# Until Phase 4.1 lands, fall back to counting `memory_retrieve` invocations
+# without hit/miss disambiguation (returns ratio=None).
+MEMORY_HIT_RE = re.compile(r"memory_retrieve\b.*?hits=(\d+)", re.IGNORECASE)
+MEMORY_MISS_RE = re.compile(
+    r"memory_retrieve\b.*?(misses=(\d+)|hits=0)", re.IGNORECASE
+)
+MEMORY_CALL_RE = re.compile(r"\bmemory_retrieve(?:_\w+)?\b")
+# Done-claim markers — agent says work is complete.
+DONE_CLAIM_PATTERNS = [
+    re.compile(r"\b(done|complete|ready for review|fertig|abgeschlossen)\b",
+               re.IGNORECASE),
+    re.compile(r"^\s*(✅|✓)", re.MULTILINE),
+]
+# Correction phrasings — user re-prompts with a complaint, signalling
+# the verify-gate let bad work through. Optimistic: anything not on this
+# list is treated as scope expansion, not failure.
+CORRECTION_PHRASES = [
+    "das passt nicht", "das stimmt nicht", "passt so nicht",
+    "that's wrong", "this is wrong", "missing", "fehlt",
+    "didn't work", "doesn't work", "geht nicht", "broken",
+    "you missed", "du hast", "das ist falsch",
+]
+@dataclass
+class SessionMetrics:
+    tool_call_count: Optional[int] = None
+    reply_chars_mean: Optional[float] = None
+    memory_hit_ratio: Optional[float] = None
+    verify_pass_rate: Optional[float] = None
+    notes: List[str] = None  # populated when a metric is degraded
+    def to_dict(self) -> Dict[str, Any]:
+        d = asdict(self)
+        # Drop notes when empty so frontmatter stays compact.
+        if not self.notes:
+            d.pop("notes", None)
+        return d
+def _git_sha() -> str:
+    try:
+        out = subprocess.run(
+            ["git", "rev-parse", "HEAD"],
+            capture_output=True, text=True, check=True, cwd=ROOT,
+        )
+        return out.stdout.strip()
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return "unknown"
+def _now_iso() -> str:
+    return _dt.datetime.now(_dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+def _strip_fences(text: str) -> str:
+    """Remove fenced code blocks so they don't pollute char counts."""
+    return re.sub(r"```.*?```", "", text, flags=re.DOTALL)
+def _split_body(content: str) -> str:
+    """Strip a leading YAML frontmatter block if present."""
+    if content.startswith("---\n"):
+        end = content.find("\n---\n", 4)
+        if end != -1:
+            return content[end + 5:]
+    return content
+def _read_session(path: Path) -> str:
+    if str(path) == "-":
+        return sys.stdin.read()
+    if not path.is_file():
+        raise SystemExit(f"❌  session file not found: {path}")
+    return path.read_text(encoding="utf-8")
+def _split_turns(body: str) -> List[Dict[str, str]]:
+    """Heuristic turn split — `## User` / `## Agent` headings, falls back
+    to whole-body as a single agent turn when no markers exist.
+    """
+    turn_re = re.compile(
+        r"^##\s+(User|Agent|Assistant|Matze|Du)\b.*?$", re.MULTILINE | re.IGNORECASE
+    )
+    matches = list(turn_re.finditer(body))
+    if not matches:
+        return [{"role": "agent", "text": body}]
+    turns: List[Dict[str, str]] = []
+    for i, m in enumerate(matches):
+        role_raw = m.group(1).lower()
+        role = "user" if role_raw in {"user", "matze", "du"} else "agent"
+        start = m.end()
+        end = matches[i + 1].start() if i + 1 < len(matches) else len(body)
+        turns.append({"role": role, "text": body[start:end].strip()})
+    return turns
+def _metric_tool_call_count(body: str) -> int:
+    return sum(len(p.findall(body)) for p in TOOL_USE_PATTERNS)
+def _metric_reply_chars(body: str) -> Optional[float]:
+    turns = _split_turns(body)
+    agent_turns = [t["text"] for t in turns if t["role"] == "agent"]
+    if not agent_turns:
+        return None
+    lengths = [len(_strip_fences(t).strip()) for t in agent_turns]
+    return round(sum(lengths) / len(lengths), 1)
+def _metric_memory_hit_ratio(body: str) -> tuple[Optional[float], List[str]]:
+    """Returns (ratio, notes). Ratio is None when no memory calls found."""
+    notes: List[str] = []
+    hits_total = sum(int(m.group(1)) for m in MEMORY_HIT_RE.finditer(body))
+    miss_blocks = MEMORY_MISS_RE.findall(body)
+    miss_total = 0
+    for raw, count in miss_blocks:
+        if count:
+            miss_total += int(count)
+        else:
+            miss_total += 1  # `hits=0` case
+    calls = len(MEMORY_CALL_RE.findall(body))
+    if calls == 0:
+        return None, ["no memory_retrieve calls found"]
+    if hits_total + miss_total == 0:
+        notes.append("memory-visibility-v1 trace not present; "
+                     "counted calls only (Phase 4.1 pending)")
+        return None, notes
+    return round(hits_total / (hits_total + miss_total), 3), notes
+def _metric_verify_pass_rate(body: str) -> tuple[Optional[float], List[str]]:
+    turns = _split_turns(body)
+    if len(turns) < 2:
+        return None, ["session has no user/agent split — cannot measure"]
+    total_claims = 0
+    failed_claims = 0
+    for i, turn in enumerate(turns):
+        if turn["role"] != "agent":
+            continue
+        if not any(p.search(turn["text"]) for p in DONE_CLAIM_PATTERNS):
+            continue
+        total_claims += 1
+        next_user = next(
+            (t for t in turns[i + 1:] if t["role"] == "user"), None
+        )
+        if next_user is None:
+            continue  # claim accepted (session ended on the claim)
+        lower = next_user["text"].lower()
+        if any(phrase in lower for phrase in CORRECTION_PHRASES):
+            failed_claims += 1
+    if total_claims == 0:
+        return None, ["no done-claims found in session"]
+    return round((total_claims - failed_claims) / total_claims, 3), []
+def _compute_metrics(body: str) -> SessionMetrics:
+    notes: List[str] = []
+    mhr, mhr_notes = _metric_memory_hit_ratio(body)
+    notes.extend(mhr_notes)
+    vpr, vpr_notes = _metric_verify_pass_rate(body)
+    notes.extend(vpr_notes)
+    return SessionMetrics(
+        tool_call_count=_metric_tool_call_count(body),
+        reply_chars_mean=_metric_reply_chars(body),
+        memory_hit_ratio=mhr,
+        verify_pass_rate=vpr,
+        notes=notes or None,
+    )
+def _render_frontmatter(meta: Dict[str, Any]) -> str:
+    """Minimal YAML emitter — stdlib only, dict + scalar + list of strings.
+    Nested dict supported one level deep (for `metrics`).
+    """
+    def fmt_scalar(v: Any) -> str:
+        if v is None:
+            return "null"
+        if isinstance(v, bool):
+            return "true" if v else "false"
+        if isinstance(v, (int, float)):
+            return str(v)
+        return json.dumps(v, ensure_ascii=False)
+    lines = ["---"]
+    for k, v in meta.items():
+        if isinstance(v, dict):
+            lines.append(f"{k}:")
+            for kk, vv in v.items():
+                lines.append(f"  {kk}: {fmt_scalar(vv)}")
+        elif isinstance(v, list):
+            lines.append(f"{k}:")
+            for item in v:
+                lines.append(f"  - {fmt_scalar(item)}")
+        else:
+            lines.append(f"{k}: {fmt_scalar(v)}")
+    lines.append("---")
+    return "\n".join(lines) + "\n"
+def cmd_capture(args: argparse.Namespace) -> int:
+    raw = _read_session(Path(args.input))
+    body = _split_body(raw)
+    metrics = _compute_metrics(body)
+    started = args.started or _now_iso()
+    ended = args.ended or _now_iso()
+    meta: Dict[str, Any] = {
+        "slug": args.slug,
+        "task_class": args.task_class,
+        "host_agent": args.host,
+        "model": args.model,
+        "commit_sha": _git_sha(),
+        "started": started,
+        "ended": ended,
+        "metrics": metrics.to_dict(),
+    }
+    frontmatter = _render_frontmatter(meta)
+    SESSIONS_DIR.mkdir(parents=True, exist_ok=True)
+    out_path = SESSIONS_DIR / f"{args.slug}.log"
+    if out_path.exists() and not args.force:
+        print(f"❌  refusing to overwrite {out_path} — pass --force",
+              file=sys.stderr)
+        return 1
+    out_path.write_text(frontmatter + body, encoding="utf-8")
+    try:
+        display = out_path.relative_to(ROOT)
+    except ValueError:
+        display = out_path
+    print(f"✅  wrote {display}")
+    if args.format == "json":
+        print(json.dumps(metrics.to_dict(), indent=2))
+    return 0
+def cmd_metrics(args: argparse.Namespace) -> int:
+    raw = _read_session(Path(args.session))
+    body = _split_body(raw)
+    metrics = _compute_metrics(body)
+    selected = args.metric
+    available = {
+        "tool-call-count": metrics.tool_call_count,
+        "reply-chars": metrics.reply_chars_mean,
+        "memory-hit-ratio": metrics.memory_hit_ratio,
+        "verify-pass-rate": metrics.verify_pass_rate,
+    }
+    if selected != "all" and selected not in available:
+        print(f"❌  unknown metric: {selected}", file=sys.stderr)
+        return 1
+    if args.format == "json":
+        if selected == "all":
+            print(json.dumps(metrics.to_dict(), indent=2))
+        else:
+            print(json.dumps({selected: available[selected]}, indent=2))
+        return 0
+    items = available.items() if selected == "all" else [(selected, available[selected])]
+    for name, value in items:
+        rendered = "n/a" if value is None else str(value)
+        print(f"  {name:<22} {rendered}")
+    if metrics.notes:
+        print()
+        for note in metrics.notes:
+            print(f"  ℹ️   {note}")
+    return 0
+def _build_parser() -> argparse.ArgumentParser:
+    p = argparse.ArgumentParser(
+        prog="capture_showcase_session.py",
+        description="Capture and measure /implement-ticket and /work showcase sessions.",
+    )
+    sub = p.add_subparsers(dest="command", required=True)
+    cap = sub.add_parser("capture", help="Write a session log with frontmatter.")
+    cap.add_argument("--input", required=True,
+                     help="Path to raw chat log, or '-' for stdin.")
+    cap.add_argument("--slug", required=True,
+                     help="Filename slug (becomes <slug>.log).")
+    cap.add_argument("--task-class", default="implement-ticket",
+                     choices=["implement-ticket", "work", "review-changes", "qa"])
+    cap.add_argument("--host", default="unknown",
+                     help="Host agent identifier (augment, claude-code, …).")
+    cap.add_argument("--model", default="unknown")
+    cap.add_argument("--started", default=None,
+                     help="ISO-8601 start timestamp (defaults to now).")
+    cap.add_argument("--ended", default=None,
+                     help="ISO-8601 end timestamp (defaults to now).")
+    cap.add_argument("--force", action="store_true",
+                     help="Overwrite an existing session file.")
+    cap.add_argument("--format", choices=["text", "json"], default="text")
+    cap.set_defaults(func=cmd_capture)
+    met = sub.add_parser("metrics", help="Compute one or all metrics.")
+    met.add_argument("--session", required=True,
+                     help="Path to a captured session log.")
+    met.add_argument("--metric", default="all",
+                     choices=["all", "tool-call-count", "reply-chars",
+                              "memory-hit-ratio", "verify-pass-rate"])
+    met.add_argument("--format", choices=["text", "json"], default="text")
+    met.set_defaults(func=cmd_metrics)
+    return p
+def main(argv: Optional[List[str]] = None) -> int:
+    parser = _build_parser()
+    args = parser.parse_args(argv)
+    return args.func(args)
+if __name__ == "__main__":
+    sys.exit(main())

package/scripts/chat_history.py CHANGED Viewed

@@ -912,7 +912,17 @@ def hook_dispatch(platform: str, raw_json: str, *,
         if not isinstance(payload, dict):
             raise ValueError("stdin JSON must decode to an object")
-    raw_event = (event_override or _extract_hook_event(payload) or "").strip()
+    # Unwrap dispatcher envelope (Phase 7.3, hook-architecture-v1.md). When
+    # the dispatcher invoked us, stdin carries {schema_version, platform,
+    # event, payload, …}; pull the platform-native data out of `payload`
+    # and let the envelope's `event` override the per-platform mapping.
+    envelope_event = ""
+    if all(k in payload for k in ("schema_version", "platform", "event", "payload")):
+        envelope_event = (payload.get("native_event") or payload.get("event") or "").strip()
+        inner = payload.get("payload")
+        payload = inner if isinstance(inner, dict) else {}
+    raw_event = (event_override or envelope_event or _extract_hook_event(payload) or "").strip()
     event = PLATFORM_EVENT_MAP[platform].get(raw_event)
     if not event:
         return {"action": "skipped_unmapped_event", "platform": platform,