@event4u/agent-config 1.18.0 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/.agent-src/commands/council/default.md +74 -76
  2. package/.agent-src/commands/feature/roadmap.md +22 -0
  3. package/.agent-src/commands/roadmap/create.md +38 -6
  4. package/.agent-src/commands/roadmap/execute.md +36 -9
  5. package/.agent-src/rules/agent-authority.md +1 -0
  6. package/.agent-src/rules/agent-docs.md +1 -0
  7. package/.agent-src/rules/analysis-skill-routing.md +1 -0
  8. package/.agent-src/rules/architecture.md +1 -0
  9. package/.agent-src/rules/artifact-drafting-protocol.md +1 -0
  10. package/.agent-src/rules/artifact-engagement-recording.md +1 -0
  11. package/.agent-src/rules/ask-when-uncertain.md +1 -0
  12. package/.agent-src/rules/augment-portability.md +1 -0
  13. package/.agent-src/rules/augment-source-of-truth.md +1 -0
  14. package/.agent-src/rules/autonomous-execution.md +1 -0
  15. package/.agent-src/rules/capture-learnings.md +1 -0
  16. package/.agent-src/rules/chat-history-cadence.md +34 -0
  17. package/.agent-src/rules/chat-history-ownership.md +1 -0
  18. package/.agent-src/rules/chat-history-visibility.md +1 -0
  19. package/.agent-src/rules/cli-output-handling.md +2 -2
  20. package/.agent-src/rules/command-suggestion-policy.md +1 -0
  21. package/.agent-src/rules/commit-conventions.md +1 -0
  22. package/.agent-src/rules/commit-policy.md +1 -0
  23. package/.agent-src/rules/context-hygiene.md +22 -0
  24. package/.agent-src/rules/direct-answers.md +1 -0
  25. package/.agent-src/rules/docker-commands.md +1 -0
  26. package/.agent-src/rules/docs-sync.md +1 -0
  27. package/.agent-src/rules/downstream-changes.md +1 -0
  28. package/.agent-src/rules/e2e-testing.md +1 -0
  29. package/.agent-src/rules/guidelines.md +1 -0
  30. package/.agent-src/rules/improve-before-implement.md +1 -0
  31. package/.agent-src/rules/language-and-tone.md +1 -0
  32. package/.agent-src/rules/laravel-translations.md +1 -0
  33. package/.agent-src/rules/markdown-safe-codeblocks.md +1 -0
  34. package/.agent-src/rules/minimal-safe-diff.md +1 -0
  35. package/.agent-src/rules/missing-tool-handling.md +1 -0
  36. package/.agent-src/rules/model-recommendation.md +1 -0
  37. package/.agent-src/rules/no-cheap-questions.md +1 -0
  38. package/.agent-src/rules/no-roadmap-references.md +1 -0
  39. package/.agent-src/rules/non-destructive-by-default.md +1 -0
  40. package/.agent-src/rules/onboarding-gate.md +26 -0
  41. package/.agent-src/rules/package-ci-checks.md +1 -0
  42. package/.agent-src/rules/php-coding.md +1 -0
  43. package/.agent-src/rules/preservation-guard.md +1 -0
  44. package/.agent-src/rules/review-routing-awareness.md +1 -0
  45. package/.agent-src/rules/reviewer-awareness.md +1 -0
  46. package/.agent-src/rules/roadmap-progress-sync.md +22 -0
  47. package/.agent-src/rules/role-mode-adherence.md +2 -2
  48. package/.agent-src/rules/rule-type-governance.md +1 -0
  49. package/.agent-src/rules/runtime-safety.md +1 -0
  50. package/.agent-src/rules/scope-control.md +1 -0
  51. package/.agent-src/rules/security-sensitive-stop.md +1 -0
  52. package/.agent-src/rules/size-enforcement.md +1 -0
  53. package/.agent-src/rules/skill-improvement-trigger.md +1 -0
  54. package/.agent-src/rules/skill-quality.md +1 -0
  55. package/.agent-src/rules/slash-command-routing-policy.md +39 -0
  56. package/.agent-src/rules/think-before-action.md +1 -0
  57. package/.agent-src/rules/token-efficiency.md +1 -0
  58. package/.agent-src/rules/tool-safety.md +1 -0
  59. package/.agent-src/rules/ui-audit-gate.md +1 -0
  60. package/.agent-src/rules/upstream-proposal.md +1 -0
  61. package/.agent-src/rules/user-interaction.md +1 -0
  62. package/.agent-src/rules/verify-before-complete.md +1 -0
  63. package/.agent-src/skills/roadmap-management/SKILL.md +29 -4
  64. package/.agent-src/skills/verify-completion-evidence/SKILL.md +8 -1
  65. package/.agent-src/templates/agent-settings.md +16 -0
  66. package/.agent-src/templates/roadmaps.md +8 -3
  67. package/.agent-src/templates/scripts/work_engine/hook_bootstrap.py +9 -0
  68. package/.agent-src/templates/scripts/work_engine/hooks/__init__.py +4 -0
  69. package/.agent-src/templates/scripts/work_engine/hooks/builtin/__init__.py +4 -0
  70. package/.agent-src/templates/scripts/work_engine/hooks/builtin/decision_trace.py +163 -0
  71. package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +111 -0
  72. package/.agent-src/templates/scripts/work_engine/hooks/settings.py +36 -0
  73. package/.agent-src/templates/scripts/work_engine/scoring/decision_trace.py +141 -0
  74. package/.agent-src/templates/scripts/work_engine/scoring/memory_visibility.py +125 -0
  75. package/.claude-plugin/marketplace.json +1 -1
  76. package/CHANGELOG.md +62 -0
  77. package/README.md +19 -19
  78. package/config/agent-settings.template.yml +23 -0
  79. package/docs/catalog.md +5 -2
  80. package/docs/contracts/adr-settings-sync-engine.md +127 -0
  81. package/docs/contracts/decision-trace-v1.md +146 -0
  82. package/docs/contracts/file-ownership-matrix.json +7 -0
  83. package/docs/contracts/hook-architecture-v1.md +213 -0
  84. package/docs/contracts/memory-visibility-v1.md +138 -0
  85. package/docs/contracts/one-off-script-lifecycle.md +109 -0
  86. package/docs/contracts/rule-interactions.yml +22 -0
  87. package/docs/customization.md +1 -0
  88. package/docs/development.md +4 -1
  89. package/docs/guidelines/agent-infra/layered-settings.md +32 -13
  90. package/package.json +1 -1
  91. package/scripts/agent-config +44 -0
  92. package/scripts/ai_council/bundler.py +3 -3
  93. package/scripts/ai_council/clients.py +24 -8
  94. package/scripts/ai_council/one_off_archive/2026-05/README.md +22 -0
  95. package/scripts/ai_council/one_off_archive/2026-05/_one_off_roundtrip.py +13 -8
  96. package/scripts/ai_council/one_off_archive/2026-05/_one_off_tier_retrofit.py +180 -0
  97. package/scripts/ai_council/session.py +92 -0
  98. package/scripts/capture_showcase_session.py +361 -0
  99. package/scripts/chat_history.py +11 -1
  100. package/scripts/check_always_budget.py +7 -2
  101. package/scripts/context_hygiene_hook.py +14 -6
  102. package/scripts/council_cli.py +357 -0
  103. package/scripts/hook_manifest.yaml +184 -0
  104. package/scripts/hooks/__init__.py +1 -0
  105. package/scripts/hooks/augment-dispatcher.sh +72 -0
  106. package/scripts/hooks/cline-dispatcher.sh +86 -0
  107. package/scripts/hooks/cursor-dispatcher.sh +76 -0
  108. package/scripts/hooks/dispatch_hook.py +348 -0
  109. package/scripts/hooks/envelope.py +98 -0
  110. package/scripts/hooks/gemini-dispatcher.sh +117 -0
  111. package/scripts/hooks/state_io.py +122 -0
  112. package/scripts/hooks/windsurf-dispatcher.sh +123 -0
  113. package/scripts/hooks_status.py +146 -0
  114. package/scripts/install.py +725 -87
  115. package/scripts/install.sh +1 -1
  116. package/scripts/lint_hook_manifest.py +216 -0
  117. package/scripts/lint_one_off_age.py +184 -0
  118. package/scripts/lint_rule_tiers.py +78 -0
  119. package/scripts/lint_showcase_sessions.py +148 -0
  120. package/scripts/minimal_safe_diff_hook.py +245 -0
  121. package/scripts/onboarding_gate_hook.py +13 -8
  122. package/scripts/readme_linter.py +12 -3
  123. package/scripts/roadmap_progress_hook.py +5 -0
  124. package/scripts/sync_agent_settings.py +32 -129
  125. package/scripts/sync_yaml_rt.py +734 -0
  126. package/scripts/verify_before_complete_hook.py +216 -0
@@ -0,0 +1,180 @@
1
+ #!/usr/bin/env python3
2
+ """One-off — tier-bulk-retrofit (Phase 2.1 + 2.2 of road-to-feedback-consolidation).
3
+
4
+ Parses agents/contexts/rule-trigger-matrix.md, emits tmp/tier-classification.md,
5
+ and inserts a `tier:` frontmatter key into every rule under
6
+ .agent-src.uncompressed/rules/. Idempotent — re-runs are a no-op when a rule
7
+ already declares the same tier value.
8
+
9
+ Lifecycle: scripts/_one_off/2026-05/. Purge eligible after 2026-08-04 per
10
+ docs/contracts/one-off-script-lifecycle.md.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import re
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ REPO = Path(__file__).resolve().parents[3]
19
+ MATRIX = REPO / "agents" / "contexts" / "rule-trigger-matrix.md"
20
+ RULES_DIR = REPO / ".agent-src.uncompressed" / "rules"
21
+ COMPRESSED_RULES_DIR = REPO / ".agent-src" / "rules"
22
+ SPREADSHEET = REPO / "tmp" / "tier-classification.md"
23
+
24
+ VALID_TIERS = {"1", "2a", "2b", "3", "safety-floor", "mechanical-already"}
25
+
26
+
27
+ def parse_matrix() -> dict[str, tuple[str, str]]:
28
+ """Return {rule_filename: (tier, notes)} from the matrix table."""
29
+ out: dict[str, tuple[str, str]] = {}
30
+ full = MATRIX.read_text(encoding="utf-8")
31
+ # Slice between '## Matrix' and the next '## ' heading.
32
+ start = full.find("\n## Matrix\n")
33
+ if start == -1:
34
+ sys.exit("matrix: '## Matrix' section not found")
35
+ end = full.find("\n## ", start + 1)
36
+ text = full[start:end] if end != -1 else full[start:]
37
+ # Table rows look like: | `agent-authority.md` | always | 1468 | … | 3 | no | Priority index, … |
38
+ row_re = re.compile(
39
+ r"^\|\s*`([a-z0-9-]+\.md)`\s*\|" # rule filename
40
+ r"[^|]*\|" # type
41
+ r"[^|]*\|" # raw
42
+ r"[^|]*\|" # ext
43
+ r"[^|]*\|" # trigger
44
+ r"[^|]*\|" # obs
45
+ r"[^|]*\|" # enforce
46
+ r"[^|]*\|" # hook-cost
47
+ r"\s*([^|]+?)\s*\|" # tier
48
+ r"[^|]*\|" # dormant?
49
+ r"\s*(.*?)\s*\|\s*$", # notes
50
+ re.MULTILINE,
51
+ )
52
+ for m in row_re.finditer(text):
53
+ name, tier, notes = m.group(1), m.group(2).strip(), m.group(3).strip()
54
+ if tier not in VALID_TIERS:
55
+ sys.exit(f"unknown tier '{tier}' for {name}")
56
+ out[name] = (tier, notes)
57
+ return out
58
+
59
+
60
+ def write_spreadsheet(classifications: dict[str, tuple[str, str]]) -> None:
61
+ SPREADSHEET.parent.mkdir(parents=True, exist_ok=True)
62
+ lines = [
63
+ "# Tier classification — Phase 2.1 of road-to-feedback-consolidation",
64
+ "",
65
+ "Source: `agents/contexts/rule-trigger-matrix.md` (manual classifications",
66
+ "in `scripts/build_rule_trigger_matrix.py`'s `CLASSIFICATION` table).",
67
+ "Generated by `scripts/_one_off/2026-05/_one_off_tier-retrofit.py`.",
68
+ "",
69
+ "Tier rubric: see `agents/contexts/hardening-pattern.md`.",
70
+ "",
71
+ f"Total: {len(classifications)} rules.",
72
+ "",
73
+ "| Rule | Tier | Rationale |",
74
+ "|---|---|---|",
75
+ ]
76
+ for name in sorted(classifications):
77
+ tier, notes = classifications[name]
78
+ lines.append(f"| `{name}` | `{tier}` | {notes} |")
79
+ SPREADSHEET.write_text("\n".join(lines) + "\n", encoding="utf-8")
80
+
81
+
82
+ def parse_frontmatter(text: str) -> tuple[dict[str, str], str, str]:
83
+ """Return (kv, raw_block, body). raw_block excludes the --- fences."""
84
+ if not text.startswith("---\n"):
85
+ return {}, "", text
86
+ end = text.find("\n---\n", 4)
87
+ if end == -1:
88
+ return {}, "", text
89
+ raw = text[4:end]
90
+ body = text[end + 5 :]
91
+ kv: dict[str, str] = {}
92
+ for line in raw.splitlines():
93
+ if ":" in line:
94
+ k, _, v = line.partition(":")
95
+ kv[k.strip()] = v.strip()
96
+ return kv, raw, body
97
+
98
+
99
+ def apply_tier(rule_path: Path, tier: str) -> str:
100
+ """Return one of: 'unchanged', 'inserted', 'updated'.
101
+
102
+ Tier is always written as a quoted string in YAML (`tier: "<value>"`) so the
103
+ schema enum check (string-only) holds for numeric tiers like `1` and `3`.
104
+ """
105
+ text = rule_path.read_text(encoding="utf-8")
106
+ kv, raw, body = parse_frontmatter(text)
107
+ if not raw:
108
+ sys.exit(f"{rule_path}: no frontmatter found")
109
+ existing_raw = kv.get("tier")
110
+ existing = existing_raw.strip('"').strip("'") if existing_raw else None
111
+ quoted = f'"{tier}"'
112
+ target_line = f"tier: {quoted}"
113
+ if existing == tier and existing_raw == quoted:
114
+ return "unchanged"
115
+ new_lines: list[str] = []
116
+ inserted = False
117
+ for line in raw.splitlines():
118
+ new_lines.append(line)
119
+ if not inserted and line.startswith("type:"):
120
+ new_lines.append(target_line)
121
+ inserted = True
122
+ if existing is not None:
123
+ new_lines = [
124
+ l if not l.lstrip().startswith("tier:") else target_line
125
+ for l in new_lines
126
+ ]
127
+ seen_tier = False
128
+ deduped: list[str] = []
129
+ for l in new_lines:
130
+ if l == target_line:
131
+ if seen_tier:
132
+ continue
133
+ seen_tier = True
134
+ deduped.append(l)
135
+ new_lines = deduped
136
+ result = "updated" if existing != tier or existing_raw != quoted else "unchanged"
137
+ else:
138
+ if not inserted:
139
+ new_lines.insert(0, target_line)
140
+ result = "inserted"
141
+ new_raw = "\n".join(new_lines)
142
+ rule_path.write_text(f"---\n{new_raw}\n---\n{body}", encoding="utf-8")
143
+ return result
144
+
145
+
146
+ def main() -> int:
147
+ classifications = parse_matrix()
148
+ if len(classifications) != 58:
149
+ sys.exit(f"expected 58 rules in matrix, got {len(classifications)}")
150
+
151
+ on_disk = {p.name for p in RULES_DIR.glob("*.md")}
152
+ missing = on_disk - classifications.keys()
153
+ extra = classifications.keys() - on_disk
154
+ if missing or extra:
155
+ sys.exit(f"matrix/disk mismatch: missing={missing} extra={extra}")
156
+
157
+ write_spreadsheet(classifications)
158
+
159
+ counts: dict[str, int] = {"unchanged": 0, "inserted": 0, "updated": 0}
160
+ mirror_counts = {"unchanged": 0, "inserted": 0, "updated": 0, "skipped": 0}
161
+ for name, (tier, _) in classifications.items():
162
+ result = apply_tier(RULES_DIR / name, tier)
163
+ counts[result] += 1
164
+ compressed = COMPRESSED_RULES_DIR / name
165
+ if compressed.exists():
166
+ mirror_counts[apply_tier(compressed, tier)] += 1
167
+ else:
168
+ mirror_counts["skipped"] += 1
169
+ print(
170
+ f"tier-retrofit: spreadsheet={SPREADSHEET.relative_to(REPO)} "
171
+ f"src(unchanged={counts['unchanged']} inserted={counts['inserted']} "
172
+ f"updated={counts['updated']}) "
173
+ f"mirror(unchanged={mirror_counts['unchanged']} inserted={mirror_counts['inserted']} "
174
+ f"updated={mirror_counts['updated']} skipped={mirror_counts['skipped']})"
175
+ )
176
+ return 0
177
+
178
+
179
+ if __name__ == "__main__":
180
+ raise SystemExit(main())
@@ -23,6 +23,8 @@ from __future__ import annotations
23
23
 
24
24
  import datetime as _dt
25
25
  import json
26
+ import re
27
+ import shutil
26
28
  import sys
27
29
  from dataclasses import dataclass, field
28
30
  from pathlib import Path
@@ -33,6 +35,10 @@ from scripts.ai_council.orchestrator import render
33
35
 
34
36
  REPO_ROOT = Path(__file__).resolve().parents[2]
35
37
  SESSIONS_DIR = REPO_ROOT / "agents" / "council-sessions"
38
+ SETTINGS_FILE = REPO_ROOT / ".agent-settings.yml"
39
+
40
+ DEFAULT_RETENTION_DAYS = 14
41
+ _TS_RE = re.compile(r"^(\d{4})-(\d{2})-(\d{2})T(\d{2})-(\d{2})-(\d{2})Z$")
36
42
 
37
43
 
38
44
  @dataclass
@@ -69,12 +75,90 @@ def _serialise_response(r: CouncilResponse) -> dict[str, object]:
69
75
  }
70
76
 
71
77
 
78
+ def _load_retention_days(settings_path: Path | None = None) -> int:
79
+ """Read `ai_council.session_retention_days` from `.agent-settings.yml`.
80
+
81
+ Returns `DEFAULT_RETENTION_DAYS` on any read/parse failure (missing
82
+ file, invalid YAML, missing key, non-int value). Pruning never
83
+ blocks the council on a settings error.
84
+ """
85
+ path = settings_path or SETTINGS_FILE
86
+ if not path.exists():
87
+ return DEFAULT_RETENTION_DAYS
88
+ try:
89
+ import yaml # type: ignore[import-not-found]
90
+ data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
91
+ except Exception: # noqa: BLE001 - never block on settings parse
92
+ return DEFAULT_RETENTION_DAYS
93
+ ai = data.get("ai_council") if isinstance(data, dict) else None
94
+ if not isinstance(ai, dict):
95
+ return DEFAULT_RETENTION_DAYS
96
+ raw = ai.get("session_retention_days", DEFAULT_RETENTION_DAYS)
97
+ try:
98
+ return int(raw)
99
+ except (TypeError, ValueError):
100
+ return DEFAULT_RETENTION_DAYS
101
+
102
+
103
+ def _parse_session_timestamp(name: str) -> _dt.datetime | None:
104
+ """Parse `YYYY-MM-DDTHH-MM-SSZ` directory name to a UTC datetime."""
105
+ m = _TS_RE.match(name)
106
+ if not m:
107
+ return None
108
+ try:
109
+ y, mo, d, h, mi, s = (int(g) for g in m.groups())
110
+ return _dt.datetime(y, mo, d, h, mi, s, tzinfo=_dt.timezone.utc)
111
+ except ValueError:
112
+ return None
113
+
114
+
115
+ def prune_old_sessions(
116
+ sessions_dir: Path,
117
+ retention_days: int,
118
+ *,
119
+ now: _dt.datetime | None = None,
120
+ ) -> list[Path]:
121
+ """Delete session subdirectories older than `retention_days`.
122
+
123
+ A session is "old" when its directory-name timestamp predates
124
+ `now - retention_days`. Non-matching names (e.g. JSON reports at
125
+ the root, custom folders) are skipped. Never raises — disk
126
+ failures are logged to stderr.
127
+
128
+ Returns the list of deleted directories. `retention_days <= 0`
129
+ disables pruning and returns an empty list.
130
+ """
131
+ if retention_days <= 0 or not sessions_dir.exists():
132
+ return []
133
+ cutoff = (now or _dt.datetime.now(_dt.timezone.utc)) - _dt.timedelta(days=retention_days)
134
+ removed: list[Path] = []
135
+ try:
136
+ entries = list(sessions_dir.iterdir())
137
+ except OSError as exc: # noqa: BLE001 - never block the report
138
+ print(f"[council:session] prune iterdir failed: {exc}", file=sys.stderr)
139
+ return removed
140
+ for entry in entries:
141
+ if not entry.is_dir():
142
+ continue
143
+ ts = _parse_session_timestamp(entry.name)
144
+ if ts is None or ts >= cutoff:
145
+ continue
146
+ try:
147
+ shutil.rmtree(entry)
148
+ removed.append(entry)
149
+ except OSError as exc: # noqa: BLE001 - never block the report
150
+ print(f"[council:session] prune rmtree failed for {entry}: {exc}",
151
+ file=sys.stderr)
152
+ return removed
153
+
154
+
72
155
  def save(
73
156
  *,
74
157
  manifest: SessionManifest,
75
158
  responses: list[CouncilResponse] | Iterable[list[CouncilResponse]],
76
159
  sessions_dir: Path | None = None,
77
160
  timestamp: str | None = None,
161
+ retention_days: int | None = None,
78
162
  ) -> Path:
79
163
  """Persist a council call. Returns the session directory.
80
164
 
@@ -83,6 +167,11 @@ def save(
83
167
  - `Iterable[list[CouncilResponse]]` — multi-round, one list per
84
168
  round in execution order.
85
169
 
170
+ `retention_days` controls auto-pruning of older sibling sessions
171
+ after the new one is written. `None` reads the value from
172
+ `.agent-settings.yml` (`ai_council.session_retention_days`,
173
+ default `14`); `0` disables pruning.
174
+
86
175
  Disk-write failures are surfaced via a stderr line but do not
87
176
  raise; the caller's text report is the source of truth.
88
177
  """
@@ -141,4 +230,7 @@ def save(
141
230
  except OSError as exc: # noqa: BLE001 - never block the report
142
231
  print(f"[council:session] write failed: {exc}", file=sys.stderr)
143
232
 
233
+ days = _load_retention_days() if retention_days is None else retention_days
234
+ prune_old_sessions(base, days)
235
+
144
236
  return session_dir
@@ -0,0 +1,361 @@
1
+ #!/usr/bin/env python3
2
+ """capture_showcase_session.py — wrap and measure showcase sessions.
3
+
4
+ Phase 1.2 deliverable for `road-to-feedback-consolidation.md`.
5
+
6
+ Two subcommands:
7
+
8
+ capture Read a raw chat-log (file or stdin) and write a session under
9
+ `docs/showcase/sessions/<slug>.log` with a YAML frontmatter
10
+ block (commit_sha, host_agent, model, started, ended,
11
+ task_class, metrics).
12
+
13
+ metrics Compute one or all of the four outcome metrics defined in
14
+ `agents/contexts/outcome-baseline.md` from a captured session
15
+ file. Output as text table or JSON.
16
+
17
+ The four metrics:
18
+ (a) tool-call-count — number of <tool_use ...> blocks in body
19
+ (b) reply-chars — mean chars of agent replies (excl. fences)
20
+ (c) memory-hit-ratio — hits / (hits + misses) from memory traces
21
+ (d) verify-pass-rate — first-try done-claims / total done-claims
22
+
23
+ Exit codes: 0 success, 1 user error (bad args, missing file), 2 metric
24
+ gate not yet wired (downstream phase pending).
25
+ """
26
+ from __future__ import annotations
27
+
28
+ import argparse
29
+ import datetime as _dt
30
+ import json
31
+ import re
32
+ import subprocess
33
+ import sys
34
+ from dataclasses import dataclass, asdict
35
+ from pathlib import Path
36
+ from typing import Any, Dict, List, Optional
37
+
38
+ ROOT = Path(__file__).resolve().parent.parent
39
+ SESSIONS_DIR = ROOT / "docs" / "showcase" / "sessions"
40
+
41
+ # Tool-call markers across host agents (Augment, Claude Code, Cursor, …).
42
+ # Union, not branch — a session log may carry multiple shapes.
43
+ TOOL_USE_PATTERNS = [
44
+ re.compile(r"<tool_use[\s>]"),
45
+ re.compile(r"<function_calls>"),
46
+ re.compile(r"<invoke\b"),
47
+ ]
48
+
49
+ # Memory-retrieve trace shape, per memory-visibility-v1.md (Phase 4.1).
50
+ # Until Phase 4.1 lands, fall back to counting `memory_retrieve` invocations
51
+ # without hit/miss disambiguation (returns ratio=None).
52
+ MEMORY_HIT_RE = re.compile(r"memory_retrieve\b.*?hits=(\d+)", re.IGNORECASE)
53
+ MEMORY_MISS_RE = re.compile(
54
+ r"memory_retrieve\b.*?(misses=(\d+)|hits=0)", re.IGNORECASE
55
+ )
56
+ MEMORY_CALL_RE = re.compile(r"\bmemory_retrieve(?:_\w+)?\b")
57
+
58
+ # Done-claim markers — agent says work is complete.
59
+ DONE_CLAIM_PATTERNS = [
60
+ re.compile(r"\b(done|complete|ready for review|fertig|abgeschlossen)\b",
61
+ re.IGNORECASE),
62
+ re.compile(r"^\s*(✅|✓)", re.MULTILINE),
63
+ ]
64
+
65
+ # Correction phrasings — user re-prompts with a complaint, signalling
66
+ # the verify-gate let bad work through. Optimistic: anything not on this
67
+ # list is treated as scope expansion, not failure.
68
+ CORRECTION_PHRASES = [
69
+ "das passt nicht", "das stimmt nicht", "passt so nicht",
70
+ "that's wrong", "this is wrong", "missing", "fehlt",
71
+ "didn't work", "doesn't work", "geht nicht", "broken",
72
+ "you missed", "du hast", "das ist falsch",
73
+ ]
74
+
75
+
76
+ @dataclass
77
+ class SessionMetrics:
78
+ tool_call_count: Optional[int] = None
79
+ reply_chars_mean: Optional[float] = None
80
+ memory_hit_ratio: Optional[float] = None
81
+ verify_pass_rate: Optional[float] = None
82
+ notes: List[str] = None # populated when a metric is degraded
83
+
84
+ def to_dict(self) -> Dict[str, Any]:
85
+ d = asdict(self)
86
+ # Drop notes when empty so frontmatter stays compact.
87
+ if not self.notes:
88
+ d.pop("notes", None)
89
+ return d
90
+
91
+
92
+ def _git_sha() -> str:
93
+ try:
94
+ out = subprocess.run(
95
+ ["git", "rev-parse", "HEAD"],
96
+ capture_output=True, text=True, check=True, cwd=ROOT,
97
+ )
98
+ return out.stdout.strip()
99
+ except (subprocess.CalledProcessError, FileNotFoundError):
100
+ return "unknown"
101
+
102
+
103
+ def _now_iso() -> str:
104
+ return _dt.datetime.now(_dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
105
+
106
+
107
+ def _strip_fences(text: str) -> str:
108
+ """Remove fenced code blocks so they don't pollute char counts."""
109
+ return re.sub(r"```.*?```", "", text, flags=re.DOTALL)
110
+
111
+
112
+ def _split_body(content: str) -> str:
113
+ """Strip a leading YAML frontmatter block if present."""
114
+ if content.startswith("---\n"):
115
+ end = content.find("\n---\n", 4)
116
+ if end != -1:
117
+ return content[end + 5:]
118
+ return content
119
+
120
+
121
+ def _read_session(path: Path) -> str:
122
+ if str(path) == "-":
123
+ return sys.stdin.read()
124
+ if not path.is_file():
125
+ raise SystemExit(f"❌ session file not found: {path}")
126
+ return path.read_text(encoding="utf-8")
127
+
128
+
129
+ def _split_turns(body: str) -> List[Dict[str, str]]:
130
+ """Heuristic turn split — `## User` / `## Agent` headings, falls back
131
+ to whole-body as a single agent turn when no markers exist.
132
+ """
133
+ turn_re = re.compile(
134
+ r"^##\s+(User|Agent|Assistant|Matze|Du)\b.*?$", re.MULTILINE | re.IGNORECASE
135
+ )
136
+ matches = list(turn_re.finditer(body))
137
+ if not matches:
138
+ return [{"role": "agent", "text": body}]
139
+ turns: List[Dict[str, str]] = []
140
+ for i, m in enumerate(matches):
141
+ role_raw = m.group(1).lower()
142
+ role = "user" if role_raw in {"user", "matze", "du"} else "agent"
143
+ start = m.end()
144
+ end = matches[i + 1].start() if i + 1 < len(matches) else len(body)
145
+ turns.append({"role": role, "text": body[start:end].strip()})
146
+ return turns
147
+
148
+
149
+ def _metric_tool_call_count(body: str) -> int:
150
+ return sum(len(p.findall(body)) for p in TOOL_USE_PATTERNS)
151
+
152
+
153
+ def _metric_reply_chars(body: str) -> Optional[float]:
154
+ turns = _split_turns(body)
155
+ agent_turns = [t["text"] for t in turns if t["role"] == "agent"]
156
+ if not agent_turns:
157
+ return None
158
+ lengths = [len(_strip_fences(t).strip()) for t in agent_turns]
159
+ return round(sum(lengths) / len(lengths), 1)
160
+
161
+
162
+ def _metric_memory_hit_ratio(body: str) -> tuple[Optional[float], List[str]]:
163
+ """Returns (ratio, notes). Ratio is None when no memory calls found."""
164
+ notes: List[str] = []
165
+ hits_total = sum(int(m.group(1)) for m in MEMORY_HIT_RE.finditer(body))
166
+ miss_blocks = MEMORY_MISS_RE.findall(body)
167
+ miss_total = 0
168
+ for raw, count in miss_blocks:
169
+ if count:
170
+ miss_total += int(count)
171
+ else:
172
+ miss_total += 1 # `hits=0` case
173
+ calls = len(MEMORY_CALL_RE.findall(body))
174
+ if calls == 0:
175
+ return None, ["no memory_retrieve calls found"]
176
+ if hits_total + miss_total == 0:
177
+ notes.append("memory-visibility-v1 trace not present; "
178
+ "counted calls only (Phase 4.1 pending)")
179
+ return None, notes
180
+ return round(hits_total / (hits_total + miss_total), 3), notes
181
+
182
+
183
+ def _metric_verify_pass_rate(body: str) -> tuple[Optional[float], List[str]]:
184
+ turns = _split_turns(body)
185
+ if len(turns) < 2:
186
+ return None, ["session has no user/agent split — cannot measure"]
187
+ total_claims = 0
188
+ failed_claims = 0
189
+ for i, turn in enumerate(turns):
190
+ if turn["role"] != "agent":
191
+ continue
192
+ if not any(p.search(turn["text"]) for p in DONE_CLAIM_PATTERNS):
193
+ continue
194
+ total_claims += 1
195
+ next_user = next(
196
+ (t for t in turns[i + 1:] if t["role"] == "user"), None
197
+ )
198
+ if next_user is None:
199
+ continue # claim accepted (session ended on the claim)
200
+ lower = next_user["text"].lower()
201
+ if any(phrase in lower for phrase in CORRECTION_PHRASES):
202
+ failed_claims += 1
203
+ if total_claims == 0:
204
+ return None, ["no done-claims found in session"]
205
+ return round((total_claims - failed_claims) / total_claims, 3), []
206
+
207
+
208
+ def _compute_metrics(body: str) -> SessionMetrics:
209
+ notes: List[str] = []
210
+ mhr, mhr_notes = _metric_memory_hit_ratio(body)
211
+ notes.extend(mhr_notes)
212
+ vpr, vpr_notes = _metric_verify_pass_rate(body)
213
+ notes.extend(vpr_notes)
214
+ return SessionMetrics(
215
+ tool_call_count=_metric_tool_call_count(body),
216
+ reply_chars_mean=_metric_reply_chars(body),
217
+ memory_hit_ratio=mhr,
218
+ verify_pass_rate=vpr,
219
+ notes=notes or None,
220
+ )
221
+
222
+
223
+ def _render_frontmatter(meta: Dict[str, Any]) -> str:
224
+ """Minimal YAML emitter — stdlib only, dict + scalar + list of strings.
225
+ Nested dict supported one level deep (for `metrics`).
226
+ """
227
+ def fmt_scalar(v: Any) -> str:
228
+ if v is None:
229
+ return "null"
230
+ if isinstance(v, bool):
231
+ return "true" if v else "false"
232
+ if isinstance(v, (int, float)):
233
+ return str(v)
234
+ return json.dumps(v, ensure_ascii=False)
235
+
236
+ lines = ["---"]
237
+ for k, v in meta.items():
238
+ if isinstance(v, dict):
239
+ lines.append(f"{k}:")
240
+ for kk, vv in v.items():
241
+ lines.append(f" {kk}: {fmt_scalar(vv)}")
242
+ elif isinstance(v, list):
243
+ lines.append(f"{k}:")
244
+ for item in v:
245
+ lines.append(f" - {fmt_scalar(item)}")
246
+ else:
247
+ lines.append(f"{k}: {fmt_scalar(v)}")
248
+ lines.append("---")
249
+ return "\n".join(lines) + "\n"
250
+
251
+
252
+ def cmd_capture(args: argparse.Namespace) -> int:
253
+ raw = _read_session(Path(args.input))
254
+ body = _split_body(raw)
255
+ metrics = _compute_metrics(body)
256
+ started = args.started or _now_iso()
257
+ ended = args.ended or _now_iso()
258
+ meta: Dict[str, Any] = {
259
+ "slug": args.slug,
260
+ "task_class": args.task_class,
261
+ "host_agent": args.host,
262
+ "model": args.model,
263
+ "commit_sha": _git_sha(),
264
+ "started": started,
265
+ "ended": ended,
266
+ "metrics": metrics.to_dict(),
267
+ }
268
+ frontmatter = _render_frontmatter(meta)
269
+ SESSIONS_DIR.mkdir(parents=True, exist_ok=True)
270
+ out_path = SESSIONS_DIR / f"{args.slug}.log"
271
+ if out_path.exists() and not args.force:
272
+ print(f"❌ refusing to overwrite {out_path} — pass --force",
273
+ file=sys.stderr)
274
+ return 1
275
+ out_path.write_text(frontmatter + body, encoding="utf-8")
276
+ try:
277
+ display = out_path.relative_to(ROOT)
278
+ except ValueError:
279
+ display = out_path
280
+ print(f"✅ wrote {display}")
281
+ if args.format == "json":
282
+ print(json.dumps(metrics.to_dict(), indent=2))
283
+ return 0
284
+
285
+
286
+ def cmd_metrics(args: argparse.Namespace) -> int:
287
+ raw = _read_session(Path(args.session))
288
+ body = _split_body(raw)
289
+ metrics = _compute_metrics(body)
290
+ selected = args.metric
291
+ available = {
292
+ "tool-call-count": metrics.tool_call_count,
293
+ "reply-chars": metrics.reply_chars_mean,
294
+ "memory-hit-ratio": metrics.memory_hit_ratio,
295
+ "verify-pass-rate": metrics.verify_pass_rate,
296
+ }
297
+ if selected != "all" and selected not in available:
298
+ print(f"❌ unknown metric: {selected}", file=sys.stderr)
299
+ return 1
300
+ if args.format == "json":
301
+ if selected == "all":
302
+ print(json.dumps(metrics.to_dict(), indent=2))
303
+ else:
304
+ print(json.dumps({selected: available[selected]}, indent=2))
305
+ return 0
306
+ items = available.items() if selected == "all" else [(selected, available[selected])]
307
+ for name, value in items:
308
+ rendered = "n/a" if value is None else str(value)
309
+ print(f" {name:<22} {rendered}")
310
+ if metrics.notes:
311
+ print()
312
+ for note in metrics.notes:
313
+ print(f" ℹ️ {note}")
314
+ return 0
315
+
316
+
317
+ def _build_parser() -> argparse.ArgumentParser:
318
+ p = argparse.ArgumentParser(
319
+ prog="capture_showcase_session.py",
320
+ description="Capture and measure /implement-ticket and /work showcase sessions.",
321
+ )
322
+ sub = p.add_subparsers(dest="command", required=True)
323
+
324
+ cap = sub.add_parser("capture", help="Write a session log with frontmatter.")
325
+ cap.add_argument("--input", required=True,
326
+ help="Path to raw chat log, or '-' for stdin.")
327
+ cap.add_argument("--slug", required=True,
328
+ help="Filename slug (becomes <slug>.log).")
329
+ cap.add_argument("--task-class", default="implement-ticket",
330
+ choices=["implement-ticket", "work", "review-changes", "qa"])
331
+ cap.add_argument("--host", default="unknown",
332
+ help="Host agent identifier (augment, claude-code, …).")
333
+ cap.add_argument("--model", default="unknown")
334
+ cap.add_argument("--started", default=None,
335
+ help="ISO-8601 start timestamp (defaults to now).")
336
+ cap.add_argument("--ended", default=None,
337
+ help="ISO-8601 end timestamp (defaults to now).")
338
+ cap.add_argument("--force", action="store_true",
339
+ help="Overwrite an existing session file.")
340
+ cap.add_argument("--format", choices=["text", "json"], default="text")
341
+ cap.set_defaults(func=cmd_capture)
342
+
343
+ met = sub.add_parser("metrics", help="Compute one or all metrics.")
344
+ met.add_argument("--session", required=True,
345
+ help="Path to a captured session log.")
346
+ met.add_argument("--metric", default="all",
347
+ choices=["all", "tool-call-count", "reply-chars",
348
+ "memory-hit-ratio", "verify-pass-rate"])
349
+ met.add_argument("--format", choices=["text", "json"], default="text")
350
+ met.set_defaults(func=cmd_metrics)
351
+ return p
352
+
353
+
354
+ def main(argv: Optional[List[str]] = None) -> int:
355
+ parser = _build_parser()
356
+ args = parser.parse_args(argv)
357
+ return args.func(args)
358
+
359
+
360
+ if __name__ == "__main__":
361
+ sys.exit(main())
@@ -912,7 +912,17 @@ def hook_dispatch(platform: str, raw_json: str, *,
912
912
  if not isinstance(payload, dict):
913
913
  raise ValueError("stdin JSON must decode to an object")
914
914
 
915
- raw_event = (event_override or _extract_hook_event(payload) or "").strip()
915
+ # Unwrap dispatcher envelope (Phase 7.3, hook-architecture-v1.md). When
916
+ # the dispatcher invoked us, stdin carries {schema_version, platform,
917
+ # event, payload, …}; pull the platform-native data out of `payload`
918
+ # and let the envelope's `event` override the per-platform mapping.
919
+ envelope_event = ""
920
+ if all(k in payload for k in ("schema_version", "platform", "event", "payload")):
921
+ envelope_event = (payload.get("native_event") or payload.get("event") or "").strip()
922
+ inner = payload.get("payload")
923
+ payload = inner if isinstance(inner, dict) else {}
924
+
925
+ raw_event = (event_override or envelope_event or _extract_hook_event(payload) or "").strip()
916
926
  event = PLATFORM_EVENT_MAP[platform].get(raw_event)
917
927
  if not event:
918
928
  return {"action": "skipped_unmapped_event", "platform": platform,