@event4u/agent-config 1.17.0 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/council/default.md +74 -76
- package/.agent-src/commands/feature/roadmap.md +22 -0
- package/.agent-src/commands/roadmap/create.md +38 -6
- package/.agent-src/commands/roadmap/execute.md +36 -9
- package/.agent-src/rules/agent-authority.md +1 -0
- package/.agent-src/rules/agent-docs.md +1 -0
- package/.agent-src/rules/analysis-skill-routing.md +1 -0
- package/.agent-src/rules/architecture.md +1 -0
- package/.agent-src/rules/artifact-drafting-protocol.md +1 -0
- package/.agent-src/rules/artifact-engagement-recording.md +1 -0
- package/.agent-src/rules/ask-when-uncertain.md +1 -0
- package/.agent-src/rules/augment-portability.md +1 -0
- package/.agent-src/rules/augment-source-of-truth.md +1 -0
- package/.agent-src/rules/autonomous-execution.md +1 -0
- package/.agent-src/rules/capture-learnings.md +1 -0
- package/.agent-src/rules/chat-history-cadence.md +34 -0
- package/.agent-src/rules/chat-history-ownership.md +1 -0
- package/.agent-src/rules/chat-history-visibility.md +1 -0
- package/.agent-src/rules/cli-output-handling.md +2 -2
- package/.agent-src/rules/command-suggestion-policy.md +1 -0
- package/.agent-src/rules/commit-conventions.md +1 -0
- package/.agent-src/rules/commit-policy.md +1 -0
- package/.agent-src/rules/context-hygiene.md +28 -0
- package/.agent-src/rules/direct-answers.md +18 -26
- package/.agent-src/rules/docker-commands.md +1 -0
- package/.agent-src/rules/docs-sync.md +1 -0
- package/.agent-src/rules/downstream-changes.md +1 -0
- package/.agent-src/rules/e2e-testing.md +1 -0
- package/.agent-src/rules/guidelines.md +1 -0
- package/.agent-src/rules/improve-before-implement.md +1 -0
- package/.agent-src/rules/language-and-tone.md +1 -0
- package/.agent-src/rules/laravel-translations.md +1 -0
- package/.agent-src/rules/markdown-safe-codeblocks.md +1 -0
- package/.agent-src/rules/minimal-safe-diff.md +1 -0
- package/.agent-src/rules/missing-tool-handling.md +1 -0
- package/.agent-src/rules/model-recommendation.md +1 -0
- package/.agent-src/rules/no-cheap-questions.md +15 -21
- package/.agent-src/rules/no-roadmap-references.md +1 -0
- package/.agent-src/rules/non-destructive-by-default.md +1 -0
- package/.agent-src/rules/onboarding-gate.md +33 -0
- package/.agent-src/rules/package-ci-checks.md +1 -0
- package/.agent-src/rules/php-coding.md +1 -0
- package/.agent-src/rules/preservation-guard.md +1 -0
- package/.agent-src/rules/review-routing-awareness.md +1 -0
- package/.agent-src/rules/reviewer-awareness.md +1 -0
- package/.agent-src/rules/roadmap-progress-sync.md +49 -0
- package/.agent-src/rules/role-mode-adherence.md +2 -2
- package/.agent-src/rules/rule-type-governance.md +29 -0
- package/.agent-src/rules/runtime-safety.md +1 -0
- package/.agent-src/rules/scope-control.md +1 -0
- package/.agent-src/rules/security-sensitive-stop.md +1 -0
- package/.agent-src/rules/size-enforcement.md +1 -0
- package/.agent-src/rules/skill-improvement-trigger.md +1 -0
- package/.agent-src/rules/skill-quality.md +1 -0
- package/.agent-src/rules/slash-command-routing-policy.md +39 -0
- package/.agent-src/rules/think-before-action.md +1 -0
- package/.agent-src/rules/token-efficiency.md +1 -0
- package/.agent-src/rules/tool-safety.md +1 -0
- package/.agent-src/rules/ui-audit-gate.md +1 -0
- package/.agent-src/rules/upstream-proposal.md +1 -0
- package/.agent-src/rules/user-interaction.md +1 -0
- package/.agent-src/rules/verify-before-complete.md +1 -0
- package/.agent-src/skills/roadmap-management/SKILL.md +29 -4
- package/.agent-src/skills/verify-completion-evidence/SKILL.md +8 -1
- package/.agent-src/templates/agent-settings.md +16 -0
- package/.agent-src/templates/roadmaps.md +12 -3
- package/.agent-src/templates/scripts/work_engine/hook_bootstrap.py +9 -0
- package/.agent-src/templates/scripts/work_engine/hooks/__init__.py +4 -0
- package/.agent-src/templates/scripts/work_engine/hooks/builtin/__init__.py +4 -0
- package/.agent-src/templates/scripts/work_engine/hooks/builtin/decision_trace.py +163 -0
- package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +111 -0
- package/.agent-src/templates/scripts/work_engine/hooks/settings.py +36 -0
- package/.agent-src/templates/scripts/work_engine/scoring/decision_trace.py +141 -0
- package/.agent-src/templates/scripts/work_engine/scoring/memory_visibility.py +125 -0
- package/.claude-plugin/marketplace.json +1 -1
- package/CHANGELOG.md +97 -0
- package/README.md +20 -20
- package/config/agent-settings.template.yml +23 -0
- package/docs/architecture.md +1 -1
- package/docs/catalog.md +5 -2
- package/docs/contracts/adr-settings-sync-engine.md +127 -0
- package/docs/contracts/decision-trace-v1.md +146 -0
- package/docs/contracts/file-ownership-matrix.json +7 -0
- package/docs/contracts/hook-architecture-v1.md +213 -0
- package/docs/contracts/load-context-budget-model.md +80 -0
- package/docs/contracts/load-context-schema.md +20 -0
- package/docs/contracts/memory-visibility-v1.md +138 -0
- package/docs/contracts/one-off-script-lifecycle.md +109 -0
- package/docs/contracts/roadmap-complexity-standard.md +137 -0
- package/docs/contracts/rule-interactions.yml +22 -0
- package/docs/customization.md +1 -0
- package/docs/development.md +4 -1
- package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +134 -0
- package/docs/guidelines/agent-infra/direct-answers-demos.md +145 -0
- package/docs/guidelines/agent-infra/layered-settings.md +32 -13
- package/docs/guidelines/agent-infra/verify-before-complete-demos.md +128 -0
- package/package.json +1 -1
- package/scripts/agent-config +64 -0
- package/scripts/ai_council/bundler.py +3 -3
- package/scripts/ai_council/clients.py +24 -8
- package/scripts/ai_council/one_off_archive/2026-05/README.md +67 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_budget_v2_audit.py +206 -0
- package/scripts/ai_council/{_one_off_roundtrip.py → one_off_archive/2026-05/_one_off_roundtrip.py} +13 -8
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_tier_retrofit.py +180 -0
- package/scripts/ai_council/session.py +92 -0
- package/scripts/build_rule_trigger_matrix.py +360 -0
- package/scripts/capture_showcase_session.py +361 -0
- package/scripts/chat_history.py +11 -1
- package/scripts/check_always_budget.py +46 -2
- package/scripts/check_one_off_location.py +81 -0
- package/scripts/check_references.py +6 -0
- package/scripts/compress.py +5 -2
- package/scripts/context_hygiene_hook.py +181 -0
- package/scripts/council_cli.py +357 -0
- package/scripts/hook_manifest.yaml +184 -0
- package/scripts/hooks/__init__.py +1 -0
- package/scripts/hooks/augment-context-hygiene.sh +55 -0
- package/scripts/hooks/augment-dispatcher.sh +72 -0
- package/scripts/hooks/augment-onboarding-gate.sh +55 -0
- package/scripts/hooks/cline-dispatcher.sh +86 -0
- package/scripts/hooks/cursor-dispatcher.sh +76 -0
- package/scripts/hooks/dispatch_hook.py +348 -0
- package/scripts/hooks/envelope.py +98 -0
- package/scripts/hooks/gemini-dispatcher.sh +117 -0
- package/scripts/hooks/state_io.py +122 -0
- package/scripts/hooks/windsurf-dispatcher.sh +123 -0
- package/scripts/hooks_status.py +146 -0
- package/scripts/install.py +728 -51
- package/scripts/install.sh +1 -1
- package/scripts/lint_examples.py +98 -0
- package/scripts/lint_hook_manifest.py +216 -0
- package/scripts/lint_one_off_age.py +184 -0
- package/scripts/lint_roadmap_complexity.py +127 -0
- package/scripts/lint_rule_tiers.py +78 -0
- package/scripts/lint_showcase_sessions.py +148 -0
- package/scripts/minimal_safe_diff_hook.py +245 -0
- package/scripts/onboarding_gate_hook.py +142 -0
- package/scripts/readme_linter.py +12 -3
- package/scripts/roadmap_progress_hook.py +5 -0
- package/scripts/schemas/rule.schema.json +5 -0
- package/scripts/sync_agent_settings.py +32 -129
- package/scripts/sync_yaml_rt.py +734 -0
- package/scripts/verify_before_complete_hook.py +216 -0
- /package/scripts/ai_council/{_one_off_2a4_acceptance.py → one_off_archive/2026-05/_one_off_2a4_acceptance.py} +0 -0
- /package/scripts/ai_council/{_one_off_context_layer_v1_estimate.py → one_off_archive/2026-05/_one_off_context_layer_v1_estimate.py} +0 -0
- /package/scripts/ai_council/{_one_off_context_layer_v1_review.py → one_off_archive/2026-05/_one_off_context_layer_v1_review.py} +0 -0
- /package/scripts/ai_council/{_one_off_followups_review.py → one_off_archive/2026-05/_one_off_followups_review.py} +0 -0
- /package/scripts/ai_council/{_one_off_nondestructive_inline_audit.py → one_off_archive/2026-05/_one_off_nondestructive_inline_audit.py} +0 -0
- /package/scripts/{_one_off_phase4_dispatch_latency.py → ai_council/one_off_archive/2026-05/_one_off_phase4_dispatch_latency.py} +0 -0
- /package/scripts/{_one_off_phase6_trigger_jaccard.py → ai_council/one_off_archive/2026-05/_one_off_phase6_trigger_jaccard.py} +0 -0
- /package/scripts/ai_council/{_one_off_phase_2a_budget_rebalance.py → one_off_archive/2026-05/_one_off_phase_2a_budget_rebalance.py} +0 -0
- /package/scripts/ai_council/{_one_off_phase_2a_post_revert.py → one_off_archive/2026-05/_one_off_phase_2a_post_revert.py} +0 -0
- /package/scripts/ai_council/{_one_off_rebalancing_audit.py → one_off_archive/2026-05/_one_off_rebalancing_audit.py} +0 -0
- /package/scripts/ai_council/{_one_off_rule_hardening_v1.py → one_off_archive/2026-05/_one_off_rule_hardening_v1.py} +0 -0
- /package/scripts/ai_council/{_one_off_structural_open_questions.py → one_off_archive/2026-05/_one_off_structural_open_questions.py} +0 -0
- /package/scripts/ai_council/{_one_off_structural_optimization.py → one_off_archive/2026-05/_one_off_structural_optimization.py} +0 -0
- /package/scripts/ai_council/{_one_off_structural_v3_gaps.py → one_off_archive/2026-05/_one_off_structural_v3_gaps.py} +0 -0
- /package/scripts/ai_council/{_one_off_structural_v3_review.py → one_off_archive/2026-05/_one_off_structural_v3_review.py} +0 -0
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""capture_showcase_session.py — wrap and measure showcase sessions.
|
|
3
|
+
|
|
4
|
+
Phase 1.2 deliverable for `road-to-feedback-consolidation.md`.
|
|
5
|
+
|
|
6
|
+
Two subcommands:
|
|
7
|
+
|
|
8
|
+
capture Read a raw chat-log (file or stdin) and write a session under
|
|
9
|
+
`docs/showcase/sessions/<slug>.log` with a YAML frontmatter
|
|
10
|
+
block (commit_sha, host_agent, model, started, ended,
|
|
11
|
+
task_class, metrics).
|
|
12
|
+
|
|
13
|
+
metrics Compute one or all of the four outcome metrics defined in
|
|
14
|
+
`agents/contexts/outcome-baseline.md` from a captured session
|
|
15
|
+
file. Output as text table or JSON.
|
|
16
|
+
|
|
17
|
+
The four metrics:
|
|
18
|
+
(a) tool-call-count — number of <tool_use ...> blocks in body
|
|
19
|
+
(b) reply-chars — mean chars of agent replies (excl. fences)
|
|
20
|
+
(c) memory-hit-ratio — hits / (hits + misses) from memory traces
|
|
21
|
+
(d) verify-pass-rate — first-try done-claims / total done-claims
|
|
22
|
+
|
|
23
|
+
Exit codes: 0 success, 1 user error (bad args, missing file), 2 metric
|
|
24
|
+
gate not yet wired (downstream phase pending).
|
|
25
|
+
"""
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import argparse
|
|
29
|
+
import datetime as _dt
|
|
30
|
+
import json
|
|
31
|
+
import re
|
|
32
|
+
import subprocess
|
|
33
|
+
import sys
|
|
34
|
+
from dataclasses import dataclass, asdict
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
from typing import Any, Dict, List, Optional
|
|
37
|
+
|
|
38
|
+
ROOT = Path(__file__).resolve().parent.parent
|
|
39
|
+
SESSIONS_DIR = ROOT / "docs" / "showcase" / "sessions"
|
|
40
|
+
|
|
41
|
+
# Tool-call markers across host agents (Augment, Claude Code, Cursor, …).
|
|
42
|
+
# Union, not branch — a session log may carry multiple shapes.
|
|
43
|
+
TOOL_USE_PATTERNS = [
|
|
44
|
+
re.compile(r"<tool_use[\s>]"),
|
|
45
|
+
re.compile(r"<function_calls>"),
|
|
46
|
+
re.compile(r"<invoke\b"),
|
|
47
|
+
]
|
|
48
|
+
|
|
49
|
+
# Memory-retrieve trace shape, per memory-visibility-v1.md (Phase 4.1).
|
|
50
|
+
# Until Phase 4.1 lands, fall back to counting `memory_retrieve` invocations
|
|
51
|
+
# without hit/miss disambiguation (returns ratio=None).
|
|
52
|
+
MEMORY_HIT_RE = re.compile(r"memory_retrieve\b.*?hits=(\d+)", re.IGNORECASE)
|
|
53
|
+
MEMORY_MISS_RE = re.compile(
|
|
54
|
+
r"memory_retrieve\b.*?(misses=(\d+)|hits=0)", re.IGNORECASE
|
|
55
|
+
)
|
|
56
|
+
MEMORY_CALL_RE = re.compile(r"\bmemory_retrieve(?:_\w+)?\b")
|
|
57
|
+
|
|
58
|
+
# Done-claim markers — agent says work is complete.
|
|
59
|
+
DONE_CLAIM_PATTERNS = [
|
|
60
|
+
re.compile(r"\b(done|complete|ready for review|fertig|abgeschlossen)\b",
|
|
61
|
+
re.IGNORECASE),
|
|
62
|
+
re.compile(r"^\s*(✅|✓)", re.MULTILINE),
|
|
63
|
+
]
|
|
64
|
+
|
|
65
|
+
# Correction phrasings — user re-prompts with a complaint, signalling
|
|
66
|
+
# the verify-gate let bad work through. Optimistic: anything not on this
|
|
67
|
+
# list is treated as scope expansion, not failure.
|
|
68
|
+
CORRECTION_PHRASES = [
|
|
69
|
+
"das passt nicht", "das stimmt nicht", "passt so nicht",
|
|
70
|
+
"that's wrong", "this is wrong", "missing", "fehlt",
|
|
71
|
+
"didn't work", "doesn't work", "geht nicht", "broken",
|
|
72
|
+
"you missed", "du hast", "das ist falsch",
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@dataclass
class SessionMetrics:
    """Container for the four outcome metrics of one captured session.

    Every metric defaults to ``None``, meaning the value could not be
    computed for the session.
    """

    tool_call_count: Optional[int] = None
    reply_chars_mean: Optional[float] = None
    memory_hit_ratio: Optional[float] = None
    verify_pass_rate: Optional[float] = None
    # Populated when a metric is degraded. The default is ``None``, so the
    # annotation must be Optional; ``None`` and ``[]`` are treated alike.
    notes: Optional[List[str]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict, omitting ``notes`` when empty.

        Dropping the empty ``notes`` entry keeps the emitted frontmatter
        compact.
        """
        d = asdict(self)
        if not self.notes:
            d.pop("notes", None)
        return d
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _git_sha() -> str:
    """Return the commit hash of ``HEAD`` for the repository at ``ROOT``.

    Falls back to the literal ``"unknown"`` when the ``git`` executable is
    missing or the command exits with a non-zero status.
    """
    command = ["git", "rev-parse", "HEAD"]
    try:
        completed = subprocess.run(
            command,
            cwd=ROOT,
            check=True,
            text=True,
            capture_output=True,
        )
    except (FileNotFoundError, subprocess.CalledProcessError):
        return "unknown"
    return completed.stdout.strip()
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _now_iso() -> str:
|
|
104
|
+
return _dt.datetime.now(_dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _strip_fences(text: str) -> str:
|
|
108
|
+
"""Remove fenced code blocks so they don't pollute char counts."""
|
|
109
|
+
return re.sub(r"```.*?```", "", text, flags=re.DOTALL)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _split_body(content: str) -> str:
|
|
113
|
+
"""Strip a leading YAML frontmatter block if present."""
|
|
114
|
+
if content.startswith("---\n"):
|
|
115
|
+
end = content.find("\n---\n", 4)
|
|
116
|
+
if end != -1:
|
|
117
|
+
return content[end + 5:]
|
|
118
|
+
return content
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _read_session(path: Path) -> str:
|
|
122
|
+
if str(path) == "-":
|
|
123
|
+
return sys.stdin.read()
|
|
124
|
+
if not path.is_file():
|
|
125
|
+
raise SystemExit(f"❌ session file not found: {path}")
|
|
126
|
+
return path.read_text(encoding="utf-8")
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _split_turns(body: str) -> List[Dict[str, str]]:
|
|
130
|
+
"""Heuristic turn split — `## User` / `## Agent` headings, falls back
|
|
131
|
+
to whole-body as a single agent turn when no markers exist.
|
|
132
|
+
"""
|
|
133
|
+
turn_re = re.compile(
|
|
134
|
+
r"^##\s+(User|Agent|Assistant|Matze|Du)\b.*?$", re.MULTILINE | re.IGNORECASE
|
|
135
|
+
)
|
|
136
|
+
matches = list(turn_re.finditer(body))
|
|
137
|
+
if not matches:
|
|
138
|
+
return [{"role": "agent", "text": body}]
|
|
139
|
+
turns: List[Dict[str, str]] = []
|
|
140
|
+
for i, m in enumerate(matches):
|
|
141
|
+
role_raw = m.group(1).lower()
|
|
142
|
+
role = "user" if role_raw in {"user", "matze", "du"} else "agent"
|
|
143
|
+
start = m.end()
|
|
144
|
+
end = matches[i + 1].start() if i + 1 < len(matches) else len(body)
|
|
145
|
+
turns.append({"role": role, "text": body[start:end].strip()})
|
|
146
|
+
return turns
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _metric_tool_call_count(body: str) -> int:
    """Return the total number of tool-call marker matches in *body*.

    Matches of every pattern in ``TOOL_USE_PATTERNS`` are added together,
    so text that matches several patterns is counted once per pattern.
    """
    total = 0
    for pattern in TOOL_USE_PATTERNS:
        total += len(pattern.findall(body))
    return total
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _metric_reply_chars(body: str) -> Optional[float]:
    """Return the mean length of agent turns, excluding fenced code.

    Each agent turn is measured after removing fenced blocks and stripping
    surrounding whitespace. The mean is rounded to one decimal place;
    ``None`` is returned when the body has no agent turn.
    """
    sizes = [
        len(_strip_fences(turn["text"]).strip())
        for turn in _split_turns(body)
        if turn["role"] == "agent"
    ]
    if not sizes:
        return None
    return round(sum(sizes) / len(sizes), 1)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _metric_memory_hit_ratio(body: str) -> tuple[Optional[float], List[str]]:
    """Return ``(ratio, notes)`` for memory retrieval traces in *body*.

    ``ratio`` is ``hits / (hits + misses)`` rounded to three places. It is
    ``None`` when no ``memory_retrieve`` call appears at all, and also when
    calls appear but carry no hit/miss trace; in both cases ``notes``
    explains why.
    """
    if not MEMORY_CALL_RE.search(body):
        return None, ["no memory_retrieve calls found"]

    hits = sum(int(found.group(1)) for found in MEMORY_HIT_RE.finditer(body))
    # Each findall item is (matched alternative, digits of ``misses=N``).
    # The digits are empty for the ``hits=0`` alternative, which counts as
    # exactly one miss.
    misses = sum(
        int(digits) if digits else 1
        for _matched, digits in MEMORY_MISS_RE.findall(body)
    )

    observed = hits + misses
    if observed == 0:
        return None, [
            "memory-visibility-v1 trace not present; "
            "counted calls only (Phase 4.1 pending)"
        ]
    return round(hits / observed, 3), []
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _metric_verify_pass_rate(body: str) -> tuple[Optional[float], List[str]]:
    """Return ``(rate, notes)`` for done-claims that were not corrected.

    An agent turn matching any ``DONE_CLAIM_PATTERNS`` entry is a claim. It
    counts as failed when the next user turn contains one of
    ``CORRECTION_PHRASES`` (compared in lower case). A claim with no later
    user turn counts as accepted. ``rate`` is rounded to three places and is
    ``None`` when the body has fewer than two turns or no claim.
    """
    turns = _split_turns(body)
    if len(turns) < 2:
        return None, ["session has no user/agent split — cannot measure"]

    claims = 0
    rejected = 0
    for index, turn in enumerate(turns):
        is_claim = turn["role"] == "agent" and any(
            pattern.search(turn["text"]) for pattern in DONE_CLAIM_PATTERNS
        )
        if not is_claim:
            continue
        claims += 1

        # Find the first user turn after this claim, if there is one.
        reply = None
        for later in turns[index + 1:]:
            if later["role"] == "user":
                reply = later
                break
        if reply is None:
            continue  # claim accepted (session ended on the claim)

        reply_text = reply["text"].lower()
        if any(phrase in reply_text for phrase in CORRECTION_PHRASES):
            rejected += 1

    if claims == 0:
        return None, ["no done-claims found in session"]
    return round((claims - rejected) / claims, 3), []
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _compute_metrics(body: str) -> SessionMetrics:
    """Compute all four metrics for *body* and collect their notes.

    Notes from the memory-hit and verify-pass metrics are concatenated in
    that order; ``notes`` is ``None`` when neither produced any.
    """
    memory_ratio, memory_notes = _metric_memory_hit_ratio(body)
    verify_rate, verify_notes = _metric_verify_pass_rate(body)
    combined = [*memory_notes, *verify_notes]
    return SessionMetrics(
        tool_call_count=_metric_tool_call_count(body),
        reply_chars_mean=_metric_reply_chars(body),
        memory_hit_ratio=memory_ratio,
        verify_pass_rate=verify_rate,
        notes=combined if combined else None,
    )
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _render_frontmatter(meta: Dict[str, Any]) -> str:
|
|
224
|
+
"""Minimal YAML emitter — stdlib only, dict + scalar + list of strings.
|
|
225
|
+
Nested dict supported one level deep (for `metrics`).
|
|
226
|
+
"""
|
|
227
|
+
def fmt_scalar(v: Any) -> str:
|
|
228
|
+
if v is None:
|
|
229
|
+
return "null"
|
|
230
|
+
if isinstance(v, bool):
|
|
231
|
+
return "true" if v else "false"
|
|
232
|
+
if isinstance(v, (int, float)):
|
|
233
|
+
return str(v)
|
|
234
|
+
return json.dumps(v, ensure_ascii=False)
|
|
235
|
+
|
|
236
|
+
lines = ["---"]
|
|
237
|
+
for k, v in meta.items():
|
|
238
|
+
if isinstance(v, dict):
|
|
239
|
+
lines.append(f"{k}:")
|
|
240
|
+
for kk, vv in v.items():
|
|
241
|
+
lines.append(f" {kk}: {fmt_scalar(vv)}")
|
|
242
|
+
elif isinstance(v, list):
|
|
243
|
+
lines.append(f"{k}:")
|
|
244
|
+
for item in v:
|
|
245
|
+
lines.append(f" - {fmt_scalar(item)}")
|
|
246
|
+
else:
|
|
247
|
+
lines.append(f"{k}: {fmt_scalar(v)}")
|
|
248
|
+
lines.append("---")
|
|
249
|
+
return "\n".join(lines) + "\n"
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def cmd_capture(args: argparse.Namespace) -> int:
    """Write a captured session with frontmatter to ``SESSIONS_DIR``.

    Reads the raw log named by ``args.input``, drops any existing
    frontmatter, computes the metrics and writes
    ``<SESSIONS_DIR>/<slug>.log``. With ``args.format == "json"`` the
    metrics are also printed as JSON.

    Returns:
        0 on success; 1 when the slug is not a plain file name or when the
        target exists and ``args.force`` is not set.
    """
    slug = args.slug
    # The slug becomes a file name inside SESSIONS_DIR. Reject anything
    # that could resolve elsewhere (path separators, "." / "..") before
    # reading input or touching the filesystem.
    if not slug or slug in {".", ".."} or "/" in slug or "\\" in slug:
        print(f"❌ invalid slug {slug!r} — must be a plain file name "
              "without path separators", file=sys.stderr)
        return 1

    raw = _read_session(Path(args.input))
    body = _split_body(raw)
    metrics = _compute_metrics(body)
    # Missing timestamps default to "now".
    started = args.started or _now_iso()
    ended = args.ended or _now_iso()
    meta: Dict[str, Any] = {
        "slug": slug,
        "task_class": args.task_class,
        "host_agent": args.host,
        "model": args.model,
        "commit_sha": _git_sha(),
        "started": started,
        "ended": ended,
        "metrics": metrics.to_dict(),
    }
    frontmatter = _render_frontmatter(meta)
    SESSIONS_DIR.mkdir(parents=True, exist_ok=True)
    out_path = SESSIONS_DIR / f"{slug}.log"
    if out_path.exists() and not args.force:
        print(f"❌ refusing to overwrite {out_path} — pass --force",
              file=sys.stderr)
        return 1
    out_path.write_text(frontmatter + body, encoding="utf-8")
    # Prefer a repo-relative path in the success message when possible.
    try:
        display = out_path.relative_to(ROOT)
    except ValueError:
        display = out_path
    print(f"✅ wrote {display}")
    if args.format == "json":
        print(json.dumps(metrics.to_dict(), indent=2))
    return 0
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def cmd_metrics(args: argparse.Namespace) -> int:
    """Print one or all metrics of the session named by ``args.session``.

    ``args.metric`` selects a single metric or ``"all"``; ``args.format``
    selects JSON or a text table. In text mode any metric notes are printed
    after the table.

    Returns:
        0 on success, 1 when the requested metric name is unknown.
    """
    metrics = _compute_metrics(_split_body(_read_session(Path(args.session))))
    wanted = args.metric
    by_name = {
        "tool-call-count": metrics.tool_call_count,
        "reply-chars": metrics.reply_chars_mean,
        "memory-hit-ratio": metrics.memory_hit_ratio,
        "verify-pass-rate": metrics.verify_pass_rate,
    }
    show_all = wanted == "all"
    if not show_all and wanted not in by_name:
        print(f"❌ unknown metric: {wanted}", file=sys.stderr)
        return 1

    if args.format == "json":
        payload = metrics.to_dict() if show_all else {wanted: by_name[wanted]}
        print(json.dumps(payload, indent=2))
        return 0

    rows = list(by_name.items()) if show_all else [(wanted, by_name[wanted])]
    for name, value in rows:
        rendered = str(value) if value is not None else "n/a"
        print(f" {name:<22} {rendered}")
    if metrics.notes:
        print()
        for note in metrics.notes:
            print(f" ℹ️ {note}")
    return 0
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser with the ``capture`` and ``metrics`` subcommands.

    Each subcommand stores its handler in ``func`` through ``set_defaults``.
    """
    parser = argparse.ArgumentParser(
        prog="capture_showcase_session.py",
        description="Capture and measure /implement-ticket and /work showcase sessions.",
    )
    commands = parser.add_subparsers(dest="command", required=True)
    output_formats = ["text", "json"]

    capture = commands.add_parser("capture", help="Write a session log with frontmatter.")
    capture.add_argument(
        "--input", required=True,
        help="Path to raw chat log, or '-' for stdin.",
    )
    capture.add_argument(
        "--slug", required=True,
        help="Filename slug (becomes <slug>.log).",
    )
    capture.add_argument(
        "--task-class", default="implement-ticket",
        choices=["implement-ticket", "work", "review-changes", "qa"],
    )
    capture.add_argument(
        "--host", default="unknown",
        help="Host agent identifier (augment, claude-code, …).",
    )
    capture.add_argument("--model", default="unknown")
    capture.add_argument(
        "--started", default=None,
        help="ISO-8601 start timestamp (defaults to now).",
    )
    capture.add_argument(
        "--ended", default=None,
        help="ISO-8601 end timestamp (defaults to now).",
    )
    capture.add_argument(
        "--force", action="store_true",
        help="Overwrite an existing session file.",
    )
    capture.add_argument("--format", choices=list(output_formats), default="text")
    capture.set_defaults(func=cmd_capture)

    metrics = commands.add_parser("metrics", help="Compute one or all metrics.")
    metrics.add_argument(
        "--session", required=True,
        help="Path to a captured session log.",
    )
    metrics.add_argument(
        "--metric", default="all",
        choices=["all", "tool-call-count", "reply-chars",
                 "memory-hit-ratio", "verify-pass-rate"],
    )
    metrics.add_argument("--format", choices=list(output_formats), default="text")
    metrics.set_defaults(func=cmd_metrics)
    return parser
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def main(argv: Optional[List[str]] = None) -> int:
    """Parse *argv* and run the selected subcommand.

    Passing ``None`` lets argparse read the process arguments. The return
    value is whatever the subcommand handler returns.
    """
    namespace = _build_parser().parse_args(argv)
    handler = namespace.func
    return handler(namespace)
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
if __name__ == "__main__":
|
|
361
|
+
sys.exit(main())
|
package/scripts/chat_history.py
CHANGED
|
@@ -912,7 +912,17 @@ def hook_dispatch(platform: str, raw_json: str, *,
|
|
|
912
912
|
if not isinstance(payload, dict):
|
|
913
913
|
raise ValueError("stdin JSON must decode to an object")
|
|
914
914
|
|
|
915
|
-
|
|
915
|
+
# Unwrap dispatcher envelope (Phase 7.3, hook-architecture-v1.md). When
|
|
916
|
+
# the dispatcher invoked us, stdin carries {schema_version, platform,
|
|
917
|
+
# event, payload, …}; pull the platform-native data out of `payload`
|
|
918
|
+
# and let the envelope's `event` override the per-platform mapping.
|
|
919
|
+
envelope_event = ""
|
|
920
|
+
if all(k in payload for k in ("schema_version", "platform", "event", "payload")):
|
|
921
|
+
envelope_event = (payload.get("native_event") or payload.get("event") or "").strip()
|
|
922
|
+
inner = payload.get("payload")
|
|
923
|
+
payload = inner if isinstance(inner, dict) else {}
|
|
924
|
+
|
|
925
|
+
raw_event = (event_override or envelope_event or _extract_hook_event(payload) or "").strip()
|
|
916
926
|
event = PLATFORM_EVENT_MAP[platform].get(raw_event)
|
|
917
927
|
if not event:
|
|
918
928
|
return {"action": "skipped_unmapped_event", "platform": platform,
|
|
@@ -69,6 +69,12 @@ TOLERANCE_BAND = 0.02
|
|
|
69
69
|
PER_RULE_CAP = 6_000
|
|
70
70
|
TOP3_CAP = TOTAL_CAP // 2
|
|
71
71
|
MAX_DEPTH = 2
|
|
72
|
+
# Phase 1.3 Q2 (road-to-context-layer-maturity) — per-rule context count
|
|
73
|
+
# cap. Counts top-level `load_context:` + `load_context_eager:` entries
|
|
74
|
+
# per rule (not transitive depth). Empirical max in the rule set is 3
|
|
75
|
+
# (autonomous-execution); a 4th declared context is the structural
|
|
76
|
+
# signal that the rule should split, not load more.
|
|
77
|
+
MAX_CONTEXTS_PER_RULE = 3
|
|
72
78
|
|
|
73
79
|
# Recovery band (AI Council session 2026-05-03T12-02-42Z, verdict A1).
|
|
74
80
|
# When enabled, a branch in the 90–100 % gap zone passes as WARN iff its
|
|
@@ -86,9 +92,14 @@ BASELINE_FILE = REPO_ROOT / ".github" / "budget-baseline.txt"
|
|
|
86
92
|
# growth above the ceiling fails CI even while the entry remains.
|
|
87
93
|
# When Phase 2A retires a rule, drop its entry here AND in
|
|
88
94
|
# `tests/test_always_budget.py::KNOWN_PER_RULE_BREACHES`.
|
|
95
|
+
#
|
|
96
|
+
# Phase 2 of road-to-feedback-consolidation.md added a single-line
|
|
97
|
+
# `tier: "safety-floor"` frontmatter key (21 chars) to every safety-floor
|
|
98
|
+
# rule. Both ceilings below were re-baselined +21 to absorb that
|
|
99
|
+
# frontmatter-only growth without trimming Iron-Law content.
|
|
89
100
|
KNOWN_PER_RULE_BREACHES: dict[str, int] = {
|
|
90
|
-
"non-destructive-by-default.md":
|
|
91
|
-
"scope-control.md":
|
|
101
|
+
"non-destructive-by-default.md": 7_908,
|
|
102
|
+
"scope-control.md": 8_550,
|
|
92
103
|
}
|
|
93
104
|
|
|
94
105
|
|
|
@@ -171,6 +182,29 @@ def _always_rules() -> list[Path]:
|
|
|
171
182
|
return sorted(p for p in RULES_DIR.glob("*.md") if _is_always(p))
|
|
172
183
|
|
|
173
184
|
|
|
185
|
+
def _all_rules() -> list[Path]:
    """Return every ``*.md`` file directly inside ``RULES_DIR``, sorted."""
    rule_files = list(RULES_DIR.glob("*.md"))
    rule_files.sort()
    return rule_files
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _context_count(rule: Path) -> int:
    """Return the number of contexts *rule* declares in its frontmatter.

    Adds the lengths of the ``load_context`` and ``load_context_eager``
    entries. A key that is missing, empty, or not a list contributes zero.
    """
    frontmatter = _frontmatter(rule)
    total = 0
    for key in ("load_context", "load_context_eager"):
        declared = frontmatter.get(key) or []
        if isinstance(declared, list):
            total += len(declared)
    return total
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _per_rule_count_breaches() -> list[tuple[str, int]]:
    """Phase 1.3 Q2 — list rules that declare too many contexts.

    Returns ``(file name, declared count)`` pairs for every rule whose
    count is greater than ``MAX_CONTEXTS_PER_RULE``, in ``_all_rules()``
    order.
    """
    counted = ((rule.name, _context_count(rule)) for rule in _all_rules())
    return [
        (name, count)
        for name, count in counted
        if count > MAX_CONTEXTS_PER_RULE
    ]
|
|
206
|
+
|
|
207
|
+
|
|
174
208
|
def _extended_size(rule: Path) -> tuple[int, list[tuple[str, str]]]:
|
|
175
209
|
raw = rule.stat().st_size
|
|
176
210
|
contexts, violations = _walk_contexts(rule)
|
|
@@ -298,6 +332,7 @@ def main() -> int:
|
|
|
298
332
|
single_breaches, top3_concentration_breach = _concentration_check(
|
|
299
333
|
sizes, total_ext
|
|
300
334
|
)
|
|
335
|
+
count_breaches = _per_rule_count_breaches()
|
|
301
336
|
failing = (
|
|
302
337
|
(
|
|
303
338
|
pct >= FAIL_THRESHOLD
|
|
@@ -312,6 +347,7 @@ def main() -> int:
|
|
|
312
347
|
or all_violations
|
|
313
348
|
or single_breaches
|
|
314
349
|
or top3_concentration_breach is not None
|
|
350
|
+
or count_breaches
|
|
315
351
|
)
|
|
316
352
|
if failing:
|
|
317
353
|
status, rc = "❌ FAIL", 1
|
|
@@ -402,6 +438,14 @@ def main() -> int:
|
|
|
402
438
|
f"{sum_:,} ({frac * 100:.1f}%)"
|
|
403
439
|
)
|
|
404
440
|
|
|
441
|
+
if count_breaches:
|
|
442
|
+
details = ", ".join(f"{n}={c}" for n, c in count_breaches)
|
|
443
|
+
print(
|
|
444
|
+
f"\n Per-rule context-count cap breach "
|
|
445
|
+
f"(> {MAX_CONTEXTS_PER_RULE} declared contexts, Q2 "
|
|
446
|
+
f"road-to-context-layer-maturity Phase 1.3): {details}"
|
|
447
|
+
)
|
|
448
|
+
|
|
405
449
|
# Phase 5.3 — per-rule trend delta vs. previous run.
|
|
406
450
|
prev = _last_trend()
|
|
407
451
|
if prev is not None and not args.quiet:
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""One-off script-location guard (Phase 0a.2 of road-to-rule-hardening).
|
|
3
|
+
|
|
4
|
+
Every ``_one_off_*.py`` script under ``scripts/`` must live inside the
|
|
5
|
+
archive folder ``scripts/ai_council/one_off_archive/<YYYY-MM>/``. The
|
|
6
|
+
guard fails CI if a new probe lands anywhere else in the tree.
|
|
7
|
+
|
|
8
|
+
Rationale: one-off council probes / phase-specific measurements are
|
|
9
|
+
inherently single-purpose; their durable artefact is the council
|
|
10
|
+
session under ``agents/council-sessions/``. Keeping them in the
|
|
11
|
+
archive prevents the ``scripts/`` root from accumulating noise and
|
|
12
|
+
makes their lifecycle visible (folder == month archived).
|
|
13
|
+
|
|
14
|
+
Exit codes:
|
|
15
|
+
0 = clean
|
|
16
|
+
1 = violation (script outside the archive)
|
|
17
|
+
3 = internal error
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import re
|
|
23
|
+
import sys
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
27
|
+
SCRIPTS = REPO_ROOT / "scripts"
|
|
28
|
+
ARCHIVE = SCRIPTS / "ai_council" / "one_off_archive"
|
|
29
|
+
ARCHIVE_MONTH_RE = re.compile(r"^\d{4}-\d{2}$")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def find_violations() -> list[Path]:
    """Return ``_one_off_*.py`` files under ``SCRIPTS`` that are misplaced.

    A script is correctly placed only when its path relative to ``ARCHIVE``
    is exactly ``<YYYY-MM>/<name>.py``. Anything outside ``ARCHIVE``, nested
    deeper, or in a folder not matching ``ARCHIVE_MONTH_RE`` is a violation.
    """
    def _is_archived(candidate: Path) -> bool:
        try:
            inside = candidate.relative_to(ARCHIVE)
        except ValueError:
            # Not under the archive folder at all.
            return False
        segments = inside.parts
        return len(segments) == 2 and bool(ARCHIVE_MONTH_RE.match(segments[0]))

    return [
        script
        for script in SCRIPTS.rglob("_one_off_*.py")
        if script.is_file() and not _is_archived(script)
    ]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def main() -> int:
    """Run the location guard and report the result.

    Returns:
        0 when no violation is found, 1 when at least one script is
        misplaced, 3 when ``find_violations`` raises.
    """
    summary = __doc__.strip().splitlines()[0]
    cli = argparse.ArgumentParser(description=summary)
    cli.add_argument("--quiet", action="store_true", help="Only print on failure")
    options = cli.parse_args()

    try:
        offenders = find_violations()
    except Exception as exc:  # pragma: no cover — defensive
        print(f"❌ internal error: {exc}", file=sys.stderr)
        return 3

    if not offenders:
        if not options.quiet:
            print("✅ all _one_off_*.py scripts are archived")
        return 0

    print("❌ one-off scripts outside the archive:", file=sys.stderr)
    for offender in offenders:
        shown = offender.relative_to(REPO_ROOT)
        print(f" {shown}", file=sys.stderr)
    print(
        "\n Move them under "
        "scripts/ai_council/one_off_archive/<YYYY-MM>/ "
        "(see that folder's README.md).",
        file=sys.stderr,
    )
    return 1
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
if __name__ == "__main__": # pragma: no cover
|
|
81
|
+
sys.exit(main())
|
|
@@ -274,6 +274,12 @@ def check_file(filepath: Path, artifacts: dict[str, set[str]], root: Path) -> Li
|
|
|
274
274
|
if (prefix / rel).exists():
|
|
275
275
|
resolved = True
|
|
276
276
|
break
|
|
277
|
+
# `agents/state/*.json` are runtime hook state files —
|
|
278
|
+
# gitignored, written by hooks at session/turn time, never
|
|
279
|
+
# committed. Prose references to them are descriptive, not
|
|
280
|
+
# checkable file paths.
|
|
281
|
+
if not resolved and raw_ref.startswith("agents/state/"):
|
|
282
|
+
resolved = True
|
|
277
283
|
if not resolved:
|
|
278
284
|
broken.append(BrokenRef(
|
|
279
285
|
file=str(filepath), line=i, ref=m.group(1),
|
package/scripts/compress.py
CHANGED
|
@@ -561,8 +561,11 @@ def project_to_augment() -> None:
|
|
|
561
561
|
dst.symlink_to(Path("..") / ".agent-src" / name)
|
|
562
562
|
print(f" ✅ Symlinked .augment/{name} → ../.agent-src/{name}")
|
|
563
563
|
|
|
564
|
-
# Cleanup: remove any stray top-level entries in .augment/ that are no longer projected
|
|
565
|
-
|
|
564
|
+
# Cleanup: remove any stray top-level entries in .augment/ that are no longer projected.
|
|
565
|
+
# `state` holds runtime state files written by hooks (onboarding-gate,
|
|
566
|
+
# context-hygiene, …) and must survive sync — it is regenerated by
|
|
567
|
+
# the next hook fire, not by compress.
|
|
568
|
+
known = set(AUGMENT_SYMLINK_DIRS) | set(AUGMENT_SYMLINK_FILES) | {"rules", "state"}
|
|
566
569
|
for item in AUGMENT_DIR.iterdir():
|
|
567
570
|
if item.name in known:
|
|
568
571
|
continue
|