@event4u/agent-config 1.17.0 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/.agent-src/rules/context-hygiene.md +6 -0
  2. package/.agent-src/rules/direct-answers.md +17 -26
  3. package/.agent-src/rules/no-cheap-questions.md +14 -21
  4. package/.agent-src/rules/onboarding-gate.md +7 -0
  5. package/.agent-src/rules/roadmap-progress-sync.md +27 -0
  6. package/.agent-src/rules/rule-type-governance.md +28 -0
  7. package/.agent-src/templates/roadmaps.md +4 -0
  8. package/.claude-plugin/marketplace.json +1 -1
  9. package/CHANGELOG.md +35 -0
  10. package/README.md +1 -1
  11. package/docs/architecture.md +1 -1
  12. package/docs/contracts/load-context-budget-model.md +80 -0
  13. package/docs/contracts/load-context-schema.md +20 -0
  14. package/docs/contracts/roadmap-complexity-standard.md +137 -0
  15. package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +134 -0
  16. package/docs/guidelines/agent-infra/direct-answers-demos.md +145 -0
  17. package/docs/guidelines/agent-infra/verify-before-complete-demos.md +128 -0
  18. package/package.json +1 -1
  19. package/scripts/agent-config +20 -0
  20. package/scripts/ai_council/one_off_archive/2026-05/README.md +45 -0
  21. package/scripts/ai_council/one_off_archive/2026-05/_one_off_budget_v2_audit.py +206 -0
  22. package/scripts/build_rule_trigger_matrix.py +360 -0
  23. package/scripts/check_always_budget.py +39 -0
  24. package/scripts/check_one_off_location.py +81 -0
  25. package/scripts/check_references.py +6 -0
  26. package/scripts/compress.py +5 -2
  27. package/scripts/context_hygiene_hook.py +173 -0
  28. package/scripts/hooks/augment-context-hygiene.sh +55 -0
  29. package/scripts/hooks/augment-onboarding-gate.sh +55 -0
  30. package/scripts/install.py +58 -19
  31. package/scripts/lint_examples.py +98 -0
  32. package/scripts/lint_roadmap_complexity.py +127 -0
  33. package/scripts/onboarding_gate_hook.py +137 -0
  34. package/scripts/schemas/rule.schema.json +5 -0
  35. /package/scripts/ai_council/{_one_off_2a4_acceptance.py → one_off_archive/2026-05/_one_off_2a4_acceptance.py} +0 -0
  36. /package/scripts/ai_council/{_one_off_context_layer_v1_estimate.py → one_off_archive/2026-05/_one_off_context_layer_v1_estimate.py} +0 -0
  37. /package/scripts/ai_council/{_one_off_context_layer_v1_review.py → one_off_archive/2026-05/_one_off_context_layer_v1_review.py} +0 -0
  38. /package/scripts/ai_council/{_one_off_followups_review.py → one_off_archive/2026-05/_one_off_followups_review.py} +0 -0
  39. /package/scripts/ai_council/{_one_off_nondestructive_inline_audit.py → one_off_archive/2026-05/_one_off_nondestructive_inline_audit.py} +0 -0
  40. /package/scripts/{_one_off_phase4_dispatch_latency.py → ai_council/one_off_archive/2026-05/_one_off_phase4_dispatch_latency.py} +0 -0
  41. /package/scripts/{_one_off_phase6_trigger_jaccard.py → ai_council/one_off_archive/2026-05/_one_off_phase6_trigger_jaccard.py} +0 -0
  42. /package/scripts/ai_council/{_one_off_phase_2a_budget_rebalance.py → one_off_archive/2026-05/_one_off_phase_2a_budget_rebalance.py} +0 -0
  43. /package/scripts/ai_council/{_one_off_phase_2a_post_revert.py → one_off_archive/2026-05/_one_off_phase_2a_post_revert.py} +0 -0
  44. /package/scripts/ai_council/{_one_off_rebalancing_audit.py → one_off_archive/2026-05/_one_off_rebalancing_audit.py} +0 -0
  45. /package/scripts/ai_council/{_one_off_roundtrip.py → one_off_archive/2026-05/_one_off_roundtrip.py} +0 -0
  46. /package/scripts/ai_council/{_one_off_rule_hardening_v1.py → one_off_archive/2026-05/_one_off_rule_hardening_v1.py} +0 -0
  47. /package/scripts/ai_council/{_one_off_structural_open_questions.py → one_off_archive/2026-05/_one_off_structural_open_questions.py} +0 -0
  48. /package/scripts/ai_council/{_one_off_structural_optimization.py → one_off_archive/2026-05/_one_off_structural_optimization.py} +0 -0
  49. /package/scripts/ai_council/{_one_off_structural_v3_gaps.py → one_off_archive/2026-05/_one_off_structural_v3_gaps.py} +0 -0
  50. /package/scripts/ai_council/{_one_off_structural_v3_review.py → one_off_archive/2026-05/_one_off_structural_v3_review.py} +0 -0
@@ -0,0 +1,206 @@
1
+ """Council audit of Budget-v2 result (Phase 4.5 of road-to-context-layer-maturity).
2
+
3
+ Phase 4 of road-to-context-layer-maturity selected two 4d-trim paths
4
+ (`direct-answers`, `no-cheap-questions`) from a fixed option set
5
+ documented in agents/contexts/budget-v2-matrix.md and shipped them.
6
+ Exit-gate actuals (run 2026-05-04): total 44,928 / 49,000 chars
7
+ (91.7 %, 4,072 chars headroom) — ≥ 4,000 headroom goal hit. Top-3
8
+ sum unchanged. Safety-floor rules untouched.
9
+
10
+ Phase 4.5 requires a council audit before archival: confirm the
11
+ trim choices were sound, no semantic drift introduced, no better
12
+ path missed inside the Phase 4 inputs gate.
13
+
14
+ Invocation:
15
+ .venv/bin/python -m scripts.ai_council.one_off_archive.2026-05._one_off_budget_v2_audit
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import sys
20
+ from pathlib import Path
21
+
22
+ from scripts.ai_council.bundler import bundle_files
23
+ from scripts.ai_council.clients import (
24
+ AnthropicClient,
25
+ OpenAIClient,
26
+ load_anthropic_key,
27
+ load_openai_key,
28
+ )
29
+ from scripts.ai_council.orchestrator import (
30
+ CostBudget,
31
+ CouncilQuestion,
32
+ consult,
33
+ estimate,
34
+ )
35
+ from scripts.ai_council.pricing import estimate_cost, load_prices
36
+ from scripts.ai_council.project_context import detect_project_context
37
+ from scripts.ai_council.session import SessionManifest, save as save_session
38
+
39
+ REPO_ROOT = Path(__file__).resolve().parents[4]
40
+ ARTEFACTS = [
41
+ REPO_ROOT / "docs/contracts/load-context-budget-model.md",
42
+ REPO_ROOT / "agents/contexts/budget-v2-matrix.md",
43
+ REPO_ROOT / ".agent-src.uncompressed/rules/direct-answers.md",
44
+ REPO_ROOT / ".agent-src.uncompressed/rules/no-cheap-questions.md",
45
+ ]
46
+
47
+ ORIGINAL_ASK = (
48
+ "Phase 4 of road-to-context-layer-maturity trimmed two always-rules "
49
+ "(direct-answers, no-cheap-questions) under the locked Model (b) "
50
+ "literal budget contract, hitting the ≥ 4,000-chars headroom goal "
51
+ "(actual: 4,072). Council task: audit the trim choices for "
52
+ "soundness and semantic drift before roadmap archival."
53
+ )
54
+
55
+ REVIEW_PROMPT = """\
56
+ # Council Audit — Budget-v2 Trim Result (Phase 4.5)
57
+
58
+ ## Context
59
+
60
+ Phase 4 selected two 4d-trim paths from a fixed option set documented
61
+ in `budget-v2-matrix.md`. The matrix evaluated 4a (demote→auto), 4b
62
+ (merge), 4c (shared-context, locked at 3a Model (b) literal — no-op),
63
+ and 4d (compress prose) for every touchable always-rule. Safety-floor
64
+ rules (scope-control, non-destructive-by-default, commit-policy,
65
+ agent-authority) were untouchable. Outcome-untested rules were
66
+ restricted to 4d only per the Phase 4.0 inputs gate.
67
+
68
+ ## Selected paths and result
69
+
70
+ - **4d on `direct-answers`** — emoji-scope subsection trimmed,
71
+ failure-mode collapsed to pointer. Δ ext: 4,098 → 3,987 (−111).
72
+ - **4d on `no-cheap-questions`** — "What counts as cheap" subsection
73
+ collapsed to pointer at `asking-and-brevity-examples.md`. Δ ext:
74
+ 4,257 → 3,933 (−324).
75
+
76
+ Combined: −435 chars · headroom 3,637 → 4,072 (+435) · top-3 sum
77
+ unchanged · safety-floor rules untouched.
78
+
79
+ ## Audit questions (please address each)
80
+
81
+ 1. **Trim soundness** — do the surviving Iron Laws in both rules still
82
+ carry the rule's purpose, or did the prose trim sacrifice precision?
83
+ Cite the specific subsection if you find drift.
84
+
85
+ 2. **Path selection** — was 4d the right choice for these two rules
86
+ given the matrix? Or should one of the deferred paths (4a, 4b)
87
+ have been picked despite the matrix verdict?
88
+
89
+ 3. **Missed leverage** — inside the Phase 4 inputs gate (4d only on
90
+ outcome-untested rules; safety-floor untouchable), is there a
91
+ higher-leverage 4d target the matrix missed?
92
+
93
+ 4. **Headroom durability** — 4,072 chars is +72 over the 4,000 goal.
94
+ Is this margin stable against expected near-term rule edits, or
95
+ should Phase 5 be tightened to defend it?
96
+
97
+ ## Output Contract (STRICT)
98
+
99
+ ```
100
+ ### Verdict
101
+ **Trim choices sound:** <YES — archive · NO — escalate>
102
+ **One-sentence rationale:** <≤ 30 words>
103
+ ```
104
+
105
+ ```
106
+ ### Per-question findings (1–4 above)
107
+ 1. <≤ 2 sentences>
108
+ 2. <≤ 2 sentences>
109
+ 3. <≤ 2 sentences>
110
+ 4. <≤ 2 sentences>
111
+ ```
112
+
113
+ ```
114
+ ### Risk note
115
+ **Single biggest residual risk:** <one sentence>
116
+ **Mitigation (if any):** <one sentence or NONE>
117
+ ```
118
+
119
+ Be decisive — total response ≤ 800 words. Artefacts follow verbatim.
120
+ """
121
+
122
+
123
def main() -> int:
    """Run the one-round Phase 4.5 Budget-v2 council audit and save the session.

    Builds the prompt from ``REVIEW_PROMPT`` plus the bundled ``ARTEFACTS``,
    prints a per-member cost estimate, runs ``consult`` for a single round,
    prints the actual cost, and persists a ``SessionManifest``.

    Returns:
        0 when a round completed and no response carried an error;
        1 when no round completed, when every response of the final round
        errored (nothing is saved in that case), or when any response in any
        round errored (the session is still saved).
    """
    anthropic = AnthropicClient(api_key=load_anthropic_key(), model="claude-sonnet-4-5")
    openai = OpenAIClient(api_key=load_openai_key(), model="gpt-4o")
    members = [anthropic, openai]

    context = bundle_files(ARTEFACTS)
    project = detect_project_context(REPO_ROOT)
    table = load_prices()

    # The artefacts are appended verbatim after the review prompt.
    user_prompt = REVIEW_PROMPT + "\n\n---\n\n" + context.text

    question = CouncilQuestion(mode="files", user_prompt=user_prompt, max_tokens=2048)

    estimates = estimate(question, members, table, project=project, original_ask=ORIGINAL_ASK)
    print("=== ESTIMATE (single round) ===")
    total_est = 0.0
    # NOTE(review): assumes `estimate` returns one entry per member, in order.
    for c, e in zip(members, estimates):
        print(f" {c.name}/{c.model}: ~{e.input_tokens} in + {e.output_tokens} out = ${e.total_usd:.4f}")
        total_est += e.total_usd
    print(f" TOTAL per round (max): ${total_est:.4f}\n")

    budget = CostBudget(
        max_input_tokens=200_000,
        max_output_tokens=80_000,
        max_calls=20,
        max_total_usd=2.50,
    )

    # Filled by the callback below; one inner list per completed round.
    rounds_collected: list[list] = []

    def _on_round_complete(round_idx: int, round_responses) -> None:
        """Record the round's responses and print a per-member summary."""
        rounds_collected.append(list(round_responses))
        print(f"=== ROUND {round_idx + 1} COMPLETE ===")
        for r in round_responses:
            if r.error:
                print(f" [error] {r.provider}/{r.model}: {r.error}")
                continue
            actual = estimate_cost(r.provider, r.model, r.input_tokens, r.output_tokens, table)
            print(f" [done] {r.provider}/{r.model}: {r.input_tokens} in / "
                  f"{r.output_tokens} out · {r.latency_ms} ms · ${actual.total_usd:.4f}")
        print()

    print("=== CONSULT (1 round, Phase 4.5 Budget-v2 audit) ===")
    consult(
        members, question, budget,
        rounds=1,
        on_round_complete=_on_round_complete,
        table=table, project=project, original_ask=ORIGINAL_ASK,
    )

    if not rounds_collected:
        print("[error] no rounds completed", file=sys.stderr)
        return 1

    # Errored responses are excluded from the actual-cost total.
    actual_total = 0.0
    for round_responses in rounds_collected:
        for r in round_responses:
            if r.error:
                continue
            actual = estimate_cost(r.provider, r.model, r.input_tokens, r.output_tokens, table)
            actual_total += actual.total_usd
    print(f"=== TOTAL ACTUAL: ${actual_total:.4f} ===")

    final_round = rounds_collected[-1]
    if all(r.error for r in final_round):
        # Previously a silent exit: say why no session manifest is written.
        print("[error] no successful responses in final round; session not saved",
              file=sys.stderr)
        return 1

    manifest = SessionManifest(
        mode="files",
        artefact="agents/roadmaps/road-to-context-layer-maturity.md",
        original_ask=ORIGINAL_ASK,
        members=[f"{r.provider}/{r.model}" for r in final_round],
        rounds=len(rounds_collected),
        cost_usd_estimated=total_est,
        cost_usd_actual=actual_total,
        extra={"purpose": "Phase 4.5 Budget-v2 trim-result audit"},
    )
    session_dir = save_session(manifest=manifest, responses=rounds_collected)
    print(f"[saved] {session_dir.relative_to(REPO_ROOT)}/")
    # A partial failure still saves the session but is reported via the exit code.
    return 1 if any(r.error for round_r in rounds_collected for r in round_r) else 0
203
+
204
+
205
+ if __name__ == "__main__":
206
+ raise SystemExit(main())
@@ -0,0 +1,360 @@
1
+ #!/usr/bin/env python3
2
+ """Build agents/contexts/rule-trigger-matrix.md.
3
+
4
+ Emits a single matrix mapping every rule in `.agent-src.uncompressed/rules/`
5
+ to its trigger event, observability, enforcement surface, hook-cost
6
+ estimate, and Tier classification. Sourced from the Phase 1 inventory of
7
+ `road-to-rule-hardening.md` plus `road-to-context-layer-maturity.md`
8
+ Phase 1 (`load_context:` chains).
9
+
10
+ Exit 0 when every rule is classified; exit 2 (after still writing the matrix) when a rule is missing from `CLASSIFICATION`.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ import yaml
19
+
20
+ REPO_ROOT = Path(__file__).resolve().parents[1]
21
+ SRC_RULES = REPO_ROOT / ".agent-src.uncompressed" / "rules"
22
+ COMP_RULES = REPO_ROOT / ".agent-src" / "rules"
23
+ SRC_PREFIX = ".agent-src.uncompressed/"
24
+ COMP_PREFIX = ".agent-src/"
25
+ OUT = REPO_ROOT / "agents" / "contexts" / "rule-trigger-matrix.md"
26
+
27
+ # Classification table — one row per rule. See § Methodology in the
28
+ # generated file for the column meanings. Sourced from the Phase 1 audit;
29
+ # reviewed entries carry an empirical signal in `notes`.
30
+ #
31
+ # Columns: trigger, observability, enforcement, hook_cost, tier, dormant
32
+ #
33
+ # trigger: when the rule should fire
34
+ # observability: agent-only | hook | settings | mechanical-already
35
+ # enforcement: output | tool-call | state | hook | none
36
+ # hook_cost: low | medium | high | NA-mechanical | NA-soft
37
+ # tier: 1 | 2a | 2b | 3 | safety-floor | mechanical-already
38
+ # dormant: no | suspected | unknown
39
+ CLASSIFICATION: dict[str, dict] = {}
40
+
41
+
42
def add(name, trigger, obs, enf, cost, tier, dormant="no", notes=""):
    """Register one rule's manual classification in ``CLASSIFICATION``.

    The row is stored under ``name`` (the rule's file name); a later call
    with the same name replaces the earlier row.
    """
    row = {
        "trigger": trigger,
        "observability": obs,
        "enforcement": enf,
        "hook_cost": cost,
        "tier": tier,
        "dormant": dormant,
        "notes": notes,
    }
    CLASSIFICATION[name] = row
47
+
48
+
49
+ # ── Always-rules — safety floor (out of scope for hardening) ──────────
50
+ add("non-destructive-by-default.md", "destructive-op intent", "agent-only",
51
+ "tool-call", "NA-soft", "safety-floor", notes="Safety floor — Iron Law, not modified")
52
+ add("commit-policy.md", "commit intent", "agent-only", "tool-call",
53
+ "NA-soft", "safety-floor", notes="Safety floor — never-ask Iron Law")
54
+ add("scope-control.md", "git-op / refactor intent", "agent-only", "tool-call",
55
+ "NA-soft", "safety-floor", notes="Safety floor — permission gate")
56
+ add("verify-before-complete.md", "completion claim", "agent-only", "output",
57
+ "low", "2b",
58
+ notes="Pre-PR/commit gate. Hookable: detect 'done'/'complete' in reply, require fresh test/quality output in same turn.")
59
+
60
+ # ── Always-rules — Iron-Law pre-send (Tier 3, soft by construction) ───
61
+ add("agent-authority.md", "every turn (router)", "agent-only", "none",
62
+ "NA-soft", "3", notes="Priority index, no trigger of its own")
63
+ add("ask-when-uncertain.md", "pre-send vague-detection", "agent-only", "output",
64
+ "NA-soft", "3", notes="One-question-per-turn — output-rewrite would be needed")
65
+ add("direct-answers.md", "pre-send (every reply)", "agent-only", "output",
66
+ "NA-soft", "3", notes="No-flattery + verify + brevity Iron Laws")
67
+ add("language-and-tone.md", "pre-send language detection", "agent-only", "output",
68
+ "medium", "3",
69
+ notes="Hook could detect German trigger words in last user msg + flag drift. Best-effort marker only.")
70
+ add("no-cheap-questions.md", "pre-send Q&A check", "agent-only", "output",
71
+ "NA-soft", "3", notes="Pre-send self-check, no platform surface")
72
+
73
+ # ── Auto-rules — Tier 1 candidates (mechanizable, deterministic) ──────
74
+ add("chat-history-cadence.md", "per-turn / per-tool / per-phase", "mechanical-already",
75
+ "hook", "NA-mechanical", "mechanical-already",
76
+ notes="PRECEDENT — heartbeat + chat_history.py + hooks. Reference pattern.")
77
+ add("chat-history-ownership.md", "first turn", "hook", "state",
78
+ "low", "1", notes="Detectable: ownership classification at session start")
79
+ add("chat-history-visibility.md", "heartbeat marker emit", "mechanical-already",
80
+ "hook", "NA-mechanical", "mechanical-already",
81
+ notes="Subprocess marker print is already mechanical")
82
+ add("onboarding-gate.md", "first turn (settings.onboarded == false)", "settings",
83
+ "state", "low", "1",
84
+ notes="Pilot candidate — frequency 100% on un-onboarded projects, binary verifiable")
85
+ add("roadmap-progress-sync.md", "file-edit on agents/roadmaps/**", "hook",
86
+ "tool-call", "low", "1",
87
+ notes="Pilot 1 (smallest hook). PostToolUse path filter; already documented in mechanics context.")
88
+ add("context-hygiene.md", "turn counter / tool-loop / topic shift", "hook",
89
+ "state", "medium", "1",
90
+ notes="Per-turn counter + tool-call repetition detector. Cross-platform persistence is the cost driver.")
91
+ add("size-enforcement.md", "file save on .agent-src.uncompressed/{skills,rules,commands}/**",
92
+ "mechanical-already", "tool-call", "NA-mechanical", "mechanical-already",
93
+ notes="Enforced by skill_linter.py + check_always_budget.py")
94
+ add("no-roadmap-references.md", "file save on stable artifacts", "mechanical-already",
95
+ "tool-call", "NA-mechanical", "mechanical-already",
96
+ notes="Enforced by scripts/check_no_roadmap_refs.py (CI gate)")
97
+ add("augment-portability.md", "file save on .agent-src/**", "mechanical-already",
98
+ "tool-call", "NA-mechanical", "mechanical-already",
99
+ notes="Enforced by scripts/check_portability.py")
100
+ add("augment-source-of-truth.md", "file save on .agent-src/ or .augment/",
101
+ "hook", "tool-call", "low", "1",
102
+ notes="Pre-write hook: refuse writes to generated dirs")
103
+ add("package-ci-checks.md", "pre-push to remote", "mechanical-already",
104
+ "hook", "NA-mechanical", "mechanical-already",
105
+ notes="task ci is the gate")
106
+ add("artifact-engagement-recording.md", "phase-step / task end", "mechanical-already",
107
+ "hook", "NA-mechanical", "mechanical-already",
108
+ notes="telemetry:record subprocess is already mechanical")
109
+
110
+ # ── Auto-rules — Tier 2a candidates (marker nudge) ────────────────────
111
+ add("model-recommendation.md", "task-start / topic-shift", "hook",
112
+ "output", "low", "2a",
113
+ notes="Phase 5 prototype target. Marker injection at first user msg + topic-change detection.")
114
+ add("capture-learnings.md", "task completion", "hook", "output",
115
+ "medium", "2a", notes="Post-task marker; learning detection is fuzzy")
116
+ add("skill-improvement-trigger.md", "task completion (settings.skill_improvement)",
117
+ "settings", "state", "low", "2a",
118
+ notes="Settings-flag observable; pipeline already exists")
119
+ add("commit-conventions.md", "commit message draft", "hook", "output",
120
+ "low", "2a", notes="Hook on /commit invocation, marker for conventional-commits format")
121
+ add("docs-sync.md", "file-edit on .augment/{skills,rules,commands}/**", "hook",
122
+ "tool-call", "medium", "2a",
123
+ notes="Detect add/rename/delete; remind to update count + cross-refs")
124
+ add("agent-docs.md", "file-edit on agents/docs/, AGENTS.md", "hook",
125
+ "tool-call", "medium", "2a", notes="Path-pattern based marker")
126
+ add("upstream-proposal.md", "skill/rule create event", "hook", "output",
127
+ "medium", "2a", notes="Marker after new artifact lands")
128
+ add("review-routing-awareness.md", "PR-prep / risk flagging", "hook",
129
+ "output", "medium", "2a",
130
+ notes="Marker when /create-pr or risk-tagging keywords detected")
131
+ add("reviewer-awareness.md", "PR-prep", "hook", "output",
132
+ "medium", "2a", notes="Reviewer-suggestion marker at PR creation")
133
+ add("security-sensitive-stop.md", "file-edit on auth/billing/secrets paths",
134
+ "hook", "tool-call", "low", "2a",
135
+ notes="Path-pattern based marker — strong candidate for low-cost hook")
136
+ add("cli-output-handling.md", "tool-call (verbose CLI)", "hook", "tool-call",
137
+ "low", "2a", notes="Pre-tool-call marker on git/test/lint invocations")
138
+ add("artifact-drafting-protocol.md", "skill/rule create or major rewrite",
139
+ "hook", "output", "medium", "2a",
140
+ notes="Marker on file-create in .agent-src.uncompressed/{skills,rules,commands}/")
141
+ add("missing-tool-handling.md", "tool failure (command not found)", "hook",
142
+ "output", "low", "2a", notes="Post-tool-failure marker — strong fit")
143
+ add("token-efficiency.md", "every reply / verbose-tool invocation", "hook",
144
+ "output", "medium", "2a", notes="Soft Iron Law; nudge via verbose-output detection")
145
+ add("rule-type-governance.md", "rule create/edit", "hook", "tool-call",
146
+ "low", "2a", notes="Linter could enforce; currently advisory")
147
+ add("role-mode-adherence.md", "settings.roles.active_role set", "settings",
148
+ "output", "low", "2a", notes="Mode marker emit at turn end")
149
+
150
+ # ── Auto-rules — Tier 2b (structured injection / gate) ────────────────
151
+ add("downstream-changes.md", "post-edit (callsite check)", "hook",
152
+ "tool-call", "high", "2b",
153
+ notes="Requires callsite analysis — codebase-retrieval-style query. High cost, high value.")
154
+ add("ui-audit-gate.md", "pre-edit on UI files (settings.state.ui_audit empty)",
155
+ "settings", "tool-call", "medium", "2b",
156
+ notes="Block edit until state.ui_audit populated")
157
+ add("preservation-guard.md", "skill/rule merge or compress", "hook",
158
+ "tool-call", "medium", "2b",
159
+ notes="Pre-merge structured check — diff-shape verifiable")
160
+ add("minimal-safe-diff.md", "every diff", "hook", "tool-call",
161
+ "high", "2b", notes="Diff-shape check; reformatting/drive-by detection is fuzzy")
162
+ add("improve-before-implement.md", "task-start (implementation intent)",
163
+ "hook", "output", "medium", "2b",
164
+ notes="Pre-implementation gate; could inject 'validated?' field requirement")
165
+ add("think-before-action.md", "pre-edit", "hook", "output",
166
+ "medium", "2b", notes="Pre-tool-call marker requiring analysis-first")
167
+ add("runtime-safety.md", "skill metadata change", "hook", "tool-call",
168
+ "low", "2b", notes="Linter-enforceable on skill frontmatter")
169
+ add("tool-safety.md", "skill creation (external tool decl)", "hook",
170
+ "tool-call", "low", "2b", notes="Allowlist-enforceable in skill linter")
171
+ add("skill-quality.md", "skill create/edit", "mechanical-already",
172
+ "tool-call", "NA-mechanical", "mechanical-already",
173
+ notes="Enforced by scripts/skill_linter.py")
174
+ add("markdown-safe-codeblocks.md", "markdown output with code", "hook",
175
+ "output", "medium", "2b", notes="Output-shape check; nesting detection")
176
+
177
+ # ── Auto-rules — Tier 3 (inherent soft / topic-only triggers) ─────────
178
+ add("autonomous-execution.md", "workflow decision (trivial vs blocking)",
179
+ "agent-only", "output", "NA-soft", "3",
180
+ notes="Disposition rule; trivial classification is judgment")
181
+ add("user-interaction.md", "pre-send (every Q&A reply)", "agent-only",
182
+ "output", "NA-soft", "3", notes="Numbered-options Iron Law")
183
+ add("guidelines.md", "before code edit (topic match)", "agent-only",
184
+ "output", "NA-soft", "3", notes="Generic 'check guidelines' nudge")
185
+ add("architecture.md", "new file/class/module creation", "agent-only",
186
+ "output", "NA-soft", "3", notes="Architectural decisions — judgment-bound")
187
+ add("php-coding.md", "PHP file edit", "agent-only", "output",
188
+ "NA-soft", "3", notes="Topic-matched coding guideline")
189
+ add("laravel-translations.md", "lang/ file edit", "hook", "tool-call",
190
+ "low", "2a", dormant="suspected",
191
+ notes="Path-pattern detectable but rare in this repo")
192
+ add("e2e-testing.md", "Playwright file edit", "agent-only", "output",
193
+ "NA-soft", "3", notes="Topic-matched")
194
+ add("docker-commands.md", "PHP CLI in Docker context", "agent-only",
195
+ "output", "NA-soft", "3", notes="Topic-matched")
196
+
197
+ # ── Suspected-dormant entries (per roadmap RH Phase 1 explicit list) ──
198
+ add("command-suggestion-policy.md", "user prompt match (engine-driven)",
199
+ "mechanical-already", "hook", "NA-mechanical", "mechanical-already",
200
+ dormant="suspected",
201
+ notes="Engine in scripts/command_suggester/ exists; live-fire signal unverified — needs telemetry pass")
202
+ add("slash-command-routing-policy.md", "user msg starts with /",
203
+ "hook", "tool-call", "low", "1", dormant="suspected",
204
+ notes="Pattern-detection; live-fire signal unverified")
205
+ add("analysis-skill-routing.md", "analysis skill picker", "agent-only",
206
+ "output", "NA-soft", "3", dormant="suspected",
207
+ notes="Skill-router; no observable surface today")
208
+
209
+
210
def fm(path):
    """Return the YAML frontmatter of *path* as a dict.

    The frontmatter is the text between a leading ``---`` line and the next
    ``---`` line. An empty dict is returned when the file has no leading
    marker, no closing marker, invalid YAML, or YAML whose top level is not
    a mapping (e.g. a bare string or list), so callers can always use
    ``.get(...)`` on the result.
    """
    txt = path.read_text(encoding="utf-8")
    if not txt.startswith("---\n"):
        return {}
    end = txt.find("\n---\n", 4)
    if end == -1:
        return {}
    try:
        data = yaml.safe_load(txt[4:end])
    except yaml.YAMLError:
        return {}
    # safe_load yields None for empty input and may yield a list or scalar;
    # only a mapping is meaningful frontmatter.
    return data if isinstance(data, dict) else {}
221
+
222
+
223
def to_comp(entry: str) -> Path:
    """Resolve a repo-relative context entry to an absolute path.

    An entry under ``SRC_PREFIX`` is re-rooted under ``COMP_PREFIX``; any
    other entry is joined to ``REPO_ROOT`` unchanged.
    """
    relative = (
        COMP_PREFIX + entry[len(SRC_PREFIX):]
        if entry.startswith(SRC_PREFIX)
        else entry
    )
    return REPO_ROOT / relative
227
+
228
+
229
def walk(rule: Path):
    """Collect the contexts reachable from *rule* through its frontmatter.

    Follows ``load_context`` and ``load_context_eager`` entries up to two
    levels below *rule*. Each existing context file is reported once, as an
    ``(entry, size)`` tuple where ``size`` is ``stat().st_size`` of the
    path produced by ``to_comp``. The result is a flat list; the depth at
    which an entry was found is not recorded.
    """
    max_depth = 2
    seen: set[Path] = set()
    chains: list[tuple[str, int]] = []
    stack: list[tuple[Path, int]] = [(rule, 0)]
    while stack:
        node, depth = stack.pop()
        if depth >= max_depth:
            # Every child would exceed the depth cap, so skip the parse.
            continue
        meta = fm(node)  # parse once per node
        if not isinstance(meta, dict):
            continue
        entries: list = []
        for key in ("load_context", "load_context_eager"):
            declared = meta.get(key) or []
            # A scalar value (e.g. `load_context: foo`) is ignored rather
            # than concatenated, matching the list check in _context_count.
            if isinstance(declared, list):
                entries.extend(declared)
        for entry in entries:
            comp = to_comp(str(entry))
            if not comp.exists() or comp in seen:
                continue
            seen.add(comp)
            chains.append((str(entry), comp.stat().st_size))
            stack.append((comp, depth + 1))
    return chains
243
+
244
+
245
def emit() -> int:
    """Generate the rule-trigger matrix markdown file at ``OUT``.

    Sizes come from the file under ``COMP_RULES`` when it exists, otherwise
    from the source rule; ``ext`` adds the sizes of all contexts returned by
    ``walk``. Classification columns come from ``CLASSIFICATION``.

    Returns:
        0 when every rule found in ``SRC_RULES`` has a ``CLASSIFICATION``
        entry, 2 otherwise (the file is written in both cases).
    """
    rows = []
    for r in sorted(SRC_RULES.glob("*.md")):
        rtype = fm(r).get("type", "?")
        comp = COMP_RULES / r.name
        measured = comp if comp.exists() else r
        raw = measured.stat().st_size
        ctx_chains = walk(measured)
        ext = raw + sum(s for _, s in ctx_chains)
        rows.append((r.name, rtype, raw, ext, ctx_chains))

    lines: list[str] = []
    lines.extend([
        "# Rule Trigger Matrix",
        "",
        "**Source:** Phase 1 of `road-to-rule-hardening.md` (self-check audit) +",
        "Phase 1 of `road-to-context-layer-maturity.md` (`load_context:` inventory).",
        "**Generated by:** `scripts/build_rule_trigger_matrix.py` — re-run after rule",
        "set changes. Manual classifications live in the script's `CLASSIFICATION`",
        "table; size and context-chain columns are derived from the rule files.",
        "",
        "## Methodology",
        "",
        "| Column | Meaning |",
        "|---|---|",
        "| `type` | Frontmatter `type` (`always` / `auto`) |",
        "| `raw` | Compressed rule size in chars (`.agent-src/rules/<name>`) |",
        "| `ext` | Extended size under Model (b): raw + transitive `load_context` |",
        "| `trigger` | Observable event that should activate the rule |",
        "| `obs` | Where the trigger is observable: `hook` (platform hook), `settings` (`.agent-settings.yml` state), `agent-only` (in-head), `mechanical-already` (precedent — already enforced by a script) |",
        "| `enforce` | Surface where the rule's effect lands: `output` / `tool-call` / `state` / `hook` / `none` |",
        "| `hook-cost` | Engineering cost to mechanise across Augment + Claude Code: `low` (≤ 1 day, single hook script), `medium` (1–3 days, cross-platform persistence), `high` (≥ 3 days, semantic analysis or output rewrite), `NA-mechanical` (precedent — script exists), `NA-soft` (no platform mechanism plausible) |",
        "| `tier` | Per RH roadmap: `1` mechanical · `2a` marker nudge · `2b` structured injection · `3` inherent soft · `safety-floor` (Iron-Law, never modified) · `mechanical-already` (precedent) |",
        "| `dormant?` | Has the rule observably fired? `no` (yes, fires) · `suspected` (per RH Phase 1 explicit list) · `unknown` |",
        "",
        "## Tier counts",
        "",
    ])

    # Group rule names by tier; unclassified rules land under "?".
    by_tier: dict[str, list[str]] = {}
    for name, _, _, _, _ in rows:
        t = CLASSIFICATION.get(name, {}).get("tier", "?")
        by_tier.setdefault(t, []).append(name)
    for t in ("safety-floor", "mechanical-already", "1", "2a", "2b", "3", "?"):
        if t in by_tier:
            lines.append(f"- **Tier `{t}`** — {len(by_tier[t])} rules")

    lines.extend([
        "",
        "## Matrix",
        "",
        "| Rule | type | raw | ext | trigger | obs | enforce | hook-cost | tier | dormant? | notes |",
        "|---|---|---:|---:|---|---|---|---|---|---|---|",
    ])
    for name, rtype, raw, ext, _ in rows:
        c = CLASSIFICATION.get(name)
        if c is None:
            lines.append(f"| `{name}` | {rtype} | {raw} | {ext} | — | — | — | — | **?** | unknown | NOT CLASSIFIED |")
            continue
        lines.append(
            f"| `{name}` | {rtype} | {raw} | {ext} | "
            f"{c['trigger']} | {c['observability']} | {c['enforcement']} | "
            f"{c['hook_cost']} | {c['tier']} | {c['dormant']} | {c['notes']} |"
        )

    lines.extend([
        "",
        "## `load_context:` chains (CL Phase 1 inventory)",
        "",
        "Rules that load at least one context, with `rule → context → depth → chars`.",
        "Chars are measured on the compressed context file (Model (b) literal).",
        "",
        "| Rule | Context | Depth | Chars |",
        "|---|---|---:|---:|",
    ])
    for name, _, _, _, chains in rows:
        for entry, size in chains:
            # walk() returns a flattened list without depth information, so
            # every row reports depth 1 (the former path-based heuristic was
            # computed and then overwritten, i.e. dead code).
            lines.append(f"| `{name}` | `{entry}` | 1 | {size} |")

    lines.extend([
        "",
        "## Dormant-suspected (per RH Phase 1)",
        "",
    ])
    dormants = [n for n, c in CLASSIFICATION.items() if c["dormant"] == "suspected"]
    for d in sorted(dormants):
        lines.append(f"- `{d}` — {CLASSIFICATION[d]['notes']}")

    lines.extend([
        "",
        "**Action:** absence of failures ≠ healthy trigger. Each suspected-dormant",
        "rule needs a one-session live-fire test before its Tier classification is",
        "locked. Tracked under RH Phase 1 follow-up.",
        "",
        "## Pilot candidates (RH Phase 3)",
        "",
        "Per the RH roadmap pilot-selection criteria (frequency ≥ 30 %, ≥ 2 observed",
        "failures, binary-verifiable trigger, hook-cost = `low`):",
        "",
        "1. **`roadmap-progress-sync`** — file-edit hook on `agents/roadmaps/**`, low cost, deterministic.",
        "2. **`onboarding-gate`** — first-turn settings check, 100 % frequency on un-onboarded projects.",
        "3. **`context-hygiene`** — turn counter, medium cost (cross-platform persistence).",
        "",
        "Order locked in RH Phase 3: 1 → 2 → 3 (smallest hook first).",
        "",
        "## Cross-references",
        "",
        "- Budget contract: [`docs/contracts/load-context-budget-model.md`](../../docs/contracts/load-context-budget-model.md)",
        "- Pattern precedent: `chat-history-cadence` (heartbeat hook + `scripts/chat_history.py`)",
        "- Phase 2A finding: [`adr-always-rule-context-split-not-viable.md`](adr-always-rule-context-split-not-viable.md)",
        "",
    ])

    OUT.parent.mkdir(parents=True, exist_ok=True)
    OUT.write_text("\n".join(lines), encoding="utf-8")
    print(f"✅ Wrote {OUT.relative_to(REPO_ROOT)} ({len(rows)} rules, {len(lines)} lines)")
    # Sanity: every rule classified
    missing = [n for n, *_ in rows if n not in CLASSIFICATION]
    if missing:
        print(f"⚠️ {len(missing)} rule(s) not classified: {missing}", file=sys.stderr)
        return 2
    return 0
357
+
358
+
359
+ if __name__ == "__main__":
360
+ sys.exit(emit())
@@ -69,6 +69,12 @@ TOLERANCE_BAND = 0.02
69
69
  PER_RULE_CAP = 6_000
70
70
  TOP3_CAP = TOTAL_CAP // 2
71
71
  MAX_DEPTH = 2
72
+ # Phase 1.3 Q2 (road-to-context-layer-maturity) — per-rule context count
73
+ # cap. Counts top-level `load_context:` + `load_context_eager:` entries
74
+ # per rule (not transitive depth). Empirical max in the rule set is 3
75
+ # (autonomous-execution); a 4th declared context is the structural
76
+ # signal that the rule should split, not load more.
77
+ MAX_CONTEXTS_PER_RULE = 3
72
78
 
73
79
  # Recovery band (AI Council session 2026-05-03T12-02-42Z, verdict A1).
74
80
  # When enabled, a branch in the 90–100 % gap zone passes as WARN iff its
@@ -171,6 +177,29 @@ def _always_rules() -> list[Path]:
171
177
  return sorted(p for p in RULES_DIR.glob("*.md") if _is_always(p))
172
178
 
173
179
 
180
def _all_rules() -> list[Path]:
    """Return every ``*.md`` file directly under ``RULES_DIR``, sorted."""
    rule_files = list(RULES_DIR.glob("*.md"))
    rule_files.sort()
    return rule_files
182
+
183
+
184
def _context_count(rule: Path) -> int:
    """Count the contexts a rule declares directly in its frontmatter.

    Adds the lengths of the ``load_context`` and ``load_context_eager``
    values; a missing, empty, or non-list value contributes 0.
    """
    meta = _frontmatter(rule)
    total = 0
    for key in ("load_context", "load_context_eager"):
        declared = meta.get(key) or []
        if isinstance(declared, list):
            total += len(declared)
    return total
191
+
192
+
193
def _per_rule_count_breaches() -> list[tuple[str, int]]:
    """Phase 1.3 Q2 — list rules declaring more contexts than the cap.

    Returns ``(rule file name, declared count)`` pairs, in ``_all_rules()``
    order, for every rule whose count exceeds ``MAX_CONTEXTS_PER_RULE``.
    """
    counted = ((rule.name, _context_count(rule)) for rule in _all_rules())
    return [(name, n) for name, n in counted if n > MAX_CONTEXTS_PER_RULE]
201
+
202
+
174
203
  def _extended_size(rule: Path) -> tuple[int, list[tuple[str, str]]]:
175
204
  raw = rule.stat().st_size
176
205
  contexts, violations = _walk_contexts(rule)
@@ -298,6 +327,7 @@ def main() -> int:
298
327
  single_breaches, top3_concentration_breach = _concentration_check(
299
328
  sizes, total_ext
300
329
  )
330
+ count_breaches = _per_rule_count_breaches()
301
331
  failing = (
302
332
  (
303
333
  pct >= FAIL_THRESHOLD
@@ -312,6 +342,7 @@ def main() -> int:
312
342
  or all_violations
313
343
  or single_breaches
314
344
  or top3_concentration_breach is not None
345
+ or count_breaches
315
346
  )
316
347
  if failing:
317
348
  status, rc = "❌ FAIL", 1
@@ -402,6 +433,14 @@ def main() -> int:
402
433
  f"{sum_:,} ({frac * 100:.1f}%)"
403
434
  )
404
435
 
436
+ if count_breaches:
437
+ details = ", ".join(f"{n}={c}" for n, c in count_breaches)
438
+ print(
439
+ f"\n Per-rule context-count cap breach "
440
+ f"(> {MAX_CONTEXTS_PER_RULE} declared contexts, Q2 "
441
+ f"road-to-context-layer-maturity Phase 1.3): {details}"
442
+ )
443
+
405
444
  # Phase 5.3 — per-rule trend delta vs. previous run.
406
445
  prev = _last_trend()
407
446
  if prev is not None and not args.quiet: