@event4u/agent-config 1.16.0 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. package/.agent-src/commands/{agents-audit.md → agents/audit.md} +4 -3
  2. package/.agent-src/commands/{agents-cleanup.md → agents/cleanup.md} +12 -6
  3. package/.agent-src/commands/{agents-prepare.md → agents/prepare.md} +4 -3
  4. package/.agent-src/commands/agents.md +46 -0
  5. package/.agent-src/commands/{chat-history-checkpoint.md → chat-history/checkpoint.md} +4 -4
  6. package/.agent-src/commands/{chat-history-clear.md → chat-history/clear.md} +4 -4
  7. package/.agent-src/commands/{chat-history-resume.md → chat-history/resume.md} +4 -4
  8. package/.agent-src/commands/chat-history/show.md +107 -0
  9. package/.agent-src/commands/chat-history.md +33 -89
  10. package/.agent-src/commands/{commit-in-chunks.md → commit/in-chunks.md} +15 -13
  11. package/.agent-src/commands/commit.md +22 -2
  12. package/.agent-src/commands/{context-create.md → context/create.md} +4 -3
  13. package/.agent-src/commands/{context-refactor.md → context/refactor.md} +4 -3
  14. package/.agent-src/commands/context.md +44 -0
  15. package/.agent-src/commands/{copilot-agents-init.md → copilot-agents/init.md} +4 -3
  16. package/.agent-src/commands/{copilot-agents-optimize.md → copilot-agents/optimize.md} +4 -3
  17. package/.agent-src/commands/copilot-agents.md +44 -0
  18. package/.agent-src/commands/council/default.md +221 -0
  19. package/.agent-src/commands/{council-design.md → council/design.md} +6 -5
  20. package/.agent-src/commands/{council-optimize.md → council/optimize.md} +7 -6
  21. package/.agent-src/commands/{council-pr.md → council/pr.md} +6 -5
  22. package/.agent-src/commands/council.md +47 -212
  23. package/.agent-src/commands/{create-pr-description.md → create-pr/description-only.md} +4 -2
  24. package/.agent-src/commands/create-pr.md +26 -5
  25. package/.agent-src/commands/{feature-dev.md → feature/dev.md} +5 -10
  26. package/.agent-src/commands/{feature-explore.md → feature/explore.md} +4 -8
  27. package/.agent-src/commands/{feature-plan.md → feature/plan.md} +4 -8
  28. package/.agent-src/commands/{feature-refactor.md → feature/refactor.md} +4 -8
  29. package/.agent-src/commands/{feature-roadmap.md → feature/roadmap.md} +6 -10
  30. package/.agent-src/commands/feature.md +6 -12
  31. package/.agent-src/commands/{fix-ci.md → fix/ci.md} +4 -8
  32. package/.agent-src/commands/{fix-portability.md → fix/portability.md} +4 -8
  33. package/.agent-src/commands/{fix-pr-bot-comments.md → fix/pr-bots.md} +4 -8
  34. package/.agent-src/commands/{fix-pr-developer-comments.md → fix/pr-developers.md} +4 -8
  35. package/.agent-src/commands/{fix-pr-comments.md → fix/pr.md} +7 -11
  36. package/.agent-src/commands/{fix-references.md → fix/refs.md} +4 -8
  37. package/.agent-src/commands/{fix-seeder.md → fix/seeder.md} +4 -8
  38. package/.agent-src/commands/fix.md +7 -13
  39. package/.agent-src/commands/{do-and-judge.md → judge/on-diff.md} +4 -3
  40. package/.agent-src/commands/judge/solo.md +90 -0
  41. package/.agent-src/commands/{do-in-steps.md → judge/steps.md} +4 -3
  42. package/.agent-src/commands/judge.md +35 -70
  43. package/.agent-src/commands/{memory-add.md → memory/add.md} +4 -3
  44. package/.agent-src/commands/{memory-full.md → memory/load.md} +4 -3
  45. package/.agent-src/commands/{memory-promote.md → memory/promote.md} +4 -3
  46. package/.agent-src/commands/{propose-memory.md → memory/propose.md} +4 -3
  47. package/.agent-src/commands/memory.md +48 -0
  48. package/.agent-src/commands/{module-create.md → module/create.md} +4 -3
  49. package/.agent-src/commands/{module-explore.md → module/explore.md} +4 -3
  50. package/.agent-src/commands/module.md +44 -0
  51. package/.agent-src/commands/{optimize-agents.md → optimize/agents.md} +4 -8
  52. package/.agent-src/commands/{optimize-augmentignore.md → optimize/augmentignore.md} +4 -9
  53. package/.agent-src/commands/{optimize-rtk-filters.md → optimize/rtk.md} +4 -8
  54. package/.agent-src/commands/{optimize-skills.md → optimize/skills.md} +4 -8
  55. package/.agent-src/commands/optimize.md +4 -10
  56. package/.agent-src/commands/{override-create.md → override/create.md} +4 -3
  57. package/.agent-src/commands/{override-manage.md → override/manage.md} +4 -3
  58. package/.agent-src/commands/override.md +44 -0
  59. package/.agent-src/commands/{roadmap-create.md → roadmap/create.md} +4 -3
  60. package/.agent-src/commands/{roadmap-execute.md → roadmap/execute.md} +4 -3
  61. package/.agent-src/commands/roadmap.md +44 -0
  62. package/.agent-src/commands/{tests-create.md → tests/create.md} +4 -3
  63. package/.agent-src/commands/{tests-execute.md → tests/execute.md} +4 -3
  64. package/.agent-src/commands/tests.md +44 -0
  65. package/.agent-src/contexts/communication/rules-auto/artifact-engagement-recording-mechanics.md +72 -0
  66. package/.agent-src/contexts/communication/rules-auto/augment-portability-mechanics.md +79 -0
  67. package/.agent-src/contexts/communication/rules-auto/augment-source-of-truth-mechanics.md +98 -0
  68. package/.agent-src/contexts/communication/rules-auto/cli-output-handling-mechanics.md +87 -0
  69. package/.agent-src/contexts/communication/rules-auto/command-suggestion-policy-mechanics.md +62 -0
  70. package/.agent-src/contexts/communication/rules-auto/docs-sync-mechanics.md +78 -0
  71. package/.agent-src/contexts/communication/rules-auto/package-ci-checks-mechanics.md +85 -0
  72. package/.agent-src/contexts/communication/rules-auto/review-routing-awareness-mechanics.md +65 -0
  73. package/.agent-src/contexts/communication/rules-auto/roadmap-progress-sync-mechanics.md +78 -0
  74. package/.agent-src/contexts/communication/rules-auto/skill-quality-mechanics.md +62 -0
  75. package/.agent-src/contexts/communication/rules-auto/slash-command-routing-policy-mechanics.md +55 -0
  76. package/.agent-src/contexts/communication/rules-auto/ui-audit-gate-mechanics.md +53 -0
  77. package/.agent-src/contexts/communication/rules-auto/user-interaction-mechanics.md +77 -0
  78. package/.agent-src/contexts/judges/no-consolidate-rationale.md +102 -0
  79. package/.agent-src/contexts/judges/persona-voice-rubric.md +140 -0
  80. package/.agent-src/rules/artifact-engagement-recording.md +13 -69
  81. package/.agent-src/rules/ask-when-uncertain.md +27 -42
  82. package/.agent-src/rules/augment-portability.md +15 -61
  83. package/.agent-src/rules/augment-source-of-truth.md +27 -93
  84. package/.agent-src/rules/cli-output-handling.md +10 -76
  85. package/.agent-src/rules/command-suggestion-policy.md +18 -59
  86. package/.agent-src/rules/commit-conventions.md +17 -14
  87. package/.agent-src/rules/context-hygiene.md +6 -0
  88. package/.agent-src/rules/direct-answers.md +35 -59
  89. package/.agent-src/rules/docker-commands.md +5 -5
  90. package/.agent-src/rules/docs-sync.md +15 -69
  91. package/.agent-src/rules/language-and-tone.md +48 -72
  92. package/.agent-src/rules/missing-tool-handling.md +28 -22
  93. package/.agent-src/rules/no-cheap-questions.md +39 -53
  94. package/.agent-src/rules/no-roadmap-references.md +73 -0
  95. package/.agent-src/rules/onboarding-gate.md +7 -0
  96. package/.agent-src/rules/package-ci-checks.md +21 -61
  97. package/.agent-src/rules/preservation-guard.md +64 -29
  98. package/.agent-src/rules/review-routing-awareness.md +24 -43
  99. package/.agent-src/rules/roadmap-progress-sync.md +31 -65
  100. package/.agent-src/rules/rule-type-governance.md +28 -0
  101. package/.agent-src/rules/security-sensitive-stop.md +8 -8
  102. package/.agent-src/rules/skill-quality.md +16 -48
  103. package/.agent-src/rules/slash-command-routing-policy.md +7 -4
  104. package/.agent-src/rules/think-before-action.md +52 -42
  105. package/.agent-src/rules/tool-safety.md +19 -16
  106. package/.agent-src/rules/ui-audit-gate.md +24 -38
  107. package/.agent-src/rules/user-interaction.md +13 -68
  108. package/.agent-src/skills/ai-council/SKILL.md +2 -0
  109. package/.agent-src/skills/api-testing/SKILL.md +1 -1
  110. package/.agent-src/skills/check-refs/SKILL.md +59 -40
  111. package/.agent-src/skills/conventional-commits-writing/SKILL.md +86 -28
  112. package/.agent-src/skills/copilot-agents-optimization/SKILL.md +5 -5
  113. package/.agent-src/skills/developer-like-execution/SKILL.md +4 -4
  114. package/.agent-src/skills/finishing-a-development-branch/SKILL.md +101 -65
  115. package/.agent-src/skills/flux/SKILL.md +30 -10
  116. package/.agent-src/skills/github-ci/SKILL.md +2 -2
  117. package/.agent-src/skills/judge-code-quality/SKILL.md +7 -8
  118. package/.agent-src/skills/judge-security-auditor/SKILL.md +4 -5
  119. package/.agent-src/skills/judge-test-coverage/SKILL.md +3 -4
  120. package/.agent-src/skills/lint-skills/SKILL.md +57 -39
  121. package/.agent-src/skills/md-language-check/SKILL.md +61 -39
  122. package/.agent-src/skills/override-management/SKILL.md +5 -5
  123. package/.agent-src/skills/quality-tools/SKILL.md +2 -2
  124. package/.agent-src/skills/react-shadcn-ui/SKILL.md +116 -43
  125. package/.agent-src/skills/readme-reviewer/SKILL.md +30 -29
  126. package/.agent-src/skills/readme-writing/SKILL.md +78 -53
  127. package/.agent-src/skills/readme-writing-package/SKILL.md +50 -47
  128. package/.agent-src/skills/receiving-code-review/SKILL.md +52 -47
  129. package/.agent-src/skills/refine-prompt/SKILL.md +0 -1
  130. package/.agent-src/skills/requesting-code-review/SKILL.md +35 -30
  131. package/.agent-src/skills/security/SKILL.md +7 -2
  132. package/.agent-src/skills/security-audit/SKILL.md +7 -3
  133. package/.agent-src/skills/systematic-debugging/SKILL.md +68 -60
  134. package/.agent-src/skills/test-driven-development/SKILL.md +59 -57
  135. package/.agent-src/skills/test-performance/SKILL.md +0 -1
  136. package/.agent-src/skills/traefik/SKILL.md +4 -4
  137. package/.agent-src/skills/verify-completion-evidence/SKILL.md +28 -26
  138. package/.agent-src/templates/roadmaps.md +4 -0
  139. package/.claude-plugin/marketplace.json +22 -11
  140. package/AGENTS.md +2 -2
  141. package/CHANGELOG.md +125 -1
  142. package/README.md +18 -17
  143. package/docs/architecture.md +4 -6
  144. package/docs/catalog.md +67 -39
  145. package/docs/contracts/STABILITY.md +13 -7
  146. package/docs/contracts/adr-chat-history-split.md +1 -3
  147. package/docs/contracts/adr-command-suggestion.md +0 -2
  148. package/docs/contracts/adr-implement-ticket-runtime.md +1 -2
  149. package/docs/contracts/adr-product-ui-track.md +3 -6
  150. package/docs/contracts/adr-prompt-driven-execution.md +3 -4
  151. package/docs/contracts/agent-memory-contract.md +6 -11
  152. package/docs/contracts/artifact-engagement-flow.md +6 -9
  153. package/docs/contracts/command-clusters.md +56 -46
  154. package/docs/contracts/command-suggestion-flow.md +1 -3
  155. package/docs/contracts/context-paths.md +99 -0
  156. package/docs/contracts/file-ownership-matrix.json +6722 -0
  157. package/docs/contracts/file-ownership-matrix.md +134 -0
  158. package/docs/contracts/implement-ticket-flow.md +6 -9
  159. package/docs/contracts/linear-ai-rules-inclusion.md +0 -1
  160. package/docs/contracts/linear-ai-three-layers.md +0 -2
  161. package/docs/contracts/load-context-budget-model.md +258 -0
  162. package/docs/contracts/load-context-schema.md +21 -3
  163. package/docs/contracts/roadmap-complexity-standard.md +137 -0
  164. package/docs/contracts/rule-interactions.md +0 -1
  165. package/docs/contracts/rule-priority-hierarchy.md +1 -1
  166. package/docs/contracts/ui-track-flow.md +7 -17
  167. package/docs/customization.md +2 -0
  168. package/docs/getting-started.md +5 -4
  169. package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +134 -0
  170. package/docs/guidelines/agent-infra/asking-and-brevity-examples.md +100 -0
  171. package/docs/guidelines/agent-infra/direct-answers-demos.md +145 -0
  172. package/docs/guidelines/agent-infra/verify-before-complete-demos.md +128 -0
  173. package/package.json +1 -1
  174. package/scripts/_phase2_shim_helper.py +109 -0
  175. package/scripts/agent-config +30 -0
  176. package/scripts/ai_council/one_off_archive/2026-05/README.md +45 -0
  177. package/scripts/ai_council/one_off_archive/2026-05/_one_off_2a4_acceptance.py +208 -0
  178. package/scripts/ai_council/one_off_archive/2026-05/_one_off_budget_v2_audit.py +206 -0
  179. package/scripts/ai_council/one_off_archive/2026-05/_one_off_context_layer_v1_estimate.py +67 -0
  180. package/scripts/ai_council/one_off_archive/2026-05/_one_off_context_layer_v1_review.py +292 -0
  181. package/scripts/ai_council/one_off_archive/2026-05/_one_off_followups_review.py +259 -0
  182. package/scripts/ai_council/one_off_archive/2026-05/_one_off_nondestructive_inline_audit.py +209 -0
  183. package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase4_dispatch_latency.py +108 -0
  184. package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase6_trigger_jaccard.py +92 -0
  185. package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase_2a_budget_rebalance.py +257 -0
  186. package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase_2a_post_revert.py +197 -0
  187. package/scripts/ai_council/one_off_archive/2026-05/_one_off_rule_hardening_v1.py +251 -0
  188. package/scripts/ai_council/one_off_archive/2026-05/_one_off_structural_open_questions.py +232 -0
  189. package/scripts/ai_council/one_off_archive/2026-05/_one_off_structural_optimization.py +144 -0
  190. package/scripts/ai_council/one_off_archive/2026-05/_one_off_structural_v3_gaps.py +252 -0
  191. package/scripts/ai_council/one_off_archive/2026-05/_one_off_structural_v3_review.py +240 -0
  192. package/scripts/build_rule_trigger_matrix.py +360 -0
  193. package/scripts/check_always_budget.py +402 -45
  194. package/scripts/check_cluster_patterns.py +159 -0
  195. package/scripts/check_command_count_messaging.py +14 -7
  196. package/scripts/check_context_paths.py +201 -0
  197. package/scripts/check_no_roadmap_refs.py +155 -0
  198. package/scripts/check_one_off_location.py +81 -0
  199. package/scripts/check_phase_coupling.py +148 -0
  200. package/scripts/check_portability.py +2 -0
  201. package/scripts/check_references.py +35 -2
  202. package/scripts/check_safety_floor_untouched.py +125 -0
  203. package/scripts/command_suggester/loader.py +4 -1
  204. package/scripts/compress.py +64 -15
  205. package/scripts/context_hygiene_hook.py +173 -0
  206. package/scripts/generate_index.py +6 -2
  207. package/scripts/generate_ownership_matrix.py +323 -0
  208. package/scripts/hooks/augment-context-hygiene.sh +55 -0
  209. package/scripts/hooks/augment-onboarding-gate.sh +55 -0
  210. package/scripts/hooks/augment-roadmap-progress.sh +57 -0
  211. package/scripts/install.py +105 -45
  212. package/scripts/lint_examples.py +98 -0
  213. package/scripts/lint_no_new_atomic_commands.py +12 -11
  214. package/scripts/lint_roadmap_complexity.py +127 -0
  215. package/scripts/onboarding_gate_hook.py +137 -0
  216. package/scripts/requirements-evals.txt +1 -0
  217. package/scripts/roadmap_progress_hook.py +159 -0
  218. package/scripts/schemas/command.schema.json +4 -3
  219. package/scripts/schemas/rule.schema.json +5 -0
  220. package/scripts/skill_linter.py +1 -0
  221. package/scripts/sync_agent_settings.py +25 -2
  222. package/scripts/update_counts.py +7 -0
  223. /package/scripts/ai_council/{_one_off_rebalancing_audit.py → one_off_archive/2026-05/_one_off_rebalancing_audit.py} +0 -0
  224. /package/scripts/ai_council/{_one_off_roundtrip.py → one_off_archive/2026-05/_one_off_roundtrip.py} +0 -0
@@ -0,0 +1,292 @@
1
+ """Council review of road-to-context-layer-maturity.md draft v1 + PR #36.
2
+
3
+ Both reviewers ran independent preview-reviews on PR #36 and converged on
4
+ the same gap: rule layer is mature, context layer is unproven. The host
5
+ agent distilled the convergence into road-to-context-layer-maturity.md
6
+ (draft v1, 6 phases, lightweight tier).
7
+
8
+ Council task: validate v1 against (a) the two preview-reviews summarised
9
+ inline, (b) the actual PR #36 shape (description + diff stat), and (c)
10
+ the existing always-budget contract. Together with the host agent,
11
+ define the binding step list before lock.
12
+
13
+ Single round, structured per-phase verdict. Saves the session under
14
+ agents/council-sessions/.
15
+
16
+ Invocation:
17
+ .venv/bin/python -m scripts.ai_council._one_off_context_layer_v1_review
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import subprocess
22
+ import sys
23
+ from pathlib import Path
24
+
25
+ from scripts.ai_council.bundler import bundle_prompt
26
+ from scripts.ai_council.clients import (
27
+ AnthropicClient,
28
+ OpenAIClient,
29
+ load_anthropic_key,
30
+ load_openai_key,
31
+ )
32
+ from scripts.ai_council.orchestrator import (
33
+ CostBudget,
34
+ CouncilQuestion,
35
+ consult,
36
+ estimate,
37
+ )
38
+ from scripts.ai_council.pricing import estimate_cost, load_prices
39
+ from scripts.ai_council.project_context import detect_project_context
40
+ from scripts.ai_council.session import SessionManifest, save as save_session
41
+
42
+ REPO_ROOT = Path(__file__).resolve().parents[2]
43
+ ROADMAP_PATH = REPO_ROOT / "agents/roadmaps/road-to-context-layer-maturity.md"
44
+
45
+ ORIGINAL_ASK = (
46
+ "Two independent preview-reviews on PR #36 converged on the same "
47
+ "finding: rule layer is now mature; context layer is unproven. The "
48
+ "host agent distilled the convergence into a 6-phase lightweight "
49
+ "roadmap (road-to-context-layer-maturity.md draft v1). Council task: "
50
+ "validate v1 against the reviewers' findings, the PR's actual shape, "
51
+ "and the always-budget contract; together with the host agent, "
52
+ "define the binding step list."
53
+ )
54
+
55
+ REVIEW_PROMPT_HEADER = """\
56
+ # Council Review — road-to-context-layer-maturity.md draft v1 + PR #36
57
+
58
+ ## Background (verbatim, do not re-frame)
59
+
60
+ Two preview-reviews of PR #36 (`feat/better-basement` branch, 174 files,
61
+ +20k/-3.6k diff, structural-optimization roadmap closed + 1.16.0
62
+ follow-ups Phase 1+2 closed) produced converging findings.
63
+
64
+ ### Reviewer #1 — 3-Layer Architecture lens
65
+
66
+ - PR #36 is not a feature PR; it is the first **post-maturity
67
+ architecture move**.
68
+ - Names a 3-Layer Architecture: **Rule** (Obligation / MUST), **Context**
69
+ (Decision Logic / HOW to think), **Examples** (Pattern Memory / HOW it
70
+ looks). Phase 2A revert proved the boundary matters — moving Iron-Law
71
+ prose into context made the rule unsafe.
72
+ - Lauds: contexts/communication/ as a previously-unsolved area;
73
+ always-budget guard + golden tests as a maturity signal.
74
+ - Gaps: (a) no Level-2 context-loading design (chain, priority,
75
+ budget, activation order); (b) no outcome measurement — golden tests
76
+ measure structure, not whether the agent decides better; (c)
77
+ context-activation clarity unclear (when, how many concurrently);
78
+ (d) Iron Laws must stay in Rule, never migrate to Context; (e)
79
+ Examples-as-demos under-leveraged.
80
+ - Verdict: 9.6/10. Recommendation: do NOT add features/skills/rules;
81
+ perfect Context, make Examples real demos, make Decision-System
82
+ visible.
83
+
84
+ ### Reviewer #2 — Consolidation / scope-discipline lens
85
+
86
+ - PR #36 is correctly framed as "Structural optimization foundation +
87
+ regression gates + command surface reduction", not a feature PR.
88
+ - Lauds: Phase 2A abort + honest Model-(b) finding (Context Tax >
89
+ rule-slim gain); roadmap reflexion depth (council rounds, locked
90
+ decisions, file-ownership matrix); test gates; 1.16-followup clean
91
+ closure.
92
+ - Gaps: (1) PR size 174 files reduces reviewability; (2) roadmaps
93
+ too heavy — auditable but not consumable for normal feature work,
94
+ needs complexity standard; (3) always-budget headroom 1,552 chars
95
+ (96.8% utilization) — slimming hebel exhausted, new strategy needed
96
+ (demote / merge / hard-compress / shared-context amortization);
97
+ (4) slow-rollout protocol compressed under autonomy mandate;
98
+ (5) one-off scripts accumulating in scripts/ai_council/.
99
+ - P0 before merge: PR description rewrite, one-off-script decision,
100
+ honest budget block, Phase 2A as finding.
101
+ - Verdict: 8.8/10 current, 9.1/10 with cleanup. "Inhaltlich stark.
102
+ Strategisch richtig. Aber zu groß und zu meta-lastig."
103
+
104
+ ### Convergence
105
+
106
+ - Both: don't add more rules / skills / commands.
107
+ - Both: context activation / loading / budget = the unsolved next
108
+ question.
109
+ - Both: praise audit honesty, criticise size/concentration.
110
+ - Both: PR #36 should ship after cleanup, not be expanded.
111
+
112
+ ## PR #36 shape
113
+
114
+ - 174 files changed · +20,120 / −3,629
115
+ - Branch: feat/better-basement → main (open)
116
+ - Title: docs(roadmap): add structural-optimization v3.1 + 1.16.0 follow-ups v1.1
117
+
118
+ (Full diff stat appended below; PR body appended below.)
119
+
120
+ ## Your task
121
+
122
+ Review **road-to-context-layer-maturity.md draft v1** (full text appended
123
+ below) against:
124
+
125
+ 1. **Reviewer convergence:** does v1 close every gap both reviewers
126
+ named, or does it merely paraphrase them?
127
+ 2. **PR #36 fit:** is Phase 0 (PR closeout) sufficient to ship #36
128
+ honestly, or are items missing that the diff exposes?
129
+ 3. **Always-budget contract:** is Phase 4 (Always-Budget v2)
130
+ structurally sound, or does it re-walk Phase 2A's path?
131
+ 4. **Lightweight-tier discipline:** does the roadmap itself follow
132
+ the standard it locks in Phase 5, or does it cheat its own gate?
133
+ 5. **Sequencing:** is `0 → 1 → 2 → 3 → 4 → 5` the right order, or
134
+ should something move?
135
+
136
+ ## Output contract (STRICT)
137
+
138
+ For EACH of the six phases, produce:
139
+
140
+ ```
141
+ ### Phase N — <title>
142
+
143
+ **Verdict:** <ACCEPT | PARTIAL | REJECT>
144
+ **What v1 gets right (1 sentence):** ...
145
+ **What v1 misses or over-reaches (1-2 sentences):** ...
146
+ **Concrete change to v2 (binding):** ...
147
+ ```
148
+
149
+ Then a final block:
150
+
151
+ ```
152
+ ### Greenlight verdict
153
+
154
+ <one of: FULL GREENLIGHT — lock v1 / CONDITIONAL GREENLIGHT — apply N
155
+ revisions then lock / BLOCKED — major rework needed>
156
+
157
+ **Binding revisions for v2 (numbered, ≤ 8):** ...
158
+ **Estimated total effort:** <engineer-days>
159
+ **One-line strategic risk you would still fly with:** ...
160
+ ```
161
+
162
+ Total response budget: ≤ 1800 words. Do not re-write the roadmap. Do
163
+ not propose a separate roadmap unless you mark BLOCKED.
164
+ """
165
+
166
+
167
+ def _read(path: Path) -> str:
168
+ return path.read_text(encoding="utf-8") if path.exists() else ""
169
+
170
+
171
def _diff_stat() -> str:
    """Return ``git diff --stat origin/main..HEAD`` output, shortened when long.

    Output of more than 60 lines is reduced to its first 30 and last 15 lines
    with a marker line in between; shorter output is returned unchanged.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status
            (the command is run with ``check=True``).
    """
    result = subprocess.run(
        ["git", "diff", "--stat", "origin/main..HEAD"],
        cwd=REPO_ROOT,
        check=True,
        capture_output=True,
        text=True,
    )
    stat_lines = result.stdout.splitlines()
    if len(stat_lines) <= 60:
        return result.stdout
    head, tail = stat_lines[:30], stat_lines[-15:]
    return "\n".join([*head, "... [middle truncated] ...", *tail])
180
+
181
+
182
def _pr_body() -> str:
    """Fetch the body of PR #36 through the GitHub CLI, best effort.

    Returns:
        The standard output of ``gh pr view 36 --json body --jq .body``, or
        the placeholder ``"[gh pr view 36 unavailable]"`` when the command
        cannot be started or exits with a non-zero status.
    """
    try:
        proc = subprocess.run(
            ["gh", "pr", "view", "36", "--json", "body", "--jq", ".body"],
            cwd=REPO_ROOT, check=True, capture_output=True, text=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: gh ran but exited non-zero.
        # OSError: gh could not be launched at all, e.g. FileNotFoundError
        # when the CLI is not installed. Both fall back to the placeholder so
        # the review prompt can still be assembled.
        return "[gh pr view 36 unavailable]"
    return proc.stdout
191
+
192
+
193
def main() -> int:
    """Run the single-round council review of the roadmap draft and save the session.

    Prints a cost estimate, consults the two council members for one round,
    prints per-response and total actual cost, then stores a session manifest.

    Returns:
        0 when a session was saved and no collected response carried an
        error; 1 when the roadmap text is missing or empty, no round
        completed, every final-round response failed, or any collected
        response has an error.
    """
    members = [
        AnthropicClient(api_key=load_anthropic_key(), model="claude-sonnet-4-5"),
        OpenAIClient(api_key=load_openai_key(), model="gpt-4o"),
    ]

    roadmap_body = _read(ROADMAP_PATH)
    if not roadmap_body:
        print(f"[error] roadmap not found: {ROADMAP_PATH}", file=sys.stderr)
        return 1

    # Build the prompt sections in the order they appear in the bundle.
    diff_section = "## PR #36 — diff --stat (origin/main..HEAD)\n\n```\n" + _diff_stat() + "\n```"
    pr_section = "## PR #36 — body (verbatim)\n\n" + _pr_body()
    roadmap_section = "## Roadmap v1 (verbatim, the artefact to validate)\n\n" + roadmap_body
    sections = [REVIEW_PROMPT_HEADER, diff_section, pr_section, roadmap_section]
    context = bundle_prompt("\n\n---\n\n".join(sections))
    project = detect_project_context(REPO_ROOT)
    table = load_prices()

    question = CouncilQuestion(mode="prompt", user_prompt=context.text, max_tokens=4096)

    estimates = estimate(
        question, members, table, project=project, original_ask=ORIGINAL_ASK,
    )
    print("=== ESTIMATE (single round, max tokens) ===")
    total_est = 0.0
    for member, quote in zip(members, estimates):
        print(
            f" {member.name}/{member.model}: ~{quote.input_tokens} in + "
            f"{quote.output_tokens} out = ${quote.total_usd:.4f}"
        )
        total_est += quote.total_usd
    print(f" TOTAL per round (max): ${total_est:.4f}")
    print()

    budget = CostBudget(
        max_input_tokens=200_000,
        max_output_tokens=80_000,
        max_calls=20,
        max_total_usd=2.50,
    )

    rounds_collected: list[list] = []

    def _report_round(round_idx: int, round_responses) -> None:
        # Keep a copy of the round, then print one status line per response.
        rounds_collected.append(list(round_responses))
        print(f"=== ROUND {round_idx + 1} COMPLETE ===")
        for resp in round_responses:
            if resp.error:
                print(f" [error] {resp.provider}/{resp.model}: {resp.error}")
            else:
                cost = estimate_cost(
                    resp.provider, resp.model, resp.input_tokens, resp.output_tokens, table,
                )
                print(
                    f" [done] {resp.provider}/{resp.model}: {resp.input_tokens} in / "
                    f"{resp.output_tokens} out · {resp.latency_ms} ms · ${cost.total_usd:.4f}"
                )
        print()

    print("=== CONSULT (1 round) ===")
    consult(
        members,
        question,
        budget,
        rounds=1,
        on_round_complete=_report_round,
        table=table,
        project=project,
        original_ask=ORIGINAL_ASK,
    )

    if not rounds_collected:
        print("[error] no rounds completed", file=sys.stderr)
        return 1

    # Accumulate with an explicit loop so the float addition order is fixed.
    actual_total = 0.0
    for batch in rounds_collected:
        for resp in batch:
            if not resp.error:
                cost = estimate_cost(
                    resp.provider, resp.model, resp.input_tokens, resp.output_tokens, table,
                )
                actual_total += cost.total_usd
    print(f"=== TOTAL ACTUAL: ${actual_total:.4f} ===")

    final_round = rounds_collected[-1]
    if all(resp.error for resp in final_round):
        # Nothing usable in the last round: do not save a session.
        return 1

    manifest = SessionManifest(
        mode="prompt",
        artefact=str(ROADMAP_PATH.relative_to(REPO_ROOT)),
        original_ask=ORIGINAL_ASK,
        members=[f"{resp.provider}/{resp.model}" for resp in final_round],
        rounds=len(rounds_collected),
        cost_usd_estimated=total_est,
        cost_usd_actual=actual_total,
        extra={"purpose": "Council review of road-to-context-layer-maturity v1 + PR #36"},
    )
    session_dir = save_session(manifest=manifest, responses=rounds_collected)
    print(f"[saved] {session_dir.relative_to(REPO_ROOT)}/")

    any_failed = any(resp.error for batch in rounds_collected for resp in batch)
    return 1 if any_failed else 0
289
+
290
+
291
+ if __name__ == "__main__":
292
+ raise SystemExit(main())
@@ -0,0 +1,259 @@
1
+ """Council review of road-to-1-16-followups.md (draft v1).
2
+
3
+ The followups roadmap distils 14 numbered findings (F1-F14) from two
4
+ external review rounds of release 1.16.0 into three phases:
5
+ - Phase 0: reviewer P0/P1/P2 stack (README sync, budget headroom,
6
+ council labelling, release-tag gate). Effort <= 1 hour.
7
+ - Phase 1: load_context: rollout to three medium-risk policy rules
8
+ + non-destructive-by-default failure-mode-only split + 1.15-followups
9
+ archive verification. Effort ~2 days.
10
+ - Phase 2: README "Start here" anchor, host-agent wording precision,
11
+ golden-test failure-mode coverage. Effort ~0.5 days.
12
+
13
+ Single round. Both members produce a structured per-phase verdict and
14
+ together with the host agent define the binding step list before the
15
+ roadmap is locked.
16
+
17
+ Invocation:
18
+ .venv/bin/python -m scripts.ai_council._one_off_followups_review
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import sys
23
+ from pathlib import Path
24
+
25
+ from scripts.ai_council.bundler import bundle_roadmap
26
+ from scripts.ai_council.clients import (
27
+ AnthropicClient,
28
+ OpenAIClient,
29
+ load_anthropic_key,
30
+ load_openai_key,
31
+ )
32
+ from scripts.ai_council.orchestrator import (
33
+ CostBudget,
34
+ CouncilQuestion,
35
+ consult,
36
+ estimate,
37
+ )
38
+ from scripts.ai_council.pricing import estimate_cost, load_prices
39
+ from scripts.ai_council.project_context import detect_project_context
40
+ from scripts.ai_council.session import SessionManifest, save as save_session
41
+
42
+ REPO_ROOT = Path(__file__).resolve().parents[2]
43
+ ROADMAP_PATH = REPO_ROOT / "agents/roadmaps/road-to-1-16-followups.md"
44
+
45
+ ORIGINAL_ASK = (
46
+ "Two external review rounds on release 1.16.0 produced 14 numbered "
47
+ "findings (F1-F14) and a P0/P1/P2 fix stack. The host agent distilled "
48
+ "them into road-to-1-16-followups.md (draft v1, 3 phases, 7 risks). "
49
+ "Council task: evaluate the F1-F14 capture quality, the phase split, "
50
+ "the gating between this roadmap and road-to-structural-optimization "
51
+ "v3.1, and the per-step contracts. Together with the host agent, "
52
+ "define the binding step list before lock-in."
53
+ )
54
+
55
+ REVIEW_PROMPT = """\
56
+ # Council Review — road-to-1-16-followups.md draft v1
57
+
58
+ Context for both reviewers:
59
+
60
+ - A separate roadmap (`road-to-structural-optimization.md`) was \
61
+ finalised at v3.1 in a prior session. You both reviewed it across \
62
+ five rounds. v3.1 is locked. **Do not re-litigate it.** This review \
63
+ is about a *different*, *narrower* roadmap that addresses post-1.16.0 \
64
+ PR feedback.
65
+ - Reviewer 1 (consolidation-quality lens) scored 1.16.0 at 9.5/10. \
66
+ Concerns: surface still large, README dense, context-layer rollout \
67
+ pace, "agent runs / implements" overclaim risk, golden-test failure-\
68
+ mode bias. Strategy notes (F5-F8): roll out load_context: slowly, \
69
+ prepare a second command-collapse phase, ship outcome demos, do not \
70
+ inflate the README.
71
+ - Reviewer 2 (commit-level audit, raw.githubusercontent.com verified) \
72
+ scored 1.16.0 at A-. Concrete findings:
73
+ - **F9 (P0):** README on `main` shows pre-1.15.0 wording \
74
+ ("Teach your AI agents Laravel...", "124 Skills · 46 Rules · \
75
+ 73 Commands · 46 Guidelines"). The 1.16.0 release tag has the \
76
+ correct text but is not an ancestor of `main`. Reviewer-supplied \
77
+ cherry-pick SHAs: 1053d56, d26bf68, 2fa8022, c282ae3.
78
+ - **F10 (P1):** `tests/test_always_budget.py` top-5 cap headroom is \
79
+ ~141 chars after `no-cheap-questions` was added. Target: >= 2,000 \
80
+ chars headroom. Q3=A from structural-optimization v3.1 excludes \
81
+ safety-floor rules from slim work.
82
+ - **F11 (P2):** AI Council Phase 3-4 (debate, persistence, special \
83
+ modes) grew without external-user signal. Reviewer fix: experimental \
84
+ banner, no code cut.
85
+ - **F14:** 1.16.0 tag points to commit 08daac9 which is not on `main`. \
86
+ Workflow gap.
87
+ - Three Reviewer-1 strategy items (F5/F6/F7) and one (F8) on golden \
88
+ tests are also captured. The host agent placed them as: F5/F6/F7 = \
89
+ out-of-scope strategy notes (next-roadmap-generation seed); F8 = \
90
+ Phase 2.3.
91
+
92
+ You both reviewed v3.1 already. **Stop re-reviewing v3.1.** Pick a \
93
+ binding verdict per phase of *this* followups roadmap, and together \
94
+ with the host agent define the final step list.
95
+
96
+ ## Output Contract (STRICT)
97
+
98
+ For each phase, produce exactly this block:
99
+
100
+ ```
101
+ ### Phase <0|1|2>: <theme>
102
+
103
+ **Verdict:** <ACCEPT | ACCEPT_WITH_REVISIONS | REJECT>
104
+ **Capture quality (F-items folded into this phase):** <COMPLETE | PARTIAL — list missing>
105
+ **Gate correctness:** <CORRECT | UNDER-GATED | OVER-GATED>
106
+ **Per-step contract clarity:** <SUFFICIENT | NEEDS_TIGHTENING — name steps>
107
+ **Required revisions (numbered, 1-3 max):**
108
+ 1. <one sentence — smallest change>
109
+ 2. <...>
110
+ 3. <...>
111
+ **Risk register coverage:** <SUFFICIENT | MISSING — name risk>
112
+ ```
113
+
114
+ Verdict definitions:
115
+ - **ACCEPT** — phase ships as written; no revisions required.
116
+ - **ACCEPT_WITH_REVISIONS** — phase ships after the listed revisions \
117
+ land. Each revision must be one specific, actionable change.
118
+ - **REJECT** — phase is structurally wrong; describe the structural \
119
+ fault in <= 2 sentences in the revisions block.
120
+
121
+ ## The Three Phases
122
+
123
+ The roadmap text follows this prompt verbatim. Read it first, then \
124
+ render the three blocks above for Phase 0, Phase 1, Phase 2.
125
+
126
+ ## Cross-cutting questions (answer after the three blocks)
127
+
128
+ ```
129
+ ### Cross-cutting
130
+
131
+ **Is the F1-F14 placement correct?** \
132
+ <YES | NO — list misplaced F-numbers and where they should go>
133
+ **Is the gating to road-to-structural-optimization v3.1 correct?** \
134
+ <YES | NO — Phase 1 currently gates on v3.1 Phase 0.4 worked example>
135
+ **Is anything missing entirely from the F1-F14 capture that the \
136
+ source feedback raised?** \
137
+ <NO | YES — list missing observation + which phase/risk it belongs to>
138
+ **Is anything over-scoped (should not be a roadmap item at all)?** \
139
+ <NO | YES — list and justify>
140
+ **Reviewer-1 strategy items F5/F6/F7 placement (out-of-scope, \
141
+ next-roadmap-generation seed) — correct?** \
142
+ <YES | NO — propose alternative placement>
143
+ ```
144
+
145
+ ## Final Output
146
+
147
+ After the three phase blocks and the cross-cutting block, add:
148
+
149
+ ```
150
+ ### Consensus verdict
151
+
152
+ **Overall recommendation:** <one of: LOCK_AS_IS | LOCK_AFTER_REVISIONS \
153
+ | RESCOPE_REQUIRED>
154
+ **Phase-0 launchable now?** <YES | NO — reasons>
155
+ **Phase-1 unblocked once structural v3.1 Phase 0.4 lands?** \
156
+ <YES | NO — reasons>
157
+ **Total residual revisions:** <count and sum of effort estimates>
158
+ **Single biggest risk in this followups roadmap that the host agent \
159
+ has under-weighted:** <one sentence>
160
+ ```
161
+
162
+ Be decisive. The host agent will integrate your two verdicts; \
163
+ divergence triggers a tie-break round, but only if you actually \
164
+ disagree on a *blocking* item. Total response budget: <= 1500 words \
165
+ per reviewer.
166
+ """
167
+
168
+
169
def main() -> int:
    """Run the single-round council review of the followups roadmap.

    Steps, in order: build the two council members, bundle the roadmap and
    append it to ``REVIEW_PROMPT``, print a per-member cost estimate, call
    ``consult`` for one round while collecting responses through a callback,
    print the actual cost, and persist the session via ``save_session``.

    Returns:
        1 when no round was collected, when every response of the last
        collected round carries an error, or when any collected response
        carries an error; 0 otherwise.
    """
    claude_member = AnthropicClient(api_key=load_anthropic_key(), model="claude-sonnet-4-5")
    gpt_member = OpenAIClient(api_key=load_openai_key(), model="gpt-4o")
    members = [claude_member, gpt_member]

    bundle = bundle_roadmap(ROADMAP_PATH)
    project = detect_project_context(REPO_ROOT)
    price_table = load_prices()

    # The roadmap text is appended after the prompt, separated by a rule.
    question = CouncilQuestion(
        mode="roadmap",
        user_prompt="\n\n---\n\n".join((REVIEW_PROMPT, bundle.text)),
        max_tokens=4096,
    )

    quotes = estimate(
        question,
        members,
        price_table,
        project=project,
        original_ask=ORIGINAL_ASK,
    )
    print("=== ESTIMATE (single round, max tokens) ===")
    total_est = 0.0
    for member, quote in zip(members, quotes):
        print(
            f" {member.name}/{member.model}: ~{quote.input_tokens} in + "
            f"{quote.output_tokens} out = ${quote.total_usd:.4f}"
        )
        total_est += quote.total_usd
    print(f" TOTAL per round (max): ${total_est:.4f}")
    print()

    budget = CostBudget(
        max_input_tokens=200_000,
        max_output_tokens=80_000,
        max_calls=20,
        max_total_usd=2.50,
    )

    # Filled by the callback below; one inner list per completed round.
    rounds_collected: list[list] = []

    def _report_round(round_idx: int, round_responses) -> None:
        """Store a snapshot of the round and print one status line per response."""
        rounds_collected.append(list(round_responses))
        print(f"=== ROUND {round_idx + 1} COMPLETE ===")
        for resp in round_responses:
            if resp.error:
                print(f" [error] {resp.provider}/{resp.model}: {resp.error}")
            else:
                cost = estimate_cost(
                    resp.provider, resp.model, resp.input_tokens, resp.output_tokens, price_table,
                )
                print(
                    f" [done] {resp.provider}/{resp.model}: {resp.input_tokens} in / "
                    f"{resp.output_tokens} out · {resp.latency_ms} ms · ${cost.total_usd:.4f}"
                )
        print()

    print("=== CONSULT (1 round, followups roadmap review) ===")
    consult(
        members,
        question,
        budget,
        rounds=1,
        on_round_complete=_report_round,
        table=price_table,
        project=project,
        original_ask=ORIGINAL_ASK,
    )

    if len(rounds_collected) == 0:
        print("[error] no rounds completed", file=sys.stderr)
        return 1

    # Explicit running total (rather than sum()) over every non-errored
    # response, walking rounds and responses in collection order.
    actual_total = 0.0
    for resp in (r for batch in rounds_collected for r in batch):
        if not resp.error:
            spent = estimate_cost(
                resp.provider, resp.model, resp.input_tokens, resp.output_tokens, price_table,
            )
            actual_total += spent.total_usd
    print(f"=== TOTAL ACTUAL: ${actual_total:.4f} ===")

    final_round = rounds_collected[-1]
    # Nothing usable in the last round: skip saving the session.
    if all(resp.error for resp in final_round):
        return 1

    member_labels = [f"{resp.provider}/{resp.model}" for resp in final_round]
    manifest = SessionManifest(
        mode="roadmap",
        artefact=str(ROADMAP_PATH.relative_to(REPO_ROOT)),
        original_ask=ORIGINAL_ASK,
        members=member_labels,
        rounds=len(rounds_collected),
        cost_usd_estimated=total_est,
        cost_usd_actual=actual_total,
        extra={"purpose": "Council review of road-to-1-16-followups.md draft v1"},
    )
    session_dir = save_session(manifest=manifest, responses=rounds_collected)
    print(f"[saved] {session_dir.relative_to(REPO_ROOT)}/")

    # A partially failed run is still saved but reported as a failure.
    had_errors = any(resp.error for batch in rounds_collected for resp in batch)
    return int(had_errors)
256
+
257
+
258
# Script entry point: hand main()'s integer result to the interpreter as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())