@event4u/agent-config 1.17.0 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. package/.agent-src/commands/council/default.md +74 -76
  2. package/.agent-src/commands/feature/roadmap.md +22 -0
  3. package/.agent-src/commands/roadmap/create.md +38 -6
  4. package/.agent-src/commands/roadmap/execute.md +36 -9
  5. package/.agent-src/rules/agent-authority.md +1 -0
  6. package/.agent-src/rules/agent-docs.md +1 -0
  7. package/.agent-src/rules/analysis-skill-routing.md +1 -0
  8. package/.agent-src/rules/architecture.md +1 -0
  9. package/.agent-src/rules/artifact-drafting-protocol.md +1 -0
  10. package/.agent-src/rules/artifact-engagement-recording.md +1 -0
  11. package/.agent-src/rules/ask-when-uncertain.md +1 -0
  12. package/.agent-src/rules/augment-portability.md +1 -0
  13. package/.agent-src/rules/augment-source-of-truth.md +1 -0
  14. package/.agent-src/rules/autonomous-execution.md +1 -0
  15. package/.agent-src/rules/capture-learnings.md +1 -0
  16. package/.agent-src/rules/chat-history-cadence.md +34 -0
  17. package/.agent-src/rules/chat-history-ownership.md +1 -0
  18. package/.agent-src/rules/chat-history-visibility.md +1 -0
  19. package/.agent-src/rules/cli-output-handling.md +2 -2
  20. package/.agent-src/rules/command-suggestion-policy.md +1 -0
  21. package/.agent-src/rules/commit-conventions.md +1 -0
  22. package/.agent-src/rules/commit-policy.md +1 -0
  23. package/.agent-src/rules/context-hygiene.md +28 -0
  24. package/.agent-src/rules/direct-answers.md +18 -26
  25. package/.agent-src/rules/docker-commands.md +1 -0
  26. package/.agent-src/rules/docs-sync.md +1 -0
  27. package/.agent-src/rules/downstream-changes.md +1 -0
  28. package/.agent-src/rules/e2e-testing.md +1 -0
  29. package/.agent-src/rules/guidelines.md +1 -0
  30. package/.agent-src/rules/improve-before-implement.md +1 -0
  31. package/.agent-src/rules/language-and-tone.md +1 -0
  32. package/.agent-src/rules/laravel-translations.md +1 -0
  33. package/.agent-src/rules/markdown-safe-codeblocks.md +1 -0
  34. package/.agent-src/rules/minimal-safe-diff.md +1 -0
  35. package/.agent-src/rules/missing-tool-handling.md +1 -0
  36. package/.agent-src/rules/model-recommendation.md +1 -0
  37. package/.agent-src/rules/no-cheap-questions.md +15 -21
  38. package/.agent-src/rules/no-roadmap-references.md +1 -0
  39. package/.agent-src/rules/non-destructive-by-default.md +1 -0
  40. package/.agent-src/rules/onboarding-gate.md +33 -0
  41. package/.agent-src/rules/package-ci-checks.md +1 -0
  42. package/.agent-src/rules/php-coding.md +1 -0
  43. package/.agent-src/rules/preservation-guard.md +1 -0
  44. package/.agent-src/rules/review-routing-awareness.md +1 -0
  45. package/.agent-src/rules/reviewer-awareness.md +1 -0
  46. package/.agent-src/rules/roadmap-progress-sync.md +49 -0
  47. package/.agent-src/rules/role-mode-adherence.md +2 -2
  48. package/.agent-src/rules/rule-type-governance.md +29 -0
  49. package/.agent-src/rules/runtime-safety.md +1 -0
  50. package/.agent-src/rules/scope-control.md +1 -0
  51. package/.agent-src/rules/security-sensitive-stop.md +1 -0
  52. package/.agent-src/rules/size-enforcement.md +1 -0
  53. package/.agent-src/rules/skill-improvement-trigger.md +1 -0
  54. package/.agent-src/rules/skill-quality.md +1 -0
  55. package/.agent-src/rules/slash-command-routing-policy.md +39 -0
  56. package/.agent-src/rules/think-before-action.md +1 -0
  57. package/.agent-src/rules/token-efficiency.md +1 -0
  58. package/.agent-src/rules/tool-safety.md +1 -0
  59. package/.agent-src/rules/ui-audit-gate.md +1 -0
  60. package/.agent-src/rules/upstream-proposal.md +1 -0
  61. package/.agent-src/rules/user-interaction.md +1 -0
  62. package/.agent-src/rules/verify-before-complete.md +1 -0
  63. package/.agent-src/skills/roadmap-management/SKILL.md +29 -4
  64. package/.agent-src/skills/verify-completion-evidence/SKILL.md +8 -1
  65. package/.agent-src/templates/agent-settings.md +16 -0
  66. package/.agent-src/templates/roadmaps.md +12 -3
  67. package/.agent-src/templates/scripts/work_engine/hook_bootstrap.py +9 -0
  68. package/.agent-src/templates/scripts/work_engine/hooks/__init__.py +4 -0
  69. package/.agent-src/templates/scripts/work_engine/hooks/builtin/__init__.py +4 -0
  70. package/.agent-src/templates/scripts/work_engine/hooks/builtin/decision_trace.py +163 -0
  71. package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +111 -0
  72. package/.agent-src/templates/scripts/work_engine/hooks/settings.py +36 -0
  73. package/.agent-src/templates/scripts/work_engine/scoring/decision_trace.py +141 -0
  74. package/.agent-src/templates/scripts/work_engine/scoring/memory_visibility.py +125 -0
  75. package/.claude-plugin/marketplace.json +1 -1
  76. package/CHANGELOG.md +97 -0
  77. package/README.md +20 -20
  78. package/config/agent-settings.template.yml +23 -0
  79. package/docs/architecture.md +1 -1
  80. package/docs/catalog.md +5 -2
  81. package/docs/contracts/adr-settings-sync-engine.md +127 -0
  82. package/docs/contracts/decision-trace-v1.md +146 -0
  83. package/docs/contracts/file-ownership-matrix.json +7 -0
  84. package/docs/contracts/hook-architecture-v1.md +213 -0
  85. package/docs/contracts/load-context-budget-model.md +80 -0
  86. package/docs/contracts/load-context-schema.md +20 -0
  87. package/docs/contracts/memory-visibility-v1.md +138 -0
  88. package/docs/contracts/one-off-script-lifecycle.md +109 -0
  89. package/docs/contracts/roadmap-complexity-standard.md +137 -0
  90. package/docs/contracts/rule-interactions.yml +22 -0
  91. package/docs/customization.md +1 -0
  92. package/docs/development.md +4 -1
  93. package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +134 -0
  94. package/docs/guidelines/agent-infra/direct-answers-demos.md +145 -0
  95. package/docs/guidelines/agent-infra/layered-settings.md +32 -13
  96. package/docs/guidelines/agent-infra/verify-before-complete-demos.md +128 -0
  97. package/package.json +1 -1
  98. package/scripts/agent-config +64 -0
  99. package/scripts/ai_council/bundler.py +3 -3
  100. package/scripts/ai_council/clients.py +24 -8
  101. package/scripts/ai_council/one_off_archive/2026-05/README.md +67 -0
  102. package/scripts/ai_council/one_off_archive/2026-05/_one_off_budget_v2_audit.py +206 -0
  103. package/scripts/ai_council/{_one_off_roundtrip.py → one_off_archive/2026-05/_one_off_roundtrip.py} +13 -8
  104. package/scripts/ai_council/one_off_archive/2026-05/_one_off_tier_retrofit.py +180 -0
  105. package/scripts/ai_council/session.py +92 -0
  106. package/scripts/build_rule_trigger_matrix.py +360 -0
  107. package/scripts/capture_showcase_session.py +361 -0
  108. package/scripts/chat_history.py +11 -1
  109. package/scripts/check_always_budget.py +46 -2
  110. package/scripts/check_one_off_location.py +81 -0
  111. package/scripts/check_references.py +6 -0
  112. package/scripts/compress.py +5 -2
  113. package/scripts/context_hygiene_hook.py +181 -0
  114. package/scripts/council_cli.py +357 -0
  115. package/scripts/hook_manifest.yaml +184 -0
  116. package/scripts/hooks/__init__.py +1 -0
  117. package/scripts/hooks/augment-context-hygiene.sh +55 -0
  118. package/scripts/hooks/augment-dispatcher.sh +72 -0
  119. package/scripts/hooks/augment-onboarding-gate.sh +55 -0
  120. package/scripts/hooks/cline-dispatcher.sh +86 -0
  121. package/scripts/hooks/cursor-dispatcher.sh +76 -0
  122. package/scripts/hooks/dispatch_hook.py +348 -0
  123. package/scripts/hooks/envelope.py +98 -0
  124. package/scripts/hooks/gemini-dispatcher.sh +117 -0
  125. package/scripts/hooks/state_io.py +122 -0
  126. package/scripts/hooks/windsurf-dispatcher.sh +123 -0
  127. package/scripts/hooks_status.py +146 -0
  128. package/scripts/install.py +728 -51
  129. package/scripts/install.sh +1 -1
  130. package/scripts/lint_examples.py +98 -0
  131. package/scripts/lint_hook_manifest.py +216 -0
  132. package/scripts/lint_one_off_age.py +184 -0
  133. package/scripts/lint_roadmap_complexity.py +127 -0
  134. package/scripts/lint_rule_tiers.py +78 -0
  135. package/scripts/lint_showcase_sessions.py +148 -0
  136. package/scripts/minimal_safe_diff_hook.py +245 -0
  137. package/scripts/onboarding_gate_hook.py +142 -0
  138. package/scripts/readme_linter.py +12 -3
  139. package/scripts/roadmap_progress_hook.py +5 -0
  140. package/scripts/schemas/rule.schema.json +5 -0
  141. package/scripts/sync_agent_settings.py +32 -129
  142. package/scripts/sync_yaml_rt.py +734 -0
  143. package/scripts/verify_before_complete_hook.py +216 -0
  144. /package/scripts/ai_council/{_one_off_2a4_acceptance.py → one_off_archive/2026-05/_one_off_2a4_acceptance.py} +0 -0
  145. /package/scripts/ai_council/{_one_off_context_layer_v1_estimate.py → one_off_archive/2026-05/_one_off_context_layer_v1_estimate.py} +0 -0
  146. /package/scripts/ai_council/{_one_off_context_layer_v1_review.py → one_off_archive/2026-05/_one_off_context_layer_v1_review.py} +0 -0
  147. /package/scripts/ai_council/{_one_off_followups_review.py → one_off_archive/2026-05/_one_off_followups_review.py} +0 -0
  148. /package/scripts/ai_council/{_one_off_nondestructive_inline_audit.py → one_off_archive/2026-05/_one_off_nondestructive_inline_audit.py} +0 -0
  149. /package/scripts/{_one_off_phase4_dispatch_latency.py → ai_council/one_off_archive/2026-05/_one_off_phase4_dispatch_latency.py} +0 -0
  150. /package/scripts/{_one_off_phase6_trigger_jaccard.py → ai_council/one_off_archive/2026-05/_one_off_phase6_trigger_jaccard.py} +0 -0
  151. /package/scripts/ai_council/{_one_off_phase_2a_budget_rebalance.py → one_off_archive/2026-05/_one_off_phase_2a_budget_rebalance.py} +0 -0
  152. /package/scripts/ai_council/{_one_off_phase_2a_post_revert.py → one_off_archive/2026-05/_one_off_phase_2a_post_revert.py} +0 -0
  153. /package/scripts/ai_council/{_one_off_rebalancing_audit.py → one_off_archive/2026-05/_one_off_rebalancing_audit.py} +0 -0
  154. /package/scripts/ai_council/{_one_off_rule_hardening_v1.py → one_off_archive/2026-05/_one_off_rule_hardening_v1.py} +0 -0
  155. /package/scripts/ai_council/{_one_off_structural_open_questions.py → one_off_archive/2026-05/_one_off_structural_open_questions.py} +0 -0
  156. /package/scripts/ai_council/{_one_off_structural_optimization.py → one_off_archive/2026-05/_one_off_structural_optimization.py} +0 -0
  157. /package/scripts/ai_council/{_one_off_structural_v3_gaps.py → one_off_archive/2026-05/_one_off_structural_v3_gaps.py} +0 -0
  158. /package/scripts/ai_council/{_one_off_structural_v3_review.py → one_off_archive/2026-05/_one_off_structural_v3_review.py} +0 -0
@@ -0,0 +1,128 @@
1
+ ---
2
+ demo_for: verify-before-complete
3
+ layer: pattern-memory
4
+ prose_delta:
5
+ rule_chars_before: 2196
6
+ rule_chars_after: 2603
7
+ measured_at: phase-3.3-cross-link-landing
8
+ note: "+407 chars from the Examples cross-link block; future prose-pruning rounds (out of scope for Phase 3) will pull pattern prose from the rule into this demo and drive the after-count below the before-count."
9
+ ---
10
+
11
+ # verify-before-complete — Pattern Memory
12
+
13
+ > Wrong / right / why demos for the `verify-before-complete` Iron
14
+ > Law (no completion claim without fresh evidence in this message).
15
+ > Linked from
16
+ > [`verify-before-complete`](../../.agent-src.uncompressed/rules/verify-before-complete.md)
17
+ > § Examples. Outcome baseline at
18
+ > [`tests/golden/outcomes/verify_before_complete.json`](../../tests/golden/outcomes/verify_before_complete.json).
19
+
20
+ ## Demo 1 — Hedged completion claim
21
+
22
+ ### Wrong shape
23
+
24
+ ```
25
+ User: ist der Fix fertig?
26
+
27
+ Agent: Ja, sollte jetzt durchlaufen — ich habe die fehlende
28
+ Bedingung ergänzt und die Tests müssten grün sein.
29
+ ```
30
+
31
+ **Failure mode:** "sollte", "müssten" are red-flag hedges (§ Red
32
+ flags — STOP immediately). No fresh command output in this message.
33
+ The claim rides on inference, not evidence.
34
+
35
+ ### Right shape
36
+
37
+ ```
38
+ User: ist der Fix fertig?
39
+
40
+ Agent: Frisch gerade ausgeführt:
41
+
42
+ $ python3 -m pytest tests/test_load_context.py -v
43
+ ============================== 7 passed in 0.07s ===============================
44
+
45
+ Exit code 0, 7/7 grün. Damit ist der Fix verifiziert.
46
+ ```
47
+
48
+ ### Why it works
49
+
50
+ Literal command line, fresh output, exit code, pass count — all in
51
+ this message. The user can scroll up and see the same shape that
52
+ the test runner produced. No hedging vocabulary.
53
+
54
+ ## Demo 2 — Trusting an earlier run
55
+
56
+ ### Wrong shape
57
+
58
+ ```
59
+ [12 messages earlier, agent ran tests — green]
60
+
61
+ User: also fertig zum committen?
62
+
63
+ Agent: Ja, die Tests waren ja grün vorhin. Du kannst committen.
64
+ ```
65
+
66
+ **Failure mode:** "Trusting a previous run from earlier in the
67
+ conversation" — explicit red flag. Code may have drifted in the meantime;
68
+ the earlier run is stale evidence.
69
+
70
+ ### Right shape
71
+
72
+ ```
73
+ User: also fertig zum committen?
74
+
75
+ Agent: Ich verifiziere kurz — Tests laufen jetzt:
76
+
77
+ $ python3 -m pytest tests/ -q
78
+ ... 47 passed in 2.31s
79
+
80
+ Grün. Plus ein quality-tools-Lauf:
81
+
82
+ $ task ci
83
+ ... ✅ All checks passed
84
+
85
+ Jetzt ist der Stand sauber.
86
+ ```
87
+
88
+ ### Why it works
89
+
90
+ Re-runs both gates *now*. The `quality-tools` skill handles the
91
+ specific commands; the rule only mandates "fresh in this message".
92
+ "Stand sauber" is anchored on actual exit codes, not on memory.
93
+
94
+ ## Demo 3 — Partial verification claimed as complete
95
+
96
+ ### Wrong shape
97
+
98
+ ```
99
+ User: ready für den PR?
100
+
101
+ Agent: Tests sind grün, ich öffne den PR.
102
+ ```
103
+
104
+ **Failure mode:** "Relying on partial verification (ran tests but
105
+ not PHPStan)" — explicit red flag. PR-readiness needs the full
106
+ gate, not a single channel.
107
+
108
+ ### Right shape
109
+
110
+ ```
111
+ User: ready für den PR?
112
+
113
+ Agent: Ich check beide Gates:
114
+
115
+ $ vendor/bin/pest
116
+ ... 47 passed
117
+ $ vendor/bin/phpstan analyse
118
+ ... [OK] No errors
119
+
120
+ Beide grün. Soll ich den PR jetzt erstellen?
121
+ ```
122
+
123
+ ### Why it works
124
+
125
+ Both gates run fresh, both outputs in the message, exit summaries
126
+ visible. The follow-up question is permission-gated per
127
+ `scope-control` § Git operations — the verification is done
128
+ independently of the user's go-ahead.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@event4u/agent-config",
3
- "version": "1.17.0",
3
+ "version": "1.19.0",
4
4
  "description": "Shared agent configuration \u2014 skills, rules, commands, guidelines, and templates for AI coding tools",
5
5
  "license": "MIT",
6
6
  "private": false,
@@ -72,9 +72,24 @@ Commands:
72
72
  (CHECKPOINT fallback for platforms without native hooks)
73
73
  roadmap-progress:hook PostToolUse hook entry point (read JSON from stdin)
74
74
  Regenerates roadmaps-progress.md when a tool wrote under agents/roadmaps/
75
+ onboarding-gate:hook Hook entry point (drains stdin)
76
+ Writes .augment/state/onboarding-gate.json from .agent-settings.yml
77
+ context-hygiene:hook PostToolUse hook entry point (read JSON from stdin)
78
+ Maintains .augment/state/context-hygiene.json (turn count, loop, freshness)
79
+ dispatch:hook Universal hook dispatcher (Phase 7, hook-architecture-v1.md)
80
+ Usage: dispatch:hook --platform <name> --event <event> [--native-event <native>]
81
+ Reads scripts/hook_manifest.yaml and runs the resolved concern chain.
82
+ hooks:status Print the runtime hook matrix (per-platform install + bindings)
83
+ Flags: --format json|table, --strict (CI), --project-root <path>
75
84
  telemetry:record Append one artefact-engagement event (default-off)
76
85
  telemetry:status Print artefact-engagement telemetry status (read-only)
77
86
  telemetry:report Aggregate the engagement log into a quartile report
87
+ council:estimate Pre-call council cost preview (no API call, no spend)
88
+ Usage: council:estimate <question> [--input-mode prompt|roadmap]
89
+ council:run Run the council. Requires --confirm to spend.
90
+ Usage: council:run <question> --output <path> --confirm
91
+ council:render Re-render a saved council responses JSON to markdown
92
+ Usage: council:render <responses.json>
78
93
  help Show this help
79
94
  --version, -V Print package version
80
95
 
@@ -98,6 +113,9 @@ Examples:
98
113
  ./agent-config telemetry:status --format json
99
114
  ./agent-config telemetry:report --since 30d --top 20
100
115
  ./agent-config telemetry:report --since 7d --format json --top 0
116
+ ./agent-config council:estimate prompt.txt
117
+ ./agent-config council:run prompt.txt --output agents/council-sessions/out.json --confirm
118
+ ./agent-config council:render agents/council-sessions/out.json
101
119
 
102
120
  All commands operate on the CURRENT DIRECTORY (your project root).
103
121
  The CLI is strictly consumer-facing. Maintainer tasks live in Taskfile.yml.
@@ -325,6 +343,34 @@ cmd_roadmap_progress_hook() {
325
343
  exec python3 "$script" "$@"
326
344
  }
327
345
 
346
+ cmd_onboarding_gate_hook() {
347
+ require_python3
348
+ local script
349
+ script="$(resolve_script "scripts/onboarding_gate_hook.py")" || return 1
350
+ exec python3 "$script" "$@"
351
+ }
352
+
353
+ cmd_context_hygiene_hook() {
354
+ require_python3
355
+ local script
356
+ script="$(resolve_script "scripts/context_hygiene_hook.py")" || return 1
357
+ exec python3 "$script" "$@"
358
+ }
359
+
360
+ cmd_dispatch_hook() {
361
+ require_python3
362
+ local script
363
+ script="$(resolve_script "scripts/hooks/dispatch_hook.py")" || return 1
364
+ exec python3 "$script" "$@"
365
+ }
366
+
367
+ cmd_hooks_status() {
368
+ require_python3
369
+ local script
370
+ script="$(resolve_script "scripts/hooks_status.py")" || return 1
371
+ exec python3 "$script" "$@"
372
+ }
373
+
328
374
  cmd_chat_history_checkpoint() {
329
375
  require_python3
330
376
  local script
@@ -420,6 +466,17 @@ cmd_keys_install_openai() {
420
466
  exec bash "$script" "$@"
421
467
  }
422
468
 
469
+ # Council CLI — non-interactive wrapper around scripts.ai_council.orchestrator.
470
+ # Three subcommands share one Python entry point; we forward the subcommand
471
+ # verb so `./agent-config council:run --confirm` lands on `council_cli.py run --confirm`.
472
+ cmd_council() {
473
+ require_python3
474
+ local sub="$1"; shift || true
475
+ local script
476
+ script="$(resolve_script "scripts/council_cli.py")" || return 1
477
+ exec env PYTHONPATH="$PACKAGE_ROOT" python3 "$script" "$sub" "$@"
478
+ }
479
+
423
480
  main() {
424
481
  local cmd="${1-}"
425
482
  [[ $# -gt 0 ]] && shift || true
@@ -446,9 +503,16 @@ main() {
446
503
  chat-history:hook) cmd_chat_history_hook "$@" ;;
447
504
  chat-history:checkpoint) cmd_chat_history_checkpoint "$@" ;;
448
505
  roadmap-progress:hook) cmd_roadmap_progress_hook "$@" ;;
506
+ onboarding-gate:hook) cmd_onboarding_gate_hook "$@" ;;
507
+ context-hygiene:hook) cmd_context_hygiene_hook "$@" ;;
508
+ dispatch:hook) cmd_dispatch_hook "$@" ;;
509
+ hooks:status) cmd_hooks_status "$@" ;;
449
510
  telemetry:record) cmd_telemetry_record "$@" ;;
450
511
  telemetry:status) cmd_telemetry_status "$@" ;;
451
512
  telemetry:report) cmd_telemetry_report "$@" ;;
513
+ council:estimate) cmd_council estimate "$@" ;;
514
+ council:run) cmd_council run "$@" ;;
515
+ council:render) cmd_council render "$@" ;;
452
516
  help|--help|-h|"") usage ;;
453
517
  --version|-V) print_version ;;
454
518
  *)
@@ -38,11 +38,11 @@ class CouncilContext:
38
38
  # placeholder. Order matters — the most specific pattern goes first.
39
39
 
40
40
  _REDACTION_LINE_PATTERNS: list[tuple[re.Pattern[str], str]] = [
41
- (re.compile(r".*~?/?\.config/agent-config/[^/\s]+\.key.*"),
41
+ (re.compile(r"~?/?\.config/agent-config/[^/\s]+\.key"),
42
42
  "[redacted: agent-config key path]"),
43
- (re.compile(r"^\s*Authorization:\s.*", re.IGNORECASE),
43
+ (re.compile(r"^\s*Authorization:\s", re.IGNORECASE),
44
44
  "[redacted: Authorization header]"),
45
- (re.compile(r"(?i).*(api[_-]?key|secret|token|password)\s*[:=].*"),
45
+ (re.compile(r"(?i)(api[_-]?key|secret|token|password)\s*[:=]"),
46
46
  "[redacted: secret-like assignment]"),
47
47
  (re.compile(r"sk-ant-[A-Za-z0-9_\-]{8,}"), "[redacted: anthropic-key-like token]"),
48
48
  (re.compile(r"sk-[A-Za-z0-9_\-]{20,}"), "[redacted: openai-key-like token]"),
@@ -34,6 +34,16 @@ OPENAI_KEY_PATH = Path.home() / ".config" / "agent-config" / "openai.key"
34
34
  DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-5"
35
35
  DEFAULT_OPENAI_MODEL = "gpt-4o"
36
36
 
37
+ # OpenAI reasoning models (o1, o3, o4 families) reject `max_tokens` and the
38
+ # `system` role; they require `max_completion_tokens` and accept only `user`
39
+ # (and `developer`) messages.
40
+ _REASONING_PREFIXES = ("o1", "o3", "o4")
41
+
42
+
43
+ def _is_reasoning_model(model: str) -> bool:
44
+ name = model.lower()
45
+ return any(name == p or name.startswith(p + "-") for p in _REASONING_PREFIXES)
46
+
37
47
 
38
48
  class KeyGateError(RuntimeError):
39
49
  """Raised when a provider key file violates the 0600 contract."""
@@ -189,15 +199,21 @@ class OpenAIClient(ExternalAIClient):
189
199
 
190
200
  def ask(self, system_prompt: str, user_prompt: str, max_tokens: int = 1024) -> CouncilResponse:
191
201
  t0 = time.monotonic()
202
+ kwargs: dict[str, object] = {"model": self.model}
203
+ if _is_reasoning_model(self.model):
204
+ # o1/o3/o4 reasoning models reject `max_tokens` and `system` role.
205
+ kwargs["max_completion_tokens"] = max_tokens
206
+ kwargs["messages"] = [
207
+ {"role": "user", "content": f"{system_prompt}\n\n---\n\n{user_prompt}"},
208
+ ]
209
+ else:
210
+ kwargs["max_tokens"] = max_tokens
211
+ kwargs["messages"] = [
212
+ {"role": "system", "content": system_prompt},
213
+ {"role": "user", "content": user_prompt},
214
+ ]
192
215
  try:
193
- response = self._client.chat.completions.create(
194
- model=self.model,
195
- max_tokens=max_tokens,
196
- messages=[
197
- {"role": "system", "content": system_prompt},
198
- {"role": "user", "content": user_prompt},
199
- ],
200
- )
216
+ response = self._client.chat.completions.create(**kwargs)
201
217
  except Exception as exc: # noqa: BLE001 - normalise all SDK errors
202
218
  return CouncilResponse(
203
219
  provider=self.name, model=self.model, text="",
@@ -0,0 +1,67 @@
1
+ # One-off archive — 2026-05
2
+
3
+ > Archived per **Phase 0a.2** of `agents/roadmaps/road-to-rule-hardening.md`.
4
+ > Each script here was a single-purpose AI-council probe or measurement
5
+ > tied to a specific phase of `road-to-structural-optimization.md` (now
6
+ > archived) or `road-to-rule-hardening.md`. The session output lives
7
+ > under `agents/council-sessions/` (durable evidence) and the linter
8
+ > `scripts/check_one_off_location.py` enforces that no new
9
+ > `_one_off_*.py` lands outside this folder.
10
+
11
+ ## Going forward — use the CLI, not new one-offs
12
+
13
+ > **Canonical pattern (Phase 6.7+):** new council runs go through
14
+ > `./agent-config council:{estimate,run,render}`. The CLI handles
15
+ > bundling, redaction, the cost gate, the `0600` key contract, the
16
+ > `enabled` check, and session persistence — every concern these
17
+ > archived one-offs reimplemented inline.
18
+ >
19
+ > ```bash
20
+ > ./agent-config council:estimate <question.md>
21
+ > ./agent-config council:run <question.md> \
22
+ > --output agents/council-sessions/<UTC-ts>.json --confirm
23
+ > ./agent-config council:render agents/council-sessions/<UTC-ts>.json
24
+ > ```
25
+ >
26
+ > Wire-level access (`scripts.ai_council.orchestrator`,
27
+ > `scripts.ai_council.bundler`) is still public for tests and library
28
+ > use, but writing a new `_one_off_*.py` purely to fan out to the
29
+ > council members is **not** the path. The scripts below are kept as
30
+ > historical evidence of the runs that produced specific roadmap
31
+ > decisions; they are not a template for new work.
32
+
33
+ ## Lifecycle rule (uniform — Phase 0.2 of context-layer-maturity)
34
+
35
+ > A one-off is **archived**, never deleted. The session manifest under
36
+ > `agents/council-sessions/` is the audit trail; the script itself is
37
+ > kept here so a future contributor can re-read intent, re-run a probe
38
+ > on a future branch, or extract a reusable helper.
39
+
40
+ ## Inventory
41
+
42
+ | Script | Roadmap / Phase | Council session id |
43
+ |---|---|---|
44
+ | `_one_off_2a4_acceptance.py` | structural-optimization 2A.4 | various 2A sessions |
45
+ | `_one_off_context_layer_v1_estimate.py` | context-layer-maturity v1 cost estimate | `2026-05-03T17-56-21Z` |
46
+ | `_one_off_context_layer_v1_review.py` | context-layer-maturity v1 review | `2026-05-03T17-56-21Z` |
47
+ | `_one_off_followups_review.py` | road-to-1-16-followups review | session under `agents/council-sessions/` |
48
+ | `_one_off_nondestructive_inline_audit.py` | non-destructive-by-default audit | session under `agents/council-sessions/` |
49
+ | `_one_off_phase4_dispatch_latency.py` | structural-optimization 4.3.1 cluster latency benchmark | local benchmark, no council |
50
+ | `_one_off_phase6_trigger_jaccard.py` | structural-optimization Phase 6 trigger overlap | local measurement |
51
+ | `_one_off_phase_2a_budget_rebalance.py` | structural-optimization 2A budget rebalance | `2026-05-03T*` |
52
+ | `_one_off_phase_2a_post_revert.py` | structural-optimization 2A post-revert | `2026-05-03T*` |
53
+ | `_one_off_rebalancing_audit.py` | rebalancing roadmap audit | session under `agents/council-sessions/` |
54
+ | `_one_off_roundtrip.py` | council client roundtrip smoke test | local smoke test |
55
+ | `_one_off_rule_hardening_v1.py` | rule-hardening v1 review | `2026-05-03T19-16-25Z` |
56
+ | `_one_off_structural_open_questions.py` | structural-optimization open questions | session under `agents/council-sessions/` |
57
+ | `_one_off_structural_optimization.py` | structural-optimization initial review | session under `agents/council-sessions/` |
58
+ | `_one_off_structural_v3_gaps.py` | structural-optimization v3 gap audit | session under `agents/council-sessions/` |
59
+ | `_one_off_structural_v3_review.py` | structural-optimization v3 review | session under `agents/council-sessions/` |
60
+
61
+ ## Re-running an archived script
62
+
63
+ Imports may have shifted (e.g. `scripts.ai_council.*`). If a probe
64
+ needs to be re-run against a current branch, copy it back to its
65
+ original location, fix imports, run, then move the working copy
66
+ back here. Do **not** edit in place — keep the archive immutable
67
+ beyond cosmetic README updates.
@@ -0,0 +1,206 @@
1
+ """Council audit of Budget-v2 result (Phase 4.5 of road-to-context-layer-maturity).
2
+
3
+ Phase 4 of road-to-context-layer-maturity selected two 4d-trim paths
4
+ (`direct-answers`, `no-cheap-questions`) from a fixed option set
5
+ documented in agents/contexts/budget-v2-matrix.md and shipped them.
6
+ Exit-gate actuals (run 2026-05-04): total 44,928 / 49,000 chars
7
+ (91.7 %, 4,072 chars headroom) — ≥ 4,000 headroom goal hit. Top-3
8
+ sum unchanged. Safety-floor rules untouched.
9
+
10
+ Phase 4.5 requires a council audit before archival: confirm the
11
+ trim choices were sound, no semantic drift introduced, no better
12
+ path missed inside the Phase 4 inputs gate.
13
+
14
+ Invocation:
15
+ .venv/bin/python -m scripts.ai_council.one_off_archive.2026-05._one_off_budget_v2_audit
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import sys
20
+ from pathlib import Path
21
+
22
+ from scripts.ai_council.bundler import bundle_files
23
+ from scripts.ai_council.clients import (
24
+ AnthropicClient,
25
+ OpenAIClient,
26
+ load_anthropic_key,
27
+ load_openai_key,
28
+ )
29
+ from scripts.ai_council.orchestrator import (
30
+ CostBudget,
31
+ CouncilQuestion,
32
+ consult,
33
+ estimate,
34
+ )
35
+ from scripts.ai_council.pricing import estimate_cost, load_prices
36
+ from scripts.ai_council.project_context import detect_project_context
37
+ from scripts.ai_council.session import SessionManifest, save as save_session
38
+
39
+ REPO_ROOT = Path(__file__).resolve().parents[4]
40
+ ARTEFACTS = [
41
+ REPO_ROOT / "docs/contracts/load-context-budget-model.md",
42
+ REPO_ROOT / "agents/contexts/budget-v2-matrix.md",
43
+ REPO_ROOT / ".agent-src.uncompressed/rules/direct-answers.md",
44
+ REPO_ROOT / ".agent-src.uncompressed/rules/no-cheap-questions.md",
45
+ ]
46
+
47
+ ORIGINAL_ASK = (
48
+ "Phase 4 of road-to-context-layer-maturity trimmed two always-rules "
49
+ "(direct-answers, no-cheap-questions) under the locked Model (b) "
50
+ "literal budget contract, hitting the ≥ 4,000-chars headroom goal "
51
+ "(actual: 4,072). Council task: audit the trim choices for "
52
+ "soundness and semantic drift before roadmap archival."
53
+ )
54
+
55
+ REVIEW_PROMPT = """\
56
+ # Council Audit — Budget-v2 Trim Result (Phase 4.5)
57
+
58
+ ## Context
59
+
60
+ Phase 4 selected two 4d-trim paths from a fixed option set documented
61
+ in `budget-v2-matrix.md`. The matrix evaluated 4a (demote→auto), 4b
62
+ (merge), 4c (shared-context, locked at 3a Model (b) literal — no-op),
63
+ and 4d (compress prose) for every touchable always-rule. Safety-floor
64
+ rules (scope-control, non-destructive-by-default, commit-policy,
65
+ agent-authority) were untouchable. Outcome-untested rules were
66
+ restricted to 4d only per the Phase 4.0 inputs gate.
67
+
68
+ ## Selected paths and result
69
+
70
+ - **4d on `direct-answers`** — emoji-scope subsection trimmed,
71
+ failure-mode collapsed to pointer. Δ ext: 4,098 → 3,987 (−111).
72
+ - **4d on `no-cheap-questions`** — "What counts as cheap" subsection
73
+ collapsed to pointer at `asking-and-brevity-examples.md`. Δ ext:
74
+ 4,257 → 3,933 (−324).
75
+
76
+ Combined: −435 chars · headroom 3,637 → 4,072 (+435) · top-3 sum
77
+ unchanged · safety-floor rules untouched.
78
+
79
+ ## Audit questions (please address each)
80
+
81
+ 1. **Trim soundness** — do the surviving Iron Laws in both rules still
82
+ carry the rule's purpose, or did the prose trim sacrifice precision?
83
+ Cite the specific subsection if you find drift.
84
+
85
+ 2. **Path selection** — was 4d the right choice for these two rules
86
+ given the matrix? Or should one of the deferred paths (4a, 4b)
87
+ have been picked despite the matrix verdict?
88
+
89
+ 3. **Missed leverage** — inside the Phase 4 inputs gate (4d only on
90
+ outcome-untested rules; safety-floor untouchable), is there a
91
+ higher-leverage 4d target the matrix missed?
92
+
93
+ 4. **Headroom durability** — 4,072 chars is +72 over the 4,000 goal.
94
+ Is this margin stable against expected near-term rule edits, or
95
+ should Phase 5 be tightened to defend it?
96
+
97
+ ## Output Contract (STRICT)
98
+
99
+ ```
100
+ ### Verdict
101
+ **Trim choices sound:** <YES — archive · NO — escalate>
102
+ **One-sentence rationale:** <≤ 30 words>
103
+ ```
104
+
105
+ ```
106
+ ### Per-question findings (1–4 above)
107
+ 1. <≤ 2 sentences>
108
+ 2. <≤ 2 sentences>
109
+ 3. <≤ 2 sentences>
110
+ 4. <≤ 2 sentences>
111
+ ```
112
+
113
+ ```
114
+ ### Risk note
115
+ **Single biggest residual risk:** <one sentence>
116
+ **Mitigation (if any):** <one sentence or NONE>
117
+ ```
118
+
119
+ Be decisive — total response ≤ 800 words. Artefacts follow verbatim.
120
+ """
121
+
122
+
123
+ def main() -> int:
124
+ anthropic = AnthropicClient(api_key=load_anthropic_key(), model="claude-sonnet-4-5")
125
+ openai = OpenAIClient(api_key=load_openai_key(), model="gpt-4o")
126
+ members = [anthropic, openai]
127
+
128
+ context = bundle_files(ARTEFACTS)
129
+ project = detect_project_context(REPO_ROOT)
130
+ table = load_prices()
131
+
132
+ user_prompt = REVIEW_PROMPT + "\n\n---\n\n" + context.text
133
+
134
+ question = CouncilQuestion(mode="files", user_prompt=user_prompt, max_tokens=2048)
135
+
136
+ estimates = estimate(question, members, table, project=project, original_ask=ORIGINAL_ASK)
137
+ print("=== ESTIMATE (single round) ===")
138
+ total_est = 0.0
139
+ for c, e in zip(members, estimates):
140
+ print(f" {c.name}/{c.model}: ~{e.input_tokens} in + {e.output_tokens} out = ${e.total_usd:.4f}")
141
+ total_est += e.total_usd
142
+ print(f" TOTAL per round (max): ${total_est:.4f}\n")
143
+
144
+ budget = CostBudget(
145
+ max_input_tokens=200_000,
146
+ max_output_tokens=80_000,
147
+ max_calls=20,
148
+ max_total_usd=2.50,
149
+ )
150
+
151
+ rounds_collected: list[list] = []
152
+
153
+ def _on_round_complete(round_idx: int, round_responses) -> None:
154
+ rounds_collected.append(list(round_responses))
155
+ print(f"=== ROUND {round_idx + 1} COMPLETE ===")
156
+ for r in round_responses:
157
+ if r.error:
158
+ print(f" [error] {r.provider}/{r.model}: {r.error}")
159
+ continue
160
+ actual = estimate_cost(r.provider, r.model, r.input_tokens, r.output_tokens, table)
161
+ print(f" [done] {r.provider}/{r.model}: {r.input_tokens} in / "
162
+ f"{r.output_tokens} out · {r.latency_ms} ms · ${actual.total_usd:.4f}")
163
+ print()
164
+
165
+ print("=== CONSULT (1 round, Phase 4.5 Budget-v2 audit) ===")
166
+ consult(
167
+ members, question, budget,
168
+ rounds=1,
169
+ on_round_complete=_on_round_complete,
170
+ table=table, project=project, original_ask=ORIGINAL_ASK,
171
+ )
172
+
173
+ if not rounds_collected:
174
+ print("[error] no rounds completed", file=sys.stderr)
175
+ return 1
176
+
177
+ actual_total = 0.0
178
+ for round_responses in rounds_collected:
179
+ for r in round_responses:
180
+ if r.error:
181
+ continue
182
+ actual = estimate_cost(r.provider, r.model, r.input_tokens, r.output_tokens, table)
183
+ actual_total += actual.total_usd
184
+ print(f"=== TOTAL ACTUAL: ${actual_total:.4f} ===")
185
+
186
+ final_round = rounds_collected[-1]
187
+ if not [r for r in final_round if not r.error]:
188
+ return 1
189
+
190
+ manifest = SessionManifest(
191
+ mode="files",
192
+ artefact="agents/roadmaps/road-to-context-layer-maturity.md",
193
+ original_ask=ORIGINAL_ASK,
194
+ members=[f"{r.provider}/{r.model}" for r in final_round],
195
+ rounds=len(rounds_collected),
196
+ cost_usd_estimated=total_est,
197
+ cost_usd_actual=actual_total,
198
+ extra={"purpose": "Phase 4.5 Budget-v2 trim-result audit"},
199
+ )
200
+ session_dir = save_session(manifest=manifest, responses=rounds_collected)
201
+ print(f"[saved] {session_dir.relative_to(REPO_ROOT)}/")
202
+ return 1 if any(r.error for round_r in rounds_collected for r in round_r) else 0
203
+
204
+
205
+ if __name__ == "__main__":
206
+ raise SystemExit(main())
@@ -1,14 +1,19 @@
1
- """One-off Phase-1 round-trip runner.
1
+ """One-off Phase-1 round-trip runner — HISTORICAL ARCHIVE.
2
2
 
3
- Used exactly once to generate the evidence artefact required to lift
4
- the capture-only fence on `road-to-ai-council.md` Phase 2+ and the
5
- end-to-end verification on `road-to-council-modes.md` Phase 2a.
3
+ Going forward, council runs go through the CLI:
6
4
 
7
- Not part of the public CLI surface — `/council` remains the supported
8
- entry point. This script is committed under `scripts/ai_council/` so
9
- the evidence is reproducible from the git history alone.
5
+ ./agent-config council:estimate <question.md>
6
+ ./agent-config council:run <question.md> \
7
+ --output agents/council-sessions/<UTC-ts>.json --confirm
8
+ ./agent-config council:render agents/council-sessions/<UTC-ts>.json
10
9
 
11
- Invocation:
10
+ This script predates `scripts/council_cli.py` (Phase 6.7) and is kept
11
+ only as the evidence artefact that lifted the capture-only fence on
12
+ `road-to-ai-council.md` Phase 2+ and the end-to-end verification on
13
+ `road-to-council-modes.md` Phase 2a. Do **not** copy it as a template
14
+ for new one-offs — write a question file and use the CLI instead.
15
+
16
+ Invocation (historical):
12
17
  .venv/bin/python -m scripts.ai_council._one_off_roundtrip
13
18
  """
14
19
  from __future__ import annotations