@event4u/agent-config 1.17.0 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/council/default.md +74 -76
- package/.agent-src/commands/feature/roadmap.md +22 -0
- package/.agent-src/commands/roadmap/create.md +38 -6
- package/.agent-src/commands/roadmap/execute.md +36 -9
- package/.agent-src/rules/agent-authority.md +1 -0
- package/.agent-src/rules/agent-docs.md +1 -0
- package/.agent-src/rules/analysis-skill-routing.md +1 -0
- package/.agent-src/rules/architecture.md +1 -0
- package/.agent-src/rules/artifact-drafting-protocol.md +1 -0
- package/.agent-src/rules/artifact-engagement-recording.md +1 -0
- package/.agent-src/rules/ask-when-uncertain.md +1 -0
- package/.agent-src/rules/augment-portability.md +1 -0
- package/.agent-src/rules/augment-source-of-truth.md +1 -0
- package/.agent-src/rules/autonomous-execution.md +1 -0
- package/.agent-src/rules/capture-learnings.md +1 -0
- package/.agent-src/rules/chat-history-cadence.md +34 -0
- package/.agent-src/rules/chat-history-ownership.md +1 -0
- package/.agent-src/rules/chat-history-visibility.md +1 -0
- package/.agent-src/rules/cli-output-handling.md +2 -2
- package/.agent-src/rules/command-suggestion-policy.md +1 -0
- package/.agent-src/rules/commit-conventions.md +1 -0
- package/.agent-src/rules/commit-policy.md +1 -0
- package/.agent-src/rules/context-hygiene.md +28 -0
- package/.agent-src/rules/direct-answers.md +18 -26
- package/.agent-src/rules/docker-commands.md +1 -0
- package/.agent-src/rules/docs-sync.md +1 -0
- package/.agent-src/rules/downstream-changes.md +1 -0
- package/.agent-src/rules/e2e-testing.md +1 -0
- package/.agent-src/rules/guidelines.md +1 -0
- package/.agent-src/rules/improve-before-implement.md +1 -0
- package/.agent-src/rules/language-and-tone.md +1 -0
- package/.agent-src/rules/laravel-translations.md +1 -0
- package/.agent-src/rules/markdown-safe-codeblocks.md +1 -0
- package/.agent-src/rules/minimal-safe-diff.md +1 -0
- package/.agent-src/rules/missing-tool-handling.md +1 -0
- package/.agent-src/rules/model-recommendation.md +1 -0
- package/.agent-src/rules/no-cheap-questions.md +15 -21
- package/.agent-src/rules/no-roadmap-references.md +1 -0
- package/.agent-src/rules/non-destructive-by-default.md +1 -0
- package/.agent-src/rules/onboarding-gate.md +33 -0
- package/.agent-src/rules/package-ci-checks.md +1 -0
- package/.agent-src/rules/php-coding.md +1 -0
- package/.agent-src/rules/preservation-guard.md +1 -0
- package/.agent-src/rules/review-routing-awareness.md +1 -0
- package/.agent-src/rules/reviewer-awareness.md +1 -0
- package/.agent-src/rules/roadmap-progress-sync.md +49 -0
- package/.agent-src/rules/role-mode-adherence.md +2 -2
- package/.agent-src/rules/rule-type-governance.md +29 -0
- package/.agent-src/rules/runtime-safety.md +1 -0
- package/.agent-src/rules/scope-control.md +1 -0
- package/.agent-src/rules/security-sensitive-stop.md +1 -0
- package/.agent-src/rules/size-enforcement.md +1 -0
- package/.agent-src/rules/skill-improvement-trigger.md +1 -0
- package/.agent-src/rules/skill-quality.md +1 -0
- package/.agent-src/rules/slash-command-routing-policy.md +39 -0
- package/.agent-src/rules/think-before-action.md +1 -0
- package/.agent-src/rules/token-efficiency.md +1 -0
- package/.agent-src/rules/tool-safety.md +1 -0
- package/.agent-src/rules/ui-audit-gate.md +1 -0
- package/.agent-src/rules/upstream-proposal.md +1 -0
- package/.agent-src/rules/user-interaction.md +1 -0
- package/.agent-src/rules/verify-before-complete.md +1 -0
- package/.agent-src/skills/roadmap-management/SKILL.md +29 -4
- package/.agent-src/skills/verify-completion-evidence/SKILL.md +8 -1
- package/.agent-src/templates/agent-settings.md +16 -0
- package/.agent-src/templates/roadmaps.md +12 -3
- package/.agent-src/templates/scripts/work_engine/hook_bootstrap.py +9 -0
- package/.agent-src/templates/scripts/work_engine/hooks/__init__.py +4 -0
- package/.agent-src/templates/scripts/work_engine/hooks/builtin/__init__.py +4 -0
- package/.agent-src/templates/scripts/work_engine/hooks/builtin/decision_trace.py +163 -0
- package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +111 -0
- package/.agent-src/templates/scripts/work_engine/hooks/settings.py +36 -0
- package/.agent-src/templates/scripts/work_engine/scoring/decision_trace.py +141 -0
- package/.agent-src/templates/scripts/work_engine/scoring/memory_visibility.py +125 -0
- package/.claude-plugin/marketplace.json +1 -1
- package/CHANGELOG.md +97 -0
- package/README.md +20 -20
- package/config/agent-settings.template.yml +23 -0
- package/docs/architecture.md +1 -1
- package/docs/catalog.md +5 -2
- package/docs/contracts/adr-settings-sync-engine.md +127 -0
- package/docs/contracts/decision-trace-v1.md +146 -0
- package/docs/contracts/file-ownership-matrix.json +7 -0
- package/docs/contracts/hook-architecture-v1.md +213 -0
- package/docs/contracts/load-context-budget-model.md +80 -0
- package/docs/contracts/load-context-schema.md +20 -0
- package/docs/contracts/memory-visibility-v1.md +138 -0
- package/docs/contracts/one-off-script-lifecycle.md +109 -0
- package/docs/contracts/roadmap-complexity-standard.md +137 -0
- package/docs/contracts/rule-interactions.yml +22 -0
- package/docs/customization.md +1 -0
- package/docs/development.md +4 -1
- package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +134 -0
- package/docs/guidelines/agent-infra/direct-answers-demos.md +145 -0
- package/docs/guidelines/agent-infra/layered-settings.md +32 -13
- package/docs/guidelines/agent-infra/verify-before-complete-demos.md +128 -0
- package/package.json +1 -1
- package/scripts/agent-config +64 -0
- package/scripts/ai_council/bundler.py +3 -3
- package/scripts/ai_council/clients.py +24 -8
- package/scripts/ai_council/one_off_archive/2026-05/README.md +67 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_budget_v2_audit.py +206 -0
- package/scripts/ai_council/{_one_off_roundtrip.py → one_off_archive/2026-05/_one_off_roundtrip.py} +13 -8
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_tier_retrofit.py +180 -0
- package/scripts/ai_council/session.py +92 -0
- package/scripts/build_rule_trigger_matrix.py +360 -0
- package/scripts/capture_showcase_session.py +361 -0
- package/scripts/chat_history.py +11 -1
- package/scripts/check_always_budget.py +46 -2
- package/scripts/check_one_off_location.py +81 -0
- package/scripts/check_references.py +6 -0
- package/scripts/compress.py +5 -2
- package/scripts/context_hygiene_hook.py +181 -0
- package/scripts/council_cli.py +357 -0
- package/scripts/hook_manifest.yaml +184 -0
- package/scripts/hooks/__init__.py +1 -0
- package/scripts/hooks/augment-context-hygiene.sh +55 -0
- package/scripts/hooks/augment-dispatcher.sh +72 -0
- package/scripts/hooks/augment-onboarding-gate.sh +55 -0
- package/scripts/hooks/cline-dispatcher.sh +86 -0
- package/scripts/hooks/cursor-dispatcher.sh +76 -0
- package/scripts/hooks/dispatch_hook.py +348 -0
- package/scripts/hooks/envelope.py +98 -0
- package/scripts/hooks/gemini-dispatcher.sh +117 -0
- package/scripts/hooks/state_io.py +122 -0
- package/scripts/hooks/windsurf-dispatcher.sh +123 -0
- package/scripts/hooks_status.py +146 -0
- package/scripts/install.py +728 -51
- package/scripts/install.sh +1 -1
- package/scripts/lint_examples.py +98 -0
- package/scripts/lint_hook_manifest.py +216 -0
- package/scripts/lint_one_off_age.py +184 -0
- package/scripts/lint_roadmap_complexity.py +127 -0
- package/scripts/lint_rule_tiers.py +78 -0
- package/scripts/lint_showcase_sessions.py +148 -0
- package/scripts/minimal_safe_diff_hook.py +245 -0
- package/scripts/onboarding_gate_hook.py +142 -0
- package/scripts/readme_linter.py +12 -3
- package/scripts/roadmap_progress_hook.py +5 -0
- package/scripts/schemas/rule.schema.json +5 -0
- package/scripts/sync_agent_settings.py +32 -129
- package/scripts/sync_yaml_rt.py +734 -0
- package/scripts/verify_before_complete_hook.py +216 -0
- /package/scripts/ai_council/{_one_off_2a4_acceptance.py → one_off_archive/2026-05/_one_off_2a4_acceptance.py} +0 -0
- /package/scripts/ai_council/{_one_off_context_layer_v1_estimate.py → one_off_archive/2026-05/_one_off_context_layer_v1_estimate.py} +0 -0
- /package/scripts/ai_council/{_one_off_context_layer_v1_review.py → one_off_archive/2026-05/_one_off_context_layer_v1_review.py} +0 -0
- /package/scripts/ai_council/{_one_off_followups_review.py → one_off_archive/2026-05/_one_off_followups_review.py} +0 -0
- /package/scripts/ai_council/{_one_off_nondestructive_inline_audit.py → one_off_archive/2026-05/_one_off_nondestructive_inline_audit.py} +0 -0
- /package/scripts/{_one_off_phase4_dispatch_latency.py → ai_council/one_off_archive/2026-05/_one_off_phase4_dispatch_latency.py} +0 -0
- /package/scripts/{_one_off_phase6_trigger_jaccard.py → ai_council/one_off_archive/2026-05/_one_off_phase6_trigger_jaccard.py} +0 -0
- /package/scripts/ai_council/{_one_off_phase_2a_budget_rebalance.py → one_off_archive/2026-05/_one_off_phase_2a_budget_rebalance.py} +0 -0
- /package/scripts/ai_council/{_one_off_phase_2a_post_revert.py → one_off_archive/2026-05/_one_off_phase_2a_post_revert.py} +0 -0
- /package/scripts/ai_council/{_one_off_rebalancing_audit.py → one_off_archive/2026-05/_one_off_rebalancing_audit.py} +0 -0
- /package/scripts/ai_council/{_one_off_rule_hardening_v1.py → one_off_archive/2026-05/_one_off_rule_hardening_v1.py} +0 -0
- /package/scripts/ai_council/{_one_off_structural_open_questions.py → one_off_archive/2026-05/_one_off_structural_open_questions.py} +0 -0
- /package/scripts/ai_council/{_one_off_structural_optimization.py → one_off_archive/2026-05/_one_off_structural_optimization.py} +0 -0
- /package/scripts/ai_council/{_one_off_structural_v3_gaps.py → one_off_archive/2026-05/_one_off_structural_v3_gaps.py} +0 -0
- /package/scripts/ai_council/{_one_off_structural_v3_review.py → one_off_archive/2026-05/_one_off_structural_v3_review.py} +0 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
---
|
|
2
|
+
demo_for: verify-before-complete
|
|
3
|
+
layer: pattern-memory
|
|
4
|
+
prose_delta:
|
|
5
|
+
rule_chars_before: 2196
|
|
6
|
+
rule_chars_after: 2603
|
|
7
|
+
measured_at: phase-3.3-cross-link-landing
|
|
8
|
+
note: "+407 chars from the Examples cross-link block; future prose-pruning rounds (out of scope for Phase 3) will pull pattern prose from the rule into this demo and drive the after-count below the before-count."
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# verify-before-complete — Pattern Memory
|
|
12
|
+
|
|
13
|
+
> Wrong / right / why demos for the `verify-before-complete` Iron
|
|
14
|
+
> Law (no completion claim without fresh evidence in this message).
|
|
15
|
+
> Linked from
|
|
16
|
+
> [`verify-before-complete`](../../.agent-src.uncompressed/rules/verify-before-complete.md)
|
|
17
|
+
> § Examples. Outcome baseline at
|
|
18
|
+
> [`tests/golden/outcomes/verify_before_complete.json`](../../tests/golden/outcomes/verify_before_complete.json).
|
|
19
|
+
|
|
20
|
+
## Demo 1 — Hedged completion claim
|
|
21
|
+
|
|
22
|
+
### Wrong shape
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
User: ist der Fix fertig?
|
|
26
|
+
|
|
27
|
+
Agent: Ja, sollte jetzt durchlaufen — ich habe die fehlende
|
|
28
|
+
Bedingung ergänzt und die Tests müssten grün sein.
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
**Failure mode:** "sollte", "müssten" are red-flag hedges (§ Red
|
|
32
|
+
flags — STOP immediately). No fresh command output in this message.
|
|
33
|
+
The claim rides on inference, not evidence.
|
|
34
|
+
|
|
35
|
+
### Right shape
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
User: ist der Fix fertig?
|
|
39
|
+
|
|
40
|
+
Agent: Frisch gerade ausgeführt:
|
|
41
|
+
|
|
42
|
+
$ python3 -m pytest tests/test_load_context.py -v
|
|
43
|
+
============================== 7 passed in 0.07s ===============================
|
|
44
|
+
|
|
45
|
+
Exit code 0, 7/7 grün. Damit ist der Fix verifiziert.
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Why it works
|
|
49
|
+
|
|
50
|
+
Literal command line, fresh output, exit code, pass count — all in
|
|
51
|
+
this message. The user can scroll up and see the same shape that
|
|
52
|
+
the test runner produced. No hedging vocabulary.
|
|
53
|
+
|
|
54
|
+
## Demo 2 — Trusting an earlier run
|
|
55
|
+
|
|
56
|
+
### Wrong shape
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
[12 messages earlier, agent ran tests — green]
|
|
60
|
+
|
|
61
|
+
User: also fertig zum committen?
|
|
62
|
+
|
|
63
|
+
Agent: Ja, die Tests waren ja grün vorhin. Du kannst committen.
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
**Failure mode:** "Trusting a previous run from earlier in the
|
|
67
|
+
conversation" — explicit red flag. The code may have drifted in the meantime;
|
|
68
|
+
the earlier run is stale evidence.
|
|
69
|
+
|
|
70
|
+
### Right shape
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
User: also fertig zum committen?
|
|
74
|
+
|
|
75
|
+
Agent: Ich verifiziere kurz — Tests laufen jetzt:
|
|
76
|
+
|
|
77
|
+
$ python3 -m pytest tests/ -q
|
|
78
|
+
... 47 passed in 2.31s
|
|
79
|
+
|
|
80
|
+
Grün. Plus ein quality-tools-Lauf:
|
|
81
|
+
|
|
82
|
+
$ task ci
|
|
83
|
+
... ✅ All checks passed
|
|
84
|
+
|
|
85
|
+
Jetzt ist der Stand sauber.
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Why it works
|
|
89
|
+
|
|
90
|
+
Re-runs both gates *now*. The `quality-tools` skill handles the
|
|
91
|
+
specific commands; the rule only mandates "fresh in this message".
|
|
92
|
+
"Stand sauber" is anchored on actual exit codes, not on memory.
|
|
93
|
+
|
|
94
|
+
## Demo 3 — Partial verification claimed as complete
|
|
95
|
+
|
|
96
|
+
### Wrong shape
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
User: ready für den PR?
|
|
100
|
+
|
|
101
|
+
Agent: Tests sind grün, ich öffne den PR.
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
**Failure mode:** "Relying on partial verification (ran tests but
|
|
105
|
+
not PHPStan)" — explicit red flag. PR-readiness needs the full
|
|
106
|
+
gate, not a single channel.
|
|
107
|
+
|
|
108
|
+
### Right shape
|
|
109
|
+
|
|
110
|
+
```
|
|
111
|
+
User: ready für den PR?
|
|
112
|
+
|
|
113
|
+
Agent: Ich checke beide Gates:
|
|
114
|
+
|
|
115
|
+
$ vendor/bin/pest
|
|
116
|
+
... 47 passed
|
|
117
|
+
$ vendor/bin/phpstan analyse
|
|
118
|
+
... [OK] No errors
|
|
119
|
+
|
|
120
|
+
Beide grün. Soll ich den PR jetzt erstellen?
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Why it works
|
|
124
|
+
|
|
125
|
+
Both gates run fresh, both outputs in the message, exit summaries
|
|
126
|
+
visible. The follow-up question exists because opening the PR is permission-gated per
|
|
127
|
+
`scope-control` § Git operations — the verification is done
|
|
128
|
+
independently of the user's go-ahead.
|
package/package.json
CHANGED
package/scripts/agent-config
CHANGED
|
@@ -72,9 +72,24 @@ Commands:
|
|
|
72
72
|
(CHECKPOINT fallback for platforms without native hooks)
|
|
73
73
|
roadmap-progress:hook PostToolUse hook entry point (read JSON from stdin)
|
|
74
74
|
Regenerates roadmaps-progress.md when a tool wrote under agents/roadmaps/
|
|
75
|
+
onboarding-gate:hook Hook entry point (drains stdin)
|
|
76
|
+
Writes .augment/state/onboarding-gate.json from .agent-settings.yml
|
|
77
|
+
context-hygiene:hook PostToolUse hook entry point (read JSON from stdin)
|
|
78
|
+
Maintains .augment/state/context-hygiene.json (turn count, loop, freshness)
|
|
79
|
+
dispatch:hook Universal hook dispatcher (Phase 7, hook-architecture-v1.md)
|
|
80
|
+
Usage: dispatch:hook --platform <name> --event <event> [--native-event <native>]
|
|
81
|
+
Reads scripts/hook_manifest.yaml and runs the resolved concern chain.
|
|
82
|
+
hooks:status Print the runtime hook matrix (per-platform install + bindings)
|
|
83
|
+
Flags: --format json|table, --strict (CI), --project-root <path>
|
|
75
84
|
telemetry:record Append one artefact-engagement event (default-off)
|
|
76
85
|
telemetry:status Print artefact-engagement telemetry status (read-only)
|
|
77
86
|
telemetry:report Aggregate the engagement log into a quartile report
|
|
87
|
+
council:estimate Pre-call council cost preview (no API call, no spend)
|
|
88
|
+
Usage: council:estimate <question> [--input-mode prompt|roadmap]
|
|
89
|
+
council:run Run the council. Requires --confirm to spend.
|
|
90
|
+
Usage: council:run <question> --output <path> --confirm
|
|
91
|
+
council:render Re-render a saved council responses JSON to markdown
|
|
92
|
+
Usage: council:render <responses.json>
|
|
78
93
|
help Show this help
|
|
79
94
|
--version, -V Print package version
|
|
80
95
|
|
|
@@ -98,6 +113,9 @@ Examples:
|
|
|
98
113
|
./agent-config telemetry:status --format json
|
|
99
114
|
./agent-config telemetry:report --since 30d --top 20
|
|
100
115
|
./agent-config telemetry:report --since 7d --format json --top 0
|
|
116
|
+
./agent-config council:estimate prompt.txt
|
|
117
|
+
./agent-config council:run prompt.txt --output agents/council-sessions/out.json --confirm
|
|
118
|
+
./agent-config council:render agents/council-sessions/out.json
|
|
101
119
|
|
|
102
120
|
All commands operate on the CURRENT DIRECTORY (your project root).
|
|
103
121
|
The CLI is strictly consumer-facing. Maintainer tasks live in Taskfile.yml.
|
|
@@ -325,6 +343,34 @@ cmd_roadmap_progress_hook() {
|
|
|
325
343
|
exec python3 "$script" "$@"
|
|
326
344
|
}
|
|
327
345
|
|
|
346
|
+
cmd_onboarding_gate_hook() {
|
|
347
|
+
require_python3
|
|
348
|
+
local script
|
|
349
|
+
script="$(resolve_script "scripts/onboarding_gate_hook.py")" || return 1
|
|
350
|
+
exec python3 "$script" "$@"
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
cmd_context_hygiene_hook() {
|
|
354
|
+
require_python3
|
|
355
|
+
local script
|
|
356
|
+
script="$(resolve_script "scripts/context_hygiene_hook.py")" || return 1
|
|
357
|
+
exec python3 "$script" "$@"
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
cmd_dispatch_hook() {
|
|
361
|
+
require_python3
|
|
362
|
+
local script
|
|
363
|
+
script="$(resolve_script "scripts/hooks/dispatch_hook.py")" || return 1
|
|
364
|
+
exec python3 "$script" "$@"
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
cmd_hooks_status() {
|
|
368
|
+
require_python3
|
|
369
|
+
local script
|
|
370
|
+
script="$(resolve_script "scripts/hooks_status.py")" || return 1
|
|
371
|
+
exec python3 "$script" "$@"
|
|
372
|
+
}
|
|
373
|
+
|
|
328
374
|
cmd_chat_history_checkpoint() {
|
|
329
375
|
require_python3
|
|
330
376
|
local script
|
|
@@ -420,6 +466,17 @@ cmd_keys_install_openai() {
|
|
|
420
466
|
exec bash "$script" "$@"
|
|
421
467
|
}
|
|
422
468
|
|
|
469
|
+
# Council CLI — non-interactive wrapper around scripts.ai_council.orchestrator.
|
|
470
|
+
# Three subcommands share one Python entry point; we forward the subcommand
|
|
471
|
+
# verb so `./agent-config council:run --confirm` lands on `council_cli.py run`.
|
|
472
|
+
cmd_council() {
|
|
473
|
+
require_python3
|
|
474
|
+
local sub="$1"; shift || true
|
|
475
|
+
local script
|
|
476
|
+
script="$(resolve_script "scripts/council_cli.py")" || return 1
|
|
477
|
+
exec env PYTHONPATH="$PACKAGE_ROOT" python3 "$script" "$sub" "$@"
|
|
478
|
+
}
|
|
479
|
+
|
|
423
480
|
main() {
|
|
424
481
|
local cmd="${1-}"
|
|
425
482
|
[[ $# -gt 0 ]] && shift || true
|
|
@@ -446,9 +503,16 @@ main() {
|
|
|
446
503
|
chat-history:hook) cmd_chat_history_hook "$@" ;;
|
|
447
504
|
chat-history:checkpoint) cmd_chat_history_checkpoint "$@" ;;
|
|
448
505
|
roadmap-progress:hook) cmd_roadmap_progress_hook "$@" ;;
|
|
506
|
+
onboarding-gate:hook) cmd_onboarding_gate_hook "$@" ;;
|
|
507
|
+
context-hygiene:hook) cmd_context_hygiene_hook "$@" ;;
|
|
508
|
+
dispatch:hook) cmd_dispatch_hook "$@" ;;
|
|
509
|
+
hooks:status) cmd_hooks_status "$@" ;;
|
|
449
510
|
telemetry:record) cmd_telemetry_record "$@" ;;
|
|
450
511
|
telemetry:status) cmd_telemetry_status "$@" ;;
|
|
451
512
|
telemetry:report) cmd_telemetry_report "$@" ;;
|
|
513
|
+
council:estimate) cmd_council estimate "$@" ;;
|
|
514
|
+
council:run) cmd_council run "$@" ;;
|
|
515
|
+
council:render) cmd_council render "$@" ;;
|
|
452
516
|
help|--help|-h|"") usage ;;
|
|
453
517
|
--version|-V) print_version ;;
|
|
454
518
|
*)
|
|
@@ -38,11 +38,11 @@ class CouncilContext:
|
|
|
38
38
|
# placeholder. Order matters — the most specific pattern goes first.
|
|
39
39
|
|
|
40
40
|
_REDACTION_LINE_PATTERNS: list[tuple[re.Pattern[str], str]] = [
|
|
41
|
-
(re.compile(r"
|
|
41
|
+
(re.compile(r"~?/?\.config/agent-config/[^/\s]+\.key"),
|
|
42
42
|
"[redacted: agent-config key path]"),
|
|
43
|
-
(re.compile(r"^\s*Authorization:\s
|
|
43
|
+
(re.compile(r"^\s*Authorization:\s", re.IGNORECASE),
|
|
44
44
|
"[redacted: Authorization header]"),
|
|
45
|
-
(re.compile(r"(?i)
|
|
45
|
+
(re.compile(r"(?i)(api[_-]?key|secret|token|password)\s*[:=]"),
|
|
46
46
|
"[redacted: secret-like assignment]"),
|
|
47
47
|
(re.compile(r"sk-ant-[A-Za-z0-9_\-]{8,}"), "[redacted: anthropic-key-like token]"),
|
|
48
48
|
(re.compile(r"sk-[A-Za-z0-9_\-]{20,}"), "[redacted: openai-key-like token]"),
|
|
@@ -34,6 +34,16 @@ OPENAI_KEY_PATH = Path.home() / ".config" / "agent-config" / "openai.key"
|
|
|
34
34
|
DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-5"
|
|
35
35
|
DEFAULT_OPENAI_MODEL = "gpt-4o"
|
|
36
36
|
|
|
37
|
+
# OpenAI reasoning models (o1, o3, o4 families) reject `max_tokens` and the
|
|
38
|
+
# `system` role; they require `max_completion_tokens` and accept only `user`
|
|
39
|
+
# (and `developer`) messages.
|
|
40
|
+
_REASONING_PREFIXES = ("o1", "o3", "o4")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _is_reasoning_model(model: str) -> bool:
|
|
44
|
+
name = model.lower()
|
|
45
|
+
return any(name == p or name.startswith(p + "-") for p in _REASONING_PREFIXES)
|
|
46
|
+
|
|
37
47
|
|
|
38
48
|
class KeyGateError(RuntimeError):
|
|
39
49
|
"""Raised when a provider key file violates the 0600 contract."""
|
|
@@ -189,15 +199,21 @@ class OpenAIClient(ExternalAIClient):
|
|
|
189
199
|
|
|
190
200
|
def ask(self, system_prompt: str, user_prompt: str, max_tokens: int = 1024) -> CouncilResponse:
|
|
191
201
|
t0 = time.monotonic()
|
|
202
|
+
kwargs: dict[str, object] = {"model": self.model}
|
|
203
|
+
if _is_reasoning_model(self.model):
|
|
204
|
+
# o1/o3/o4 reasoning models reject `max_tokens` and `system` role.
|
|
205
|
+
kwargs["max_completion_tokens"] = max_tokens
|
|
206
|
+
kwargs["messages"] = [
|
|
207
|
+
{"role": "user", "content": f"{system_prompt}\n\n---\n\n{user_prompt}"},
|
|
208
|
+
]
|
|
209
|
+
else:
|
|
210
|
+
kwargs["max_tokens"] = max_tokens
|
|
211
|
+
kwargs["messages"] = [
|
|
212
|
+
{"role": "system", "content": system_prompt},
|
|
213
|
+
{"role": "user", "content": user_prompt},
|
|
214
|
+
]
|
|
192
215
|
try:
|
|
193
|
-
response = self._client.chat.completions.create(
|
|
194
|
-
model=self.model,
|
|
195
|
-
max_tokens=max_tokens,
|
|
196
|
-
messages=[
|
|
197
|
-
{"role": "system", "content": system_prompt},
|
|
198
|
-
{"role": "user", "content": user_prompt},
|
|
199
|
-
],
|
|
200
|
-
)
|
|
216
|
+
response = self._client.chat.completions.create(**kwargs)
|
|
201
217
|
except Exception as exc: # noqa: BLE001 - normalise all SDK errors
|
|
202
218
|
return CouncilResponse(
|
|
203
219
|
provider=self.name, model=self.model, text="",
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# One-off archive — 2026-05
|
|
2
|
+
|
|
3
|
+
> Archived per **Phase 0a.2** of `agents/roadmaps/road-to-rule-hardening.md`.
|
|
4
|
+
> Each script here was a single-purpose AI-council probe or measurement
|
|
5
|
+
> tied to a specific phase of `road-to-structural-optimization.md` (now
|
|
6
|
+
> archived) or `road-to-rule-hardening.md`. The session output lives
|
|
7
|
+
> under `agents/council-sessions/` (durable evidence) and the linter
|
|
8
|
+
> `scripts/check_one_off_location.py` enforces that no new
|
|
9
|
+
> `_one_off_*.py` lands outside this folder.
|
|
10
|
+
|
|
11
|
+
## Going forward — use the CLI, not new one-offs
|
|
12
|
+
|
|
13
|
+
> **Canonical pattern (Phase 6.7+):** new council runs go through
|
|
14
|
+
> `./agent-config council:{estimate,run,render}`. The CLI handles
|
|
15
|
+
> bundling, redaction, the cost gate, the `0600` key contract, the
|
|
16
|
+
> `enabled` check, and session persistence — every concern these
|
|
17
|
+
> archived one-offs reimplemented inline.
|
|
18
|
+
>
|
|
19
|
+
> ```bash
|
|
20
|
+
> ./agent-config council:estimate <question.md>
|
|
21
|
+
> ./agent-config council:run <question.md> \
|
|
22
|
+
> --output agents/council-sessions/<UTC-ts>.json --confirm
|
|
23
|
+
> ./agent-config council:render agents/council-sessions/<UTC-ts>.json
|
|
24
|
+
> ```
|
|
25
|
+
>
|
|
26
|
+
> Wire-level access (`scripts.ai_council.orchestrator`,
|
|
27
|
+
> `scripts.ai_council.bundler`) is still public for tests and library
|
|
28
|
+
> use, but writing a new `_one_off_*.py` purely to fan out to the
|
|
29
|
+
> council members is **not** the path. The scripts below are kept as
|
|
30
|
+
> historical evidence of the runs that produced specific roadmap
|
|
31
|
+
> decisions; they are not a template for new work.
|
|
32
|
+
|
|
33
|
+
## Lifecycle rule (uniform — Phase 0.2 of context-layer-maturity)
|
|
34
|
+
|
|
35
|
+
> A one-off is **archived**, never deleted. The session manifest under
|
|
36
|
+
> `agents/council-sessions/` is the audit trail; the script itself is
|
|
37
|
+
> kept here so a future contributor can re-read intent, re-run a probe
|
|
38
|
+
> on a future branch, or extract a reusable helper.
|
|
39
|
+
|
|
40
|
+
## Inventory
|
|
41
|
+
|
|
42
|
+
| Script | Roadmap / Phase | Council session id |
|
|
43
|
+
|---|---|---|
|
|
44
|
+
| `_one_off_2a4_acceptance.py` | structural-optimization 2A.4 | various 2A sessions |
|
|
45
|
+
| `_one_off_context_layer_v1_estimate.py` | context-layer-maturity v1 cost estimate | `2026-05-03T17-56-21Z` |
|
|
46
|
+
| `_one_off_context_layer_v1_review.py` | context-layer-maturity v1 review | `2026-05-03T17-56-21Z` |
|
|
47
|
+
| `_one_off_followups_review.py` | road-to-1-16-followups review | session under `agents/council-sessions/` |
|
|
48
|
+
| `_one_off_nondestructive_inline_audit.py` | non-destructive-by-default audit | session under `agents/council-sessions/` |
|
|
49
|
+
| `_one_off_phase4_dispatch_latency.py` | structural-optimization 4.3.1 cluster latency benchmark | local benchmark, no council |
|
|
50
|
+
| `_one_off_phase6_trigger_jaccard.py` | structural-optimization Phase 6 trigger overlap | local measurement |
|
|
51
|
+
| `_one_off_phase_2a_budget_rebalance.py` | structural-optimization 2A budget rebalance | `2026-05-03T*` |
|
|
52
|
+
| `_one_off_phase_2a_post_revert.py` | structural-optimization 2A post-revert | `2026-05-03T*` |
|
|
53
|
+
| `_one_off_rebalancing_audit.py` | rebalancing roadmap audit | session under `agents/council-sessions/` |
|
|
54
|
+
| `_one_off_roundtrip.py` | council client roundtrip smoke test | local smoke test |
|
|
55
|
+
| `_one_off_rule_hardening_v1.py` | rule-hardening v1 review | `2026-05-03T19-16-25Z` |
|
|
56
|
+
| `_one_off_structural_open_questions.py` | structural-optimization open questions | session under `agents/council-sessions/` |
|
|
57
|
+
| `_one_off_structural_optimization.py` | structural-optimization initial review | session under `agents/council-sessions/` |
|
|
58
|
+
| `_one_off_structural_v3_gaps.py` | structural-optimization v3 gap audit | session under `agents/council-sessions/` |
|
|
59
|
+
| `_one_off_structural_v3_review.py` | structural-optimization v3 review | session under `agents/council-sessions/` |
|
|
60
|
+
|
|
61
|
+
## Re-running an archived script
|
|
62
|
+
|
|
63
|
+
Imports may have shifted (e.g. `scripts.ai_council.*`). If a probe
|
|
64
|
+
needs to be re-run against a current branch, copy it back to its
|
|
65
|
+
original location, fix imports, run, then move the working copy
|
|
66
|
+
back here. Do **not** edit in place — keep the archive immutable
|
|
67
|
+
beyond cosmetic README updates.
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""Council audit of Budget-v2 result (Phase 4.5 of road-to-context-layer-maturity).
|
|
2
|
+
|
|
3
|
+
Phase 4 of road-to-context-layer-maturity selected two 4d-trim paths
|
|
4
|
+
(`direct-answers`, `no-cheap-questions`) from a fixed option set
|
|
5
|
+
documented in agents/contexts/budget-v2-matrix.md and shipped them.
|
|
6
|
+
Exit-gate actuals (run 2026-05-04): total 44,928 / 49,000 chars
|
|
7
|
+
(91.7 %, 4,072 chars headroom) — ≥ 4,000 headroom goal hit. Top-3
|
|
8
|
+
sum unchanged. Safety-floor rules untouched.
|
|
9
|
+
|
|
10
|
+
Phase 4.5 requires a council audit before archival: confirm the
|
|
11
|
+
trim choices were sound, no semantic drift introduced, no better
|
|
12
|
+
path missed inside the Phase 4 inputs gate.
|
|
13
|
+
|
|
14
|
+
Invocation:
|
|
15
|
+
.venv/bin/python -m scripts.ai_council.one_off_archive.2026-05._one_off_budget_v2_audit
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import sys
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
from scripts.ai_council.bundler import bundle_files
|
|
23
|
+
from scripts.ai_council.clients import (
|
|
24
|
+
AnthropicClient,
|
|
25
|
+
OpenAIClient,
|
|
26
|
+
load_anthropic_key,
|
|
27
|
+
load_openai_key,
|
|
28
|
+
)
|
|
29
|
+
from scripts.ai_council.orchestrator import (
|
|
30
|
+
CostBudget,
|
|
31
|
+
CouncilQuestion,
|
|
32
|
+
consult,
|
|
33
|
+
estimate,
|
|
34
|
+
)
|
|
35
|
+
from scripts.ai_council.pricing import estimate_cost, load_prices
|
|
36
|
+
from scripts.ai_council.project_context import detect_project_context
|
|
37
|
+
from scripts.ai_council.session import SessionManifest, save as save_session
|
|
38
|
+
|
|
39
|
+
REPO_ROOT = Path(__file__).resolve().parents[4]
|
|
40
|
+
ARTEFACTS = [
|
|
41
|
+
REPO_ROOT / "docs/contracts/load-context-budget-model.md",
|
|
42
|
+
REPO_ROOT / "agents/contexts/budget-v2-matrix.md",
|
|
43
|
+
REPO_ROOT / ".agent-src.uncompressed/rules/direct-answers.md",
|
|
44
|
+
REPO_ROOT / ".agent-src.uncompressed/rules/no-cheap-questions.md",
|
|
45
|
+
]
|
|
46
|
+
|
|
47
|
+
ORIGINAL_ASK = (
|
|
48
|
+
"Phase 4 of road-to-context-layer-maturity trimmed two always-rules "
|
|
49
|
+
"(direct-answers, no-cheap-questions) under the locked Model (b) "
|
|
50
|
+
"literal budget contract, hitting the ≥ 4,000-chars headroom goal "
|
|
51
|
+
"(actual: 4,072). Council task: audit the trim choices for "
|
|
52
|
+
"soundness and semantic drift before roadmap archival."
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
REVIEW_PROMPT = """\
|
|
56
|
+
# Council Audit — Budget-v2 Trim Result (Phase 4.5)
|
|
57
|
+
|
|
58
|
+
## Context
|
|
59
|
+
|
|
60
|
+
Phase 4 selected two 4d-trim paths from a fixed option set documented
|
|
61
|
+
in `budget-v2-matrix.md`. The matrix evaluated 4a (demote→auto), 4b
|
|
62
|
+
(merge), 4c (shared-context, locked at 3a Model (b) literal — no-op),
|
|
63
|
+
and 4d (compress prose) for every touchable always-rule. Safety-floor
|
|
64
|
+
rules (scope-control, non-destructive-by-default, commit-policy,
|
|
65
|
+
agent-authority) were untouchable. Outcome-untested rules were
|
|
66
|
+
restricted to 4d only per the Phase 4.0 inputs gate.
|
|
67
|
+
|
|
68
|
+
## Selected paths and result
|
|
69
|
+
|
|
70
|
+
- **4d on `direct-answers`** — emoji-scope subsection trimmed,
|
|
71
|
+
failure-mode collapsed to pointer. Δ ext: 4,098 → 3,987 (−111).
|
|
72
|
+
- **4d on `no-cheap-questions`** — "What counts as cheap" subsection
|
|
73
|
+
collapsed to pointer at `asking-and-brevity-examples.md`. Δ ext:
|
|
74
|
+
4,257 → 3,933 (−324).
|
|
75
|
+
|
|
76
|
+
Combined: −435 chars · headroom 3,637 → 4,072 (+435) · top-3 sum
|
|
77
|
+
unchanged · safety-floor rules untouched.
|
|
78
|
+
|
|
79
|
+
## Audit questions (please address each)
|
|
80
|
+
|
|
81
|
+
1. **Trim soundness** — do the surviving Iron Laws in both rules still
|
|
82
|
+
carry the rule's purpose, or did the prose trim sacrifice precision?
|
|
83
|
+
Cite the specific subsection if you find drift.
|
|
84
|
+
|
|
85
|
+
2. **Path selection** — was 4d the right choice for these two rules
|
|
86
|
+
given the matrix? Or should one of the deferred paths (4a, 4b)
|
|
87
|
+
have been picked despite the matrix verdict?
|
|
88
|
+
|
|
89
|
+
3. **Missed leverage** — inside the Phase 4 inputs gate (4d only on
|
|
90
|
+
outcome-untested rules; safety-floor untouchable), is there a
|
|
91
|
+
higher-leverage 4d target the matrix missed?
|
|
92
|
+
|
|
93
|
+
4. **Headroom durability** — 4,072 chars is +72 over the 4,000 goal.
|
|
94
|
+
Is this margin stable against expected near-term rule edits, or
|
|
95
|
+
should Phase 5 be tightened to defend it?
|
|
96
|
+
|
|
97
|
+
## Output Contract (STRICT)
|
|
98
|
+
|
|
99
|
+
```
|
|
100
|
+
### Verdict
|
|
101
|
+
**Trim choices sound:** <YES — archive · NO — escalate>
|
|
102
|
+
**One-sentence rationale:** <≤ 30 words>
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
```
|
|
106
|
+
### Per-question findings (1–4 above)
|
|
107
|
+
1. <≤ 2 sentences>
|
|
108
|
+
2. <≤ 2 sentences>
|
|
109
|
+
3. <≤ 2 sentences>
|
|
110
|
+
4. <≤ 2 sentences>
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
```
|
|
114
|
+
### Risk note
|
|
115
|
+
**Single biggest residual risk:** <one sentence>
|
|
116
|
+
**Mitigation (if any):** <one sentence or NONE>
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Be decisive — total response ≤ 800 words. Artefacts follow verbatim.
|
|
120
|
+
"""
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def main() -> int:
    """Run the single-round council audit and archive the session.

    Prints a per-member cost estimate, consults every council member once
    with the review prompt followed by the bundled artefacts, prints the
    actual cost, and saves a session manifest together with the collected
    responses.

    Returns:
        0 when no collected response carries an error. 1 when no round
        completed, when the final round has no error-free response, or
        when any collected response carries an error.
    """
    members = [
        AnthropicClient(api_key=load_anthropic_key(), model="claude-sonnet-4-5"),
        OpenAIClient(api_key=load_openai_key(), model="gpt-4o"),
    ]

    bundle = bundle_files(ARTEFACTS)
    project = detect_project_context(REPO_ROOT)
    table = load_prices()

    # The prompt comes first, then a horizontal rule, then the artefacts.
    question = CouncilQuestion(
        mode="files",
        user_prompt="\n\n---\n\n".join((REVIEW_PROMPT, bundle.text)),
        max_tokens=2048,
    )

    estimates = estimate(question, members, table, project=project, original_ask=ORIGINAL_ASK)
    print("=== ESTIMATE (single round) ===")
    total_est = 0.0
    for member, est in zip(members, estimates):
        print(f" {member.name}/{member.model}: ~{est.input_tokens} in + {est.output_tokens} out = ${est.total_usd:.4f}")
        total_est += est.total_usd
    print(f" TOTAL per round (max): ${total_est:.4f}\n")

    budget = CostBudget(
        max_input_tokens=200_000,
        max_output_tokens=80_000,
        max_calls=20,
        max_total_usd=2.50,
    )

    rounds_collected: list[list] = []

    def _on_round_complete(round_idx: int, round_responses) -> None:
        """Store the round's responses and print one status line per member."""
        rounds_collected.append(list(round_responses))
        print(f"=== ROUND {round_idx + 1} COMPLETE ===")
        for resp in round_responses:
            if resp.error:
                print(f" [error] {resp.provider}/{resp.model}: {resp.error}")
            else:
                actual = estimate_cost(resp.provider, resp.model, resp.input_tokens, resp.output_tokens, table)
                print(f" [done] {resp.provider}/{resp.model}: {resp.input_tokens} in / "
                      f"{resp.output_tokens} out · {resp.latency_ms} ms · ${actual.total_usd:.4f}")
        print()

    print("=== CONSULT (1 round, Phase 4.5 Budget-v2 audit) ===")
    consult(
        members,
        question,
        budget,
        rounds=1,
        on_round_complete=_on_round_complete,
        table=table,
        project=project,
        original_ask=ORIGINAL_ASK,
    )

    if not rounds_collected:
        print("[error] no rounds completed", file=sys.stderr)
        return 1

    # Only error-free responses contribute to the actual cost.
    actual_total = sum(
        (
            estimate_cost(resp.provider, resp.model, resp.input_tokens, resp.output_tokens, table).total_usd
            for responses in rounds_collected
            for resp in responses
            if not resp.error
        ),
        0.0,
    )
    print(f"=== TOTAL ACTUAL: ${actual_total:.4f} ===")

    final_round = rounds_collected[-1]
    # Nothing is saved when the final round has no error-free response.
    if all(resp.error for resp in final_round):
        return 1

    manifest = SessionManifest(
        mode="files",
        artefact="agents/roadmaps/road-to-context-layer-maturity.md",
        original_ask=ORIGINAL_ASK,
        members=[f"{resp.provider}/{resp.model}" for resp in final_round],
        rounds=len(rounds_collected),
        cost_usd_estimated=total_est,
        cost_usd_actual=actual_total,
        extra={"purpose": "Phase 4.5 Budget-v2 trim-result audit"},
    )
    session_dir = save_session(manifest=manifest, responses=rounds_collected)
    print(f"[saved] {session_dir.relative_to(REPO_ROOT)}/")

    # The session is saved either way; the exit code still flags any error.
    had_errors = any(resp.error for responses in rounds_collected for resp in responses)
    return int(had_errors)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
package/scripts/ai_council/{_one_off_roundtrip.py → one_off_archive/2026-05/_one_off_roundtrip.py}
RENAMED
|
@@ -1,14 +1,19 @@
|
|
|
1
|
-
"""One-off Phase-1 round-trip runner.
|
|
1
|
+
"""One-off Phase-1 round-trip runner — HISTORICAL ARCHIVE.
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
the capture-only fence on `road-to-ai-council.md` Phase 2+ and the
|
|
5
|
-
end-to-end verification on `road-to-council-modes.md` Phase 2a.
|
|
3
|
+
Going forward, council runs go through the CLI:
|
|
6
4
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
5
|
+
./agent-config council:estimate <question.md>
|
|
6
|
+
./agent-config council:run <question.md> \
|
|
7
|
+
--output agents/council-sessions/<UTC-ts>.json --confirm
|
|
8
|
+
./agent-config council:render agents/council-sessions/<UTC-ts>.json
|
|
10
9
|
|
|
11
|
-
|
|
10
|
+
This script predates `scripts/council_cli.py` (Phase 6.7) and is kept
|
|
11
|
+
only as the evidence artefact that lifted the capture-only fence on
|
|
12
|
+
`road-to-ai-council.md` Phase 2+ and the end-to-end verification on
|
|
13
|
+
`road-to-council-modes.md` Phase 2a. Do **not** copy it as a template
|
|
14
|
+
for new one-offs — write a question file and use the CLI instead.
|
|
15
|
+
|
|
16
|
+
Invocation (historical):
|
|
12
17
|
.venv/bin/python -m scripts.ai_council._one_off_roundtrip
|
|
13
18
|
"""
|
|
14
19
|
from __future__ import annotations
|