@event4u/agent-config 1.16.0 → 1.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/{agents-audit.md → agents/audit.md} +4 -3
- package/.agent-src/commands/{agents-cleanup.md → agents/cleanup.md} +12 -6
- package/.agent-src/commands/{agents-prepare.md → agents/prepare.md} +4 -3
- package/.agent-src/commands/agents.md +46 -0
- package/.agent-src/commands/{chat-history-checkpoint.md → chat-history/checkpoint.md} +4 -4
- package/.agent-src/commands/{chat-history-clear.md → chat-history/clear.md} +4 -4
- package/.agent-src/commands/{chat-history-resume.md → chat-history/resume.md} +4 -4
- package/.agent-src/commands/chat-history/show.md +107 -0
- package/.agent-src/commands/chat-history.md +33 -89
- package/.agent-src/commands/{commit-in-chunks.md → commit/in-chunks.md} +15 -13
- package/.agent-src/commands/commit.md +22 -2
- package/.agent-src/commands/{context-create.md → context/create.md} +4 -3
- package/.agent-src/commands/{context-refactor.md → context/refactor.md} +4 -3
- package/.agent-src/commands/context.md +44 -0
- package/.agent-src/commands/{copilot-agents-init.md → copilot-agents/init.md} +4 -3
- package/.agent-src/commands/{copilot-agents-optimize.md → copilot-agents/optimize.md} +4 -3
- package/.agent-src/commands/copilot-agents.md +44 -0
- package/.agent-src/commands/council/default.md +221 -0
- package/.agent-src/commands/{council-design.md → council/design.md} +6 -5
- package/.agent-src/commands/{council-optimize.md → council/optimize.md} +7 -6
- package/.agent-src/commands/{council-pr.md → council/pr.md} +6 -5
- package/.agent-src/commands/council.md +47 -212
- package/.agent-src/commands/{create-pr-description.md → create-pr/description-only.md} +4 -2
- package/.agent-src/commands/create-pr.md +26 -5
- package/.agent-src/commands/{feature-dev.md → feature/dev.md} +5 -10
- package/.agent-src/commands/{feature-explore.md → feature/explore.md} +4 -8
- package/.agent-src/commands/{feature-plan.md → feature/plan.md} +4 -8
- package/.agent-src/commands/{feature-refactor.md → feature/refactor.md} +4 -8
- package/.agent-src/commands/{feature-roadmap.md → feature/roadmap.md} +6 -10
- package/.agent-src/commands/feature.md +6 -12
- package/.agent-src/commands/{fix-ci.md → fix/ci.md} +4 -8
- package/.agent-src/commands/{fix-portability.md → fix/portability.md} +4 -8
- package/.agent-src/commands/{fix-pr-bot-comments.md → fix/pr-bots.md} +4 -8
- package/.agent-src/commands/{fix-pr-developer-comments.md → fix/pr-developers.md} +4 -8
- package/.agent-src/commands/{fix-pr-comments.md → fix/pr.md} +7 -11
- package/.agent-src/commands/{fix-references.md → fix/refs.md} +4 -8
- package/.agent-src/commands/{fix-seeder.md → fix/seeder.md} +4 -8
- package/.agent-src/commands/fix.md +7 -13
- package/.agent-src/commands/{do-and-judge.md → judge/on-diff.md} +4 -3
- package/.agent-src/commands/judge/solo.md +90 -0
- package/.agent-src/commands/{do-in-steps.md → judge/steps.md} +4 -3
- package/.agent-src/commands/judge.md +35 -70
- package/.agent-src/commands/{memory-add.md → memory/add.md} +4 -3
- package/.agent-src/commands/{memory-full.md → memory/load.md} +4 -3
- package/.agent-src/commands/{memory-promote.md → memory/promote.md} +4 -3
- package/.agent-src/commands/{propose-memory.md → memory/propose.md} +4 -3
- package/.agent-src/commands/memory.md +48 -0
- package/.agent-src/commands/{module-create.md → module/create.md} +4 -3
- package/.agent-src/commands/{module-explore.md → module/explore.md} +4 -3
- package/.agent-src/commands/module.md +44 -0
- package/.agent-src/commands/{optimize-agents.md → optimize/agents.md} +4 -8
- package/.agent-src/commands/{optimize-augmentignore.md → optimize/augmentignore.md} +4 -9
- package/.agent-src/commands/{optimize-rtk-filters.md → optimize/rtk.md} +4 -8
- package/.agent-src/commands/{optimize-skills.md → optimize/skills.md} +4 -8
- package/.agent-src/commands/optimize.md +4 -10
- package/.agent-src/commands/{override-create.md → override/create.md} +4 -3
- package/.agent-src/commands/{override-manage.md → override/manage.md} +4 -3
- package/.agent-src/commands/override.md +44 -0
- package/.agent-src/commands/{roadmap-create.md → roadmap/create.md} +4 -3
- package/.agent-src/commands/{roadmap-execute.md → roadmap/execute.md} +4 -3
- package/.agent-src/commands/roadmap.md +44 -0
- package/.agent-src/commands/{tests-create.md → tests/create.md} +4 -3
- package/.agent-src/commands/{tests-execute.md → tests/execute.md} +4 -3
- package/.agent-src/commands/tests.md +44 -0
- package/.agent-src/contexts/communication/rules-auto/artifact-engagement-recording-mechanics.md +72 -0
- package/.agent-src/contexts/communication/rules-auto/augment-portability-mechanics.md +79 -0
- package/.agent-src/contexts/communication/rules-auto/augment-source-of-truth-mechanics.md +98 -0
- package/.agent-src/contexts/communication/rules-auto/cli-output-handling-mechanics.md +87 -0
- package/.agent-src/contexts/communication/rules-auto/command-suggestion-policy-mechanics.md +62 -0
- package/.agent-src/contexts/communication/rules-auto/docs-sync-mechanics.md +78 -0
- package/.agent-src/contexts/communication/rules-auto/package-ci-checks-mechanics.md +85 -0
- package/.agent-src/contexts/communication/rules-auto/review-routing-awareness-mechanics.md +65 -0
- package/.agent-src/contexts/communication/rules-auto/roadmap-progress-sync-mechanics.md +78 -0
- package/.agent-src/contexts/communication/rules-auto/skill-quality-mechanics.md +62 -0
- package/.agent-src/contexts/communication/rules-auto/slash-command-routing-policy-mechanics.md +55 -0
- package/.agent-src/contexts/communication/rules-auto/ui-audit-gate-mechanics.md +53 -0
- package/.agent-src/contexts/communication/rules-auto/user-interaction-mechanics.md +77 -0
- package/.agent-src/contexts/judges/no-consolidate-rationale.md +102 -0
- package/.agent-src/contexts/judges/persona-voice-rubric.md +140 -0
- package/.agent-src/rules/artifact-engagement-recording.md +13 -69
- package/.agent-src/rules/ask-when-uncertain.md +27 -42
- package/.agent-src/rules/augment-portability.md +15 -61
- package/.agent-src/rules/augment-source-of-truth.md +27 -93
- package/.agent-src/rules/cli-output-handling.md +10 -76
- package/.agent-src/rules/command-suggestion-policy.md +18 -59
- package/.agent-src/rules/commit-conventions.md +17 -14
- package/.agent-src/rules/context-hygiene.md +6 -0
- package/.agent-src/rules/direct-answers.md +35 -59
- package/.agent-src/rules/docker-commands.md +5 -5
- package/.agent-src/rules/docs-sync.md +15 -69
- package/.agent-src/rules/language-and-tone.md +48 -72
- package/.agent-src/rules/missing-tool-handling.md +28 -22
- package/.agent-src/rules/no-cheap-questions.md +39 -53
- package/.agent-src/rules/no-roadmap-references.md +73 -0
- package/.agent-src/rules/onboarding-gate.md +7 -0
- package/.agent-src/rules/package-ci-checks.md +21 -61
- package/.agent-src/rules/preservation-guard.md +64 -29
- package/.agent-src/rules/review-routing-awareness.md +24 -43
- package/.agent-src/rules/roadmap-progress-sync.md +31 -65
- package/.agent-src/rules/rule-type-governance.md +28 -0
- package/.agent-src/rules/security-sensitive-stop.md +8 -8
- package/.agent-src/rules/skill-quality.md +16 -48
- package/.agent-src/rules/slash-command-routing-policy.md +7 -4
- package/.agent-src/rules/think-before-action.md +52 -42
- package/.agent-src/rules/tool-safety.md +19 -16
- package/.agent-src/rules/ui-audit-gate.md +24 -38
- package/.agent-src/rules/user-interaction.md +13 -68
- package/.agent-src/skills/ai-council/SKILL.md +2 -0
- package/.agent-src/skills/api-testing/SKILL.md +1 -1
- package/.agent-src/skills/check-refs/SKILL.md +59 -40
- package/.agent-src/skills/conventional-commits-writing/SKILL.md +86 -28
- package/.agent-src/skills/copilot-agents-optimization/SKILL.md +5 -5
- package/.agent-src/skills/developer-like-execution/SKILL.md +4 -4
- package/.agent-src/skills/finishing-a-development-branch/SKILL.md +101 -65
- package/.agent-src/skills/flux/SKILL.md +30 -10
- package/.agent-src/skills/github-ci/SKILL.md +2 -2
- package/.agent-src/skills/judge-code-quality/SKILL.md +7 -8
- package/.agent-src/skills/judge-security-auditor/SKILL.md +4 -5
- package/.agent-src/skills/judge-test-coverage/SKILL.md +3 -4
- package/.agent-src/skills/lint-skills/SKILL.md +57 -39
- package/.agent-src/skills/md-language-check/SKILL.md +61 -39
- package/.agent-src/skills/override-management/SKILL.md +5 -5
- package/.agent-src/skills/quality-tools/SKILL.md +2 -2
- package/.agent-src/skills/react-shadcn-ui/SKILL.md +116 -43
- package/.agent-src/skills/readme-reviewer/SKILL.md +30 -29
- package/.agent-src/skills/readme-writing/SKILL.md +78 -53
- package/.agent-src/skills/readme-writing-package/SKILL.md +50 -47
- package/.agent-src/skills/receiving-code-review/SKILL.md +52 -47
- package/.agent-src/skills/refine-prompt/SKILL.md +0 -1
- package/.agent-src/skills/requesting-code-review/SKILL.md +35 -30
- package/.agent-src/skills/security/SKILL.md +7 -2
- package/.agent-src/skills/security-audit/SKILL.md +7 -3
- package/.agent-src/skills/systematic-debugging/SKILL.md +68 -60
- package/.agent-src/skills/test-driven-development/SKILL.md +59 -57
- package/.agent-src/skills/test-performance/SKILL.md +0 -1
- package/.agent-src/skills/traefik/SKILL.md +4 -4
- package/.agent-src/skills/verify-completion-evidence/SKILL.md +28 -26
- package/.agent-src/templates/roadmaps.md +4 -0
- package/.claude-plugin/marketplace.json +22 -11
- package/AGENTS.md +2 -2
- package/CHANGELOG.md +125 -1
- package/README.md +18 -17
- package/docs/architecture.md +4 -6
- package/docs/catalog.md +67 -39
- package/docs/contracts/STABILITY.md +13 -7
- package/docs/contracts/adr-chat-history-split.md +1 -3
- package/docs/contracts/adr-command-suggestion.md +0 -2
- package/docs/contracts/adr-implement-ticket-runtime.md +1 -2
- package/docs/contracts/adr-product-ui-track.md +3 -6
- package/docs/contracts/adr-prompt-driven-execution.md +3 -4
- package/docs/contracts/agent-memory-contract.md +6 -11
- package/docs/contracts/artifact-engagement-flow.md +6 -9
- package/docs/contracts/command-clusters.md +56 -46
- package/docs/contracts/command-suggestion-flow.md +1 -3
- package/docs/contracts/context-paths.md +99 -0
- package/docs/contracts/file-ownership-matrix.json +6722 -0
- package/docs/contracts/file-ownership-matrix.md +134 -0
- package/docs/contracts/implement-ticket-flow.md +6 -9
- package/docs/contracts/linear-ai-rules-inclusion.md +0 -1
- package/docs/contracts/linear-ai-three-layers.md +0 -2
- package/docs/contracts/load-context-budget-model.md +258 -0
- package/docs/contracts/load-context-schema.md +21 -3
- package/docs/contracts/roadmap-complexity-standard.md +137 -0
- package/docs/contracts/rule-interactions.md +0 -1
- package/docs/contracts/rule-priority-hierarchy.md +1 -1
- package/docs/contracts/ui-track-flow.md +7 -17
- package/docs/customization.md +2 -0
- package/docs/getting-started.md +5 -4
- package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +134 -0
- package/docs/guidelines/agent-infra/asking-and-brevity-examples.md +100 -0
- package/docs/guidelines/agent-infra/direct-answers-demos.md +145 -0
- package/docs/guidelines/agent-infra/verify-before-complete-demos.md +128 -0
- package/package.json +1 -1
- package/scripts/_phase2_shim_helper.py +109 -0
- package/scripts/agent-config +30 -0
- package/scripts/ai_council/one_off_archive/2026-05/README.md +45 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_2a4_acceptance.py +208 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_budget_v2_audit.py +206 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_context_layer_v1_estimate.py +67 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_context_layer_v1_review.py +292 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_followups_review.py +259 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_nondestructive_inline_audit.py +209 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase4_dispatch_latency.py +108 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase6_trigger_jaccard.py +92 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase_2a_budget_rebalance.py +257 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase_2a_post_revert.py +197 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_rule_hardening_v1.py +251 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_structural_open_questions.py +232 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_structural_optimization.py +144 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_structural_v3_gaps.py +252 -0
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_structural_v3_review.py +240 -0
- package/scripts/build_rule_trigger_matrix.py +360 -0
- package/scripts/check_always_budget.py +402 -45
- package/scripts/check_cluster_patterns.py +159 -0
- package/scripts/check_command_count_messaging.py +14 -7
- package/scripts/check_context_paths.py +201 -0
- package/scripts/check_no_roadmap_refs.py +155 -0
- package/scripts/check_one_off_location.py +81 -0
- package/scripts/check_phase_coupling.py +148 -0
- package/scripts/check_portability.py +2 -0
- package/scripts/check_references.py +35 -2
- package/scripts/check_safety_floor_untouched.py +125 -0
- package/scripts/command_suggester/loader.py +4 -1
- package/scripts/compress.py +64 -15
- package/scripts/context_hygiene_hook.py +173 -0
- package/scripts/generate_index.py +6 -2
- package/scripts/generate_ownership_matrix.py +323 -0
- package/scripts/hooks/augment-context-hygiene.sh +55 -0
- package/scripts/hooks/augment-onboarding-gate.sh +55 -0
- package/scripts/hooks/augment-roadmap-progress.sh +57 -0
- package/scripts/install.py +105 -45
- package/scripts/lint_examples.py +98 -0
- package/scripts/lint_no_new_atomic_commands.py +12 -11
- package/scripts/lint_roadmap_complexity.py +127 -0
- package/scripts/onboarding_gate_hook.py +137 -0
- package/scripts/requirements-evals.txt +1 -0
- package/scripts/roadmap_progress_hook.py +159 -0
- package/scripts/schemas/command.schema.json +4 -3
- package/scripts/schemas/rule.schema.json +5 -0
- package/scripts/skill_linter.py +1 -0
- package/scripts/sync_agent_settings.py +25 -2
- package/scripts/update_counts.py +7 -0
- /package/scripts/ai_council/{_one_off_rebalancing_audit.py → one_off_archive/2026-05/_one_off_rebalancing_audit.py} +0 -0
- /package/scripts/ai_council/{_one_off_roundtrip.py → one_off_archive/2026-05/_one_off_roundtrip.py} +0 -0
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"""Council inline-content audit for `non-destructive-by-default` (Phase 1.1.3).
|
|
2
|
+
|
|
3
|
+
Roadmap `road-to-1-16-followups` Phase 1.1.3 requires a council audit
|
|
4
|
+
before merging the `load_context:` rollout for the safety-floor rule.
|
|
5
|
+
Both reviewers must return PASS confirming that:
|
|
6
|
+
|
|
7
|
+
- The Hard-Floor trigger table stays inline.
|
|
8
|
+
- The Iron Law caps block stays inline.
|
|
9
|
+
- The Cloud Behavior section stays inline.
|
|
10
|
+
- No obligation moved to `destructive-mechanics.md` that should
|
|
11
|
+
have stayed in the rule.
|
|
12
|
+
|
|
13
|
+
This audit is post-rollout (the rollout already shipped via Phase 7.4
|
|
14
|
+
of `road-to-structural-optimization`); it certifies the *current* state
|
|
15
|
+
of `main` against the Phase 1.1.3 contract.
|
|
16
|
+
|
|
17
|
+
Invocation:
|
|
18
|
+
.venv/bin/python -m scripts.ai_council._one_off_nondestructive_inline_audit
|
|
19
|
+
"""
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import sys
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
|
|
25
|
+
from scripts.ai_council.bundler import bundle_files
|
|
26
|
+
from scripts.ai_council.clients import (
|
|
27
|
+
AnthropicClient,
|
|
28
|
+
OpenAIClient,
|
|
29
|
+
load_anthropic_key,
|
|
30
|
+
load_openai_key,
|
|
31
|
+
)
|
|
32
|
+
from scripts.ai_council.orchestrator import (
|
|
33
|
+
CostBudget,
|
|
34
|
+
CouncilQuestion,
|
|
35
|
+
consult,
|
|
36
|
+
estimate,
|
|
37
|
+
)
|
|
38
|
+
from scripts.ai_council.pricing import estimate_cost, load_prices
|
|
39
|
+
from scripts.ai_council.project_context import detect_project_context
|
|
40
|
+
from scripts.ai_council.session import SessionManifest, save as save_session
|
|
41
|
+
|
|
42
|
+
REPO_ROOT = Path(__file__).resolve().parents[2]
|
|
43
|
+
ARTEFACTS = [
|
|
44
|
+
REPO_ROOT / ".agent-src.uncompressed/rules/non-destructive-by-default.md",
|
|
45
|
+
REPO_ROOT / ".agent-src.uncompressed/contexts/authority/destructive-mechanics.md",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
ORIGINAL_ASK = (
|
|
49
|
+
"Phase 1.1.3 of road-to-1-16-followups requires a council audit on "
|
|
50
|
+
"the `non-destructive-by-default` rule after `load_context:` rollout. "
|
|
51
|
+
"Verify that Hard-Floor table, Iron Law caps block, and Cloud Behavior "
|
|
52
|
+
"section are still inline; flag any obligation that moved to context "
|
|
53
|
+
"and should not have."
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
REVIEW_PROMPT = """\
|
|
57
|
+
# Council Inline-Content Audit — non-destructive-by-default
|
|
58
|
+
|
|
59
|
+
## Context
|
|
60
|
+
|
|
61
|
+
The host agent rolled out `load_context:` to the `non-destructive-by-default` \
|
|
62
|
+
rule (universal safety floor). Failure-mode catalog and Bulk-deletions-WIP \
|
|
63
|
+
allowed/forbidden lists moved to `destructive-mechanics.md`. The roadmap \
|
|
64
|
+
contract requires that the load-bearing inline obligations stay in the rule.
|
|
65
|
+
|
|
66
|
+
You are auditing the **current state** of two files (rule + mechanics) to \
|
|
67
|
+
certify the rollout. This is not a re-design review.
|
|
68
|
+
|
|
69
|
+
## Required-inline checklist
|
|
70
|
+
|
|
71
|
+
1. **Hard-Floor trigger table** — six rows: production-branch merge, \
|
|
72
|
+
deploy/release, push to remote, production data/infra, whimsical or \
|
|
73
|
+
unscoped bulk deletion, commit containing bulk deletions or infra changes.
|
|
74
|
+
2. **Iron Law caps block** — three lines starting with \
|
|
75
|
+
`HARD FLOOR OVERRIDES EVERYTHING.`
|
|
76
|
+
3. **Cloud Behavior section** — two-sentence statement that the floor applies \
|
|
77
|
+
on Claude.ai Web, Skills API, and any cloud agent, with no cloud override.
|
|
78
|
+
4. **"Triggers require explicit user confirmation on this turn" clause** — \
|
|
79
|
+
the not-from-previous-turn, not-from-roadmap, not-from-standing-autonomy \
|
|
80
|
+
anchor.
|
|
81
|
+
|
|
82
|
+
## Output Contract (STRICT)
|
|
83
|
+
|
|
84
|
+
Produce exactly these blocks in order. Total response budget <= 800 words.
|
|
85
|
+
|
|
86
|
+
```
|
|
87
|
+
### Inline-content audit
|
|
88
|
+
|
|
89
|
+
**Hard-Floor trigger table inline:** <YES | NO — list missing rows>
|
|
90
|
+
**Iron Law caps block inline:** <YES | NO — quote what is there>
|
|
91
|
+
**Cloud Behavior section inline:** <YES | NO — quote what is there>
|
|
92
|
+
**On-this-turn confirmation clause inline:** <YES | NO — quote anchor>
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
```
|
|
96
|
+
### Misplaced-content audit
|
|
97
|
+
|
|
98
|
+
**Any obligation moved to mechanics that should be inline:** <NONE | list>
|
|
99
|
+
**Any failure mode duplicated in both files (drift risk):** <NONE | list>
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
### Final verdict
|
|
104
|
+
|
|
105
|
+
**Verdict:** <PASS | FAIL>
|
|
106
|
+
**If FAIL, single blocking issue:** <one sentence>
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Verdict definitions:
|
|
110
|
+
- **PASS** — every required-inline item is present in the rule; no obligation \
|
|
111
|
+
is misplaced. The rollout meets the Phase 1.1.3 contract.
|
|
112
|
+
- **FAIL** — at least one required-inline item is missing, weakened, or \
|
|
113
|
+
duplicated in a way that creates a normative drift surface.
|
|
114
|
+
|
|
115
|
+
The two artefacts follow this prompt verbatim.
|
|
116
|
+
"""
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def main() -> int:
    """Run the single-round council audit and persist the session.

    Bundles ``ARTEFACTS``, appends them to ``REVIEW_PROMPT``, prints a cost
    estimate per council member, runs one ``consult`` round, prints the
    actual cost, and saves a ``SessionManifest`` with the collected
    responses.

    Returns:
        0 when at least one round completed and no response carried an
        error; 1 when no round completed, when every response of the final
        round failed (nothing is saved in that case), or when any collected
        response carried an error.
    """
    anthropic = AnthropicClient(api_key=load_anthropic_key(), model="claude-sonnet-4-5")
    openai = OpenAIClient(api_key=load_openai_key(), model="gpt-4o")
    members = [anthropic, openai]

    context = bundle_files(ARTEFACTS)
    project = detect_project_context(REPO_ROOT)
    table = load_prices()

    # The artefacts follow the prompt, separated by a horizontal rule.
    user_prompt = REVIEW_PROMPT + "\n\n---\n\n" + context.text

    question = CouncilQuestion(
        mode="files",
        user_prompt=user_prompt,
        max_tokens=2048,
    )

    estimates = estimate(
        question, members, table, project=project, original_ask=ORIGINAL_ASK,
    )
    print("=== ESTIMATE (single round, max tokens) ===")
    total_est = 0.0
    for c, e in zip(members, estimates):
        print(f" {c.name}/{c.model}: ~{e.input_tokens} in + {e.output_tokens} out = ${e.total_usd:.4f}")
        total_est += e.total_usd
    print(f" TOTAL per round (max): ${total_est:.4f}")
    print()

    budget = CostBudget(
        max_input_tokens=200_000,
        max_output_tokens=80_000,
        max_calls=20,
        max_total_usd=2.50,
    )

    rounds_collected: list[list] = []

    def _on_round_complete(round_idx: int, round_responses) -> None:
        # Keep a copy of every round so the totals and the manifest can be
        # built after ``consult`` returns.
        rounds_collected.append(list(round_responses))
        print(f"=== ROUND {round_idx + 1} COMPLETE ===")
        for r in round_responses:
            if r.error:
                print(f" [error] {r.provider}/{r.model}: {r.error}")
                continue
            actual = estimate_cost(r.provider, r.model, r.input_tokens, r.output_tokens, table)
            print(f" [done] {r.provider}/{r.model}: {r.input_tokens} in / "
                  f"{r.output_tokens} out · {r.latency_ms} ms · ${actual.total_usd:.4f}")
        print()

    print("=== CONSULT (1 round, Phase 1.1.3 inline-content audit) ===")
    consult(
        members, question, budget,
        rounds=1,
        on_round_complete=_on_round_complete,
        table=table, project=project, original_ask=ORIGINAL_ASK,
    )

    if not rounds_collected:
        print("[error] no rounds completed", file=sys.stderr)
        return 1

    # Errored responses are excluded from the actual-cost total.
    actual_total = 0.0
    for round_responses in rounds_collected:
        for r in round_responses:
            if r.error:
                continue
            actual = estimate_cost(r.provider, r.model, r.input_tokens, r.output_tokens, table)
            actual_total += actual.total_usd
    print(f"=== TOTAL ACTUAL: ${actual_total:.4f} ===")

    final_round = rounds_collected[-1]
    if all(r.error for r in final_round):
        # Nothing usable to persist; say so instead of exiting silently.
        print("[error] every response in the final round failed", file=sys.stderr)
        return 1

    manifest = SessionManifest(
        mode="files",
        artefact=".agent-src.uncompressed/rules/non-destructive-by-default.md",
        original_ask=ORIGINAL_ASK,
        members=[f"{r.provider}/{r.model}" for r in final_round],
        rounds=len(rounds_collected),
        cost_usd_estimated=total_est,
        cost_usd_actual=actual_total,
        extra={"purpose": "Phase 1.1.3 inline-content audit on non-destructive-by-default"},
    )
    session_dir = save_session(manifest=manifest, responses=rounds_collected)
    try:
        shown = session_dir.relative_to(REPO_ROOT)
    except ValueError:
        # The session is already on disk; a directory outside REPO_ROOT must
        # not turn the confirmation message into a crash.
        shown = session_dir
    print(f"[saved] {shown}/")
    return 1 if any(r.error for round_r in rounds_collected for r in round_r) else 0
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
if __name__ == "__main__":
|
|
209
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Phase 4.3.1 — council cluster dispatch-latency benchmark.
|
|
3
|
+
|
|
4
|
+
Measures the wall-clock overhead of the cluster dispatch layer for the
|
|
5
|
+
`/council` family. Compares:
|
|
6
|
+
|
|
7
|
+
baseline: directly read council-pr.md / council-design.md (atomic shape)
|
|
8
|
+
cluster : read council.md (dispatcher) + parse table + read council-pr.md
|
|
9
|
+
/ council-design.md (cluster shape)
|
|
10
|
+
|
|
11
|
+
The dispatch layer in agent-config is a markdown parse, not a runtime
|
|
12
|
+
function, so this benchmarks the file-system + frontmatter + table-row
|
|
13
|
+
extraction cost. Threshold per roadmap § 4.3.1: ≤ +100ms wall-clock.
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import re
|
|
18
|
+
import statistics
|
|
19
|
+
import time
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
# Parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parent.parent
# Directory holding the command markdown files read by both probes.
COMMANDS = ROOT / ".agent-src/commands"
N_ITER = 1000  # cold + warm; markdown is tiny so we run a lot of iterations

# Matches a `---`-delimited frontmatter block at the very start of a text.
FRONTMATTER_RE = re.compile(r"^---\n(.*?)\n---\n", re.DOTALL)
# Captures (sub-command, backticked target) pairs from dispatcher table rows
# whose first cell is a backticked `/council <sub-command>`.
TABLE_ROW_RE = re.compile(r"\|\s*`/council\s+([a-z-]+)`\s*\|\s*`([^`]+)`")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _read_atomic(target: str) -> str:
    """Baseline probe: load the atomic command file for *target* directly.

    Reads ``council-<target>.md`` from ``COMMANDS`` and verifies that the
    text opens with a frontmatter block.

    Returns:
        The full file text.

    Raises:
        RuntimeError: if the text does not start with frontmatter.
    """
    source = COMMANDS / f"council-{target}.md"
    contents = source.read_text(encoding="utf-8")
    if FRONTMATTER_RE.match(contents) is None:
        raise RuntimeError(f"no frontmatter in {source}")
    return contents
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _read_cluster(target: str) -> str:
    """Cluster probe: resolve *target* through the dispatcher, then load it.

    Reads ``council.md`` from ``COMMANDS``, builds a sub-command -> file map
    from its routing-table rows, and reads the file mapped to *target*.

    Returns:
        The full text of the routed file.

    Raises:
        RuntimeError: if the dispatcher has no row for *target*, or if the
            routed file does not start with frontmatter.
    """
    dispatcher_text = (COMMANDS / "council.md").read_text(encoding="utf-8")
    route_table = {sub: dest for sub, dest in TABLE_ROW_RE.findall(dispatcher_text)}
    if target not in route_table:
        raise RuntimeError(f"no route for {target!r} in dispatcher")

    destination = route_table[target]
    contents = (COMMANDS / destination).read_text(encoding="utf-8")
    if FRONTMATTER_RE.match(contents) is None:
        raise RuntimeError(f"no frontmatter in {destination}")
    return contents
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _bench(fn, target: str, n: int) -> list[float]:
|
|
55
|
+
samples: list[float] = []
|
|
56
|
+
for _ in range(n):
|
|
57
|
+
t0 = time.perf_counter()
|
|
58
|
+
fn(target)
|
|
59
|
+
samples.append((time.perf_counter() - t0) * 1000.0)
|
|
60
|
+
return samples
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _summary(name: str, samples: list[float]) -> None:
|
|
64
|
+
samples = sorted(samples)
|
|
65
|
+
p50 = statistics.median(samples)
|
|
66
|
+
p95 = samples[int(len(samples) * 0.95)]
|
|
67
|
+
p99 = samples[int(len(samples) * 0.99)]
|
|
68
|
+
mean = statistics.mean(samples)
|
|
69
|
+
print(f" {name:18s} mean={mean:6.3f}ms p50={p50:6.3f}ms p95={p95:6.3f}ms p99={p99:6.3f}ms")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def main() -> int:
    """Benchmark atomic vs. cluster reads for each probed council target.

    For every target, both read paths are called once untimed, then timed
    ``N_ITER`` times each. A probe passes when the p95 difference
    (cluster minus atomic) is at most 100 ms.

    Returns:
        0 when every probe passes, 1 when at least one exceeds the threshold.
    """
    print(f"Phase 4.3.1 — council cluster dispatch latency (n={N_ITER} per probe)")
    print()

    p95_index = int(N_ITER * 0.95)
    failed_probes = 0
    for target in ("pr", "design"):
        print(f"target = /council {target}")

        # One untimed call per path so the timed runs start warm.
        _read_atomic(target)
        _read_cluster(target)

        atomic_ms = _bench(_read_atomic, target, N_ITER)
        cluster_ms = _bench(_read_cluster, target, N_ITER)

        _summary("atomic (baseline)", atomic_ms)
        _summary("cluster (dispatcher)", cluster_ms)

        delta_mean = statistics.mean(cluster_ms) - statistics.mean(atomic_ms)
        delta_p95 = sorted(cluster_ms)[p95_index] - sorted(atomic_ms)[p95_index]
        if delta_p95 <= 100.0:
            verdict, marker = "PASS", "✅"
        else:
            verdict, marker = "FAIL", "❌"
            failed_probes += 1
        print(f" delta-mean = {delta_mean:+.3f}ms delta-p95 = {delta_p95:+.3f}ms threshold = +100ms {marker} {verdict}")
        print()

    if failed_probes:
        print(f"❌ {failed_probes} probe(s) exceeded +100ms p95 threshold.")
        return 1
    print("✅ All probes within +100ms p95 threshold.")
    return 0
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
if __name__ == "__main__":
|
|
108
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Phase 6.1 — chat-history-* trigger overlap (Jaccard).
|
|
3
|
+
|
|
4
|
+
Source of truth per rule = frontmatter `description:` field
|
|
5
|
+
(the trigger surface that decides when an `auto` rule activates).
|
|
6
|
+
Tokens = lowercased alphanum words length ≥ 3, minus a small
|
|
7
|
+
stop-list of file-name fragments and connective words that carry
|
|
8
|
+
no trigger signal.
|
|
9
|
+
|
|
10
|
+
Output: pairwise Jaccard + branch verdict per roadmap § 6.1.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
from itertools import combinations
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
# Parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parent.parent
# Directory the rule markdown files are read from.
RULES_DIR = ROOT / ".agent-src.uncompressed/rules"

# Rule ids (file stems under RULES_DIR) whose descriptions are compared pairwise.
RULES = [
    "chat-history-cadence",
    "chat-history-ownership",
    "chat-history-visibility",
]

# Tokens discarded before comparison: connective words plus fragments of the
# rule/file names, which carry no trigger signal.
STOP = {
    "the", "and", "for", "with", "from", "via", "per", "not",
    "into", "onto", "out", "off", "any", "all", "this", "that",
    "agent", "chat", "history",
    "agentchathistory", "chathistory",
    "rule", "rules", "file", "files",
}

# Captures the double-quoted value of a line starting with `description:`.
DESC_RE = re.compile(r'^description:\s*"([^"]+)"', re.MULTILINE)
# A token is a lowercase letter followed by two or more lowercase letters,
# digits or underscores (applied to already-lowercased text).
TOKEN_RE = re.compile(r"[a-z][a-z0-9_]{2,}")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def tokens(rule_id: str) -> set[str]:
    """Return the trigger tokens of a rule's frontmatter description.

    Reads ``<rule_id>.md`` from ``RULES_DIR``, lowercases the quoted
    ``description:`` value, tokenizes it with ``TOKEN_RE`` and removes every
    token listed in ``STOP``.

    Raises:
        RuntimeError: if the file has no matching ``description:`` line.
    """
    rule_text = (RULES_DIR / f"{rule_id}.md").read_text(encoding="utf-8")
    found = DESC_RE.search(rule_text)
    if found is None:
        raise RuntimeError(f"no description in {rule_id}")
    words = TOKEN_RE.findall(found.group(1).lower())
    return set(words) - STOP
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def jaccard(a: set[str], b: set[str]) -> float:
    """Return the Jaccard index |a ∩ b| / |a ∪ b| of two sets.

    Two empty sets yield 0.0 rather than a division by zero.
    """
    combined = a | b
    return len(a & b) / len(combined) if combined else 0.0
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def main() -> int:
    """Print per-rule tokens, pairwise Jaccard scores and the branch verdict.

    A pair counts as overlapping when its Jaccard score is at least 0.30.
    Two or more overlapping pairs -> proceed; exactly one -> escalate;
    none -> stop.

    Returns:
        Always 0; the verdict is reported on stdout only.
    """
    threshold = 0.30
    sets = {r: tokens(r) for r in RULES}
    pairs = list(combinations(RULES, 2))
    # Derived from RULES so the verdict text stays accurate if the list changes.
    total_pairs = len(pairs)

    print("Phase 6.1 — trigger Jaccard (source: frontmatter `description:`)")
    print()
    for r, ts in sets.items():
        print(f" {r} ({len(ts)} tokens)")
        print(f" {sorted(ts)}")
    print()

    print("Pairwise Jaccard:")
    print()
    print(f" {'pair':55s} intersect union Jaccard")
    pairs_above = 0
    for a, b in pairs:
        inter = sets[a] & sets[b]
        union = sets[a] | sets[b]
        j = jaccard(sets[a], sets[b])
        above = j >= threshold
        marker = " **" if above else ""
        print(f" {a + ' × ' + b:55s} {len(inter):>8d} {len(union):>5d} {j:>6.3f}{marker}")
        print(f" intersection: {sorted(inter)}")
        if above:
            pairs_above += 1
    print()

    if pairs_above >= 2:
        print(f"VERDICT: ≥ 30% on {pairs_above}/{total_pairs} pairs → PROCEED to 6.2 (unified shape).")
        return 0
    if pairs_above == 1:
        print(f"VERDICT: mixed ({pairs_above}/{total_pairs} pairs ≥ 30%) → ESCALATE to council.")
        return 0
    print(f"VERDICT: < 30% on all {total_pairs} pairs → STOP at 6.1 (orthogonal — current shape optimal).")
    return 0
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
if __name__ == "__main__":
|
|
92
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""Council adjudication of Phase 2A always-budget overshoot.
|
|
2
|
+
|
|
3
|
+
Phase 2A of road-to-structural-optimization slimmed the top-3
|
|
4
|
+
budget-consuming `type: always` rules (language-and-tone,
|
|
5
|
+
ask-when-uncertain, direct-answers) by extracting MECHANICS into
|
|
6
|
+
contexts under contexts/communication/rules-always/. Under the
|
|
7
|
+
locked Model (b) literal, the extended budget grew from 47,448 to
|
|
8
|
+
52,534 chars (49,000 cap → 107.2 %), 5.2 pp above the 2 % G3
|
|
9
|
+
tolerance band defined in
|
|
10
|
+
docs/contracts/load-context-budget-model.md.
|
|
11
|
+
|
|
12
|
+
Per that contract: an overshoot above the band rejects the model
|
|
13
|
+
and escalates to council. Phase 2A § Abort/rollback fires its
|
|
14
|
+
budget kill-switch in this exact case.
|
|
15
|
+
|
|
16
|
+
This one-off bundles the linter output, the three slimmed rules,
|
|
17
|
+
the three new mechanics contexts, and the locked budget contract,
|
|
18
|
+
and asks the council to pick the smallest viable resolution.
|
|
19
|
+
|
|
20
|
+
Invocation:
|
|
21
|
+
.venv/bin/python -m scripts.ai_council._one_off_phase_2a_budget_rebalance
|
|
22
|
+
"""
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import sys
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
from scripts.ai_council.bundler import bundle_files
|
|
29
|
+
from scripts.ai_council.clients import (
|
|
30
|
+
AnthropicClient,
|
|
31
|
+
OpenAIClient,
|
|
32
|
+
load_anthropic_key,
|
|
33
|
+
load_openai_key,
|
|
34
|
+
)
|
|
35
|
+
from scripts.ai_council.orchestrator import (
|
|
36
|
+
CostBudget,
|
|
37
|
+
CouncilQuestion,
|
|
38
|
+
consult,
|
|
39
|
+
estimate,
|
|
40
|
+
)
|
|
41
|
+
from scripts.ai_council.pricing import estimate_cost, load_prices
|
|
42
|
+
from scripts.ai_council.project_context import detect_project_context
|
|
43
|
+
from scripts.ai_council.session import SessionManifest, save as save_session
|
|
44
|
+
|
|
45
|
+
# Repository root: two directory levels above the directory holding this file.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Files bundled into the council prompt: the budget contract, the three
# slimmed rules, and the three mechanics contexts extracted from them.
ARTEFACTS = [
    REPO_ROOT / "docs/contracts/load-context-budget-model.md",
    REPO_ROOT / ".agent-src.uncompressed/rules/language-and-tone.md",
    REPO_ROOT / ".agent-src.uncompressed/rules/ask-when-uncertain.md",
    REPO_ROOT / ".agent-src.uncompressed/rules/direct-answers.md",
    REPO_ROOT / ".agent-src.uncompressed/contexts/communication/rules-always/language-and-tone-mechanics.md",
    REPO_ROOT / ".agent-src.uncompressed/contexts/communication/rules-always/ask-when-uncertain-mechanics.md",
    REPO_ROOT / ".agent-src.uncompressed/contexts/communication/rules-always/direct-answers-mechanics.md",
]
|
|
55
|
+
|
|
56
|
+
# One-paragraph statement of the request; passed as `original_ask` to
# estimate(), consult() and the saved SessionManifest.
ORIGINAL_ASK = (
    "Phase 2A of road-to-structural-optimization slimmed the top-3 "
    "always-rules and breached the 2 % G3 tolerance band on the "
    "always-budget under Model (b) literal. The contract requires "
    "council escalation. Council task: pick the smallest viable "
    "resolution from a fixed option set."
)
|
|
63
|
+
|
|
64
|
+
REVIEW_PROMPT = """\
|
|
65
|
+
# Council Adjudication — Phase 2A Always-Budget Overshoot
|
|
66
|
+
|
|
67
|
+
## Measured state (linter output, model (b) literal)
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
FAIL always-rule extended budget: 52,534 / 49,000 chars (107.2%)
|
|
71
|
+
thresholds: warn 80% · fail 90% · per-rule ≤ 6,000 (ext) ·
|
|
72
|
+
top-3 ≤ 24,500 (ext) · depth ≤ 2 · G3 band ≤ +2%
|
|
73
|
+
|
|
74
|
+
ext= 8529 raw= 4636 scope-control.md (top-3) allowlisted ≤ 8,529
|
|
75
|
+
ext= 7887 raw= 4607 non-destructive-by-default.md (top-3) allowlisted ≤ 7,887
|
|
76
|
+
ext= 6827 raw= 2424 ask-when-uncertain.md (top-3) PER-RULE BREACH
|
|
77
|
+
ext= 6283 raw= 2758 direct-answers.md PER-RULE BREACH
|
|
78
|
+
ext= 5863 raw= 3658 language-and-tone.md
|
|
79
|
+
ext= 5781 raw= 3309 commit-policy.md
|
|
80
|
+
ext= 5481 raw= 2196 verify-before-complete.md
|
|
81
|
+
ext= 4415 raw= 4415 no-cheap-questions.md
|
|
82
|
+
ext= 1468 raw= 1468 agent-authority.md
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Pre-Phase-2A baseline (Phase 0.2.3 retroactive test, locked):
|
|
86
|
+
**47,448 / 49,000 = 96.8 %** (within tolerance, model (b) accepted).
|
|
87
|
+
|
|
88
|
+
Phase-2A delta per slimmed rule (raw rule shrank, mechanics
|
|
89
|
+
context loaded as new dependency):
|
|
90
|
+
|
|
91
|
+
| Rule | ext before | ext after | delta |
|
|
92
|
+
|---|---:|---:|---:|
|
|
93
|
+
| language-and-tone | 5,832 | 5,863 | +31 |
|
|
94
|
+
| ask-when-uncertain | 5,196 | 6,827 | +1,631 |
|
|
95
|
+
| direct-answers | 4,722 | 6,283 | +1,561 |
|
|
96
|
+
|
|
97
|
+
Phase 0.4 worked example council-locked the +1,561 delta on
|
|
98
|
+
direct-answers. ask-when-uncertain (+1,631) is structurally
|
|
99
|
+
identical. The total overshoot is **+3,534 chars over cap**.
|
|
100
|
+
|
|
101
|
+
## Why the literal model can no longer hit budget
|
|
102
|
+
|
|
103
|
+
Under model (b) literal each rule pays full cost for every context
|
|
104
|
+
it loads. Each mechanics extraction adds ~1.5–2 KB of frontmatter +
|
|
105
|
+
headers + context-introduction overhead per rule. Phase 2A is
|
|
106
|
+
structurally unable to satisfy the roadmap success criterion
|
|
107
|
+
("budget delta ≥ −5 %") under model (b); extraction *always* adds
|
|
108
|
+
overhead unless mechanics are consolidated and shared across rules.
|
|
109
|
+
|
|
110
|
+
## Fixed option set (pick exactly one)
|
|
111
|
+
|
|
112
|
+
- **R1 — Switch to Model (c) shared-divisor.** Reserved by the
|
|
113
|
+
contract as "first refinement step if the 2 % tolerance band is
|
|
114
|
+
exceeded". Under (c), `Σ RawSize(c) / N_loaders`. Currently each
|
|
115
|
+
mechanics file has N=1, so (c) reduces to (b). Buy-in for (c)
|
|
116
|
+
enables R5 below.
|
|
117
|
+
- **R2 — Raise TOTAL_CAP.** Set cap to 56,000 chars (current
|
|
118
|
+
utilization 93.8 % under new cap). Acknowledges extraction
|
|
119
|
+
overhead is real; freezes Phase 2A wins; abandons the −5 %
|
|
120
|
+
delta success criterion.
|
|
121
|
+
- **R3 — Raise PER_RULE_CAP to 7,000 + add allowlist entries +
|
|
122
|
+
raise TOTAL_CAP to 54,000.** Minimal-change variant: keeps the
|
|
123
|
+
shape, accepts the two new per-rule breaches, raises cap
|
|
124
|
+
modestly.
|
|
125
|
+
- **R4 — Revert Phase 2A.** Abandon mechanics extraction on the
|
|
126
|
+
three rules; restore them to original size; mark Phase 2A as
|
|
127
|
+
"structurally infeasible under locked contract"; close roadmap
|
|
128
|
+
phase.
|
|
129
|
+
- **R5 — Consolidate the three mechanics into one shared context
|
|
130
|
+
+ adopt R1.** Single `rules-always-mechanics.md` loaded by all
|
|
131
|
+
three slimmed rules; under (c) each rule pays 1/3 of the
|
|
132
|
+
consolidated context size. Highest engineering cost, only path
|
|
133
|
+
that delivers a net negative delta.
|
|
134
|
+
|
|
135
|
+
## Output Contract (STRICT)
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
### Verdict
|
|
139
|
+
**Choice:** <R1 | R2 | R3 | R4 | R5>
|
|
140
|
+
**One-sentence rationale:** <≤ 30 words>
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
```
|
|
144
|
+
### Required follow-up actions (numbered, ≤ 4)
|
|
145
|
+
1. <smallest concrete step, files named>
|
|
146
|
+
2. <...>
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
```
|
|
150
|
+
### Risk note
|
|
151
|
+
**Single biggest risk of the chosen option:** <one sentence>
|
|
152
|
+
**Mitigation:** <one sentence>
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
```
|
|
156
|
+
### Contract amendment needed?
|
|
157
|
+
**Amend load-context-budget-model.md?** <YES — section · NO>
|
|
158
|
+
**Amend road-to-structural-optimization Phase 2A success criterion?**
|
|
159
|
+
<YES — new criterion in 1 line · NO>
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Be decisive — total response ≤ 1,000 words. Artefacts follow
|
|
163
|
+
verbatim.
|
|
164
|
+
"""
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def main() -> int:
    """Run one council round on the Phase 2A budget overshoot and save it.

    Builds the prompt from ``REVIEW_PROMPT`` plus the bundled ``ARTEFACTS``,
    prints a per-member cost estimate, consults the Anthropic and OpenAI
    members for a single round, prints per-response usage and cost, and
    saves a session manifest.

    Returns:
        1 if no round completed, if every member errored in the final
        round (nothing is saved in either case), or if any collected
        response carries an error; 0 otherwise.
    """
    anthropic = AnthropicClient(api_key=load_anthropic_key(), model="claude-sonnet-4-5")
    openai = OpenAIClient(api_key=load_openai_key(), model="gpt-4o")
    members = [anthropic, openai]

    context = bundle_files(ARTEFACTS)
    project = detect_project_context(REPO_ROOT)
    table = load_prices()

    # Fixed instructions first, then the bundled artefact text after a rule.
    user_prompt = REVIEW_PROMPT + "\n\n---\n\n" + context.text

    question = CouncilQuestion(
        mode="files",
        user_prompt=user_prompt,
        max_tokens=2560,
    )

    estimates = estimate(
        question, members, table, project=project, original_ask=ORIGINAL_ASK,
    )
    print("=== ESTIMATE (single round) ===")
    total_est = 0.0
    for c, e in zip(members, estimates):
        print(f" {c.name}/{c.model}: ~{e.input_tokens} in + {e.output_tokens} out = ${e.total_usd:.4f}")
        total_est += e.total_usd
    print(f" TOTAL per round (max): ${total_est:.4f}")
    print()

    budget = CostBudget(
        max_input_tokens=200_000,
        max_output_tokens=80_000,
        max_calls=20,
        max_total_usd=2.50,
    )

    rounds_collected: list[list] = []

    def _actual_usd(r) -> float:
        # Price one response from its reported token counts.
        return estimate_cost(r.provider, r.model, r.input_tokens, r.output_tokens, table).total_usd

    def _on_round_complete(round_idx: int, round_responses) -> None:
        # Keep a copy of the round and print a usage line per member.
        rounds_collected.append(list(round_responses))
        print(f"=== ROUND {round_idx + 1} COMPLETE ===")
        for r in round_responses:
            if r.error:
                print(f" [error] {r.provider}/{r.model}: {r.error}")
                continue
            print(f" [done] {r.provider}/{r.model}: {r.input_tokens} in / "
                  f"{r.output_tokens} out · {r.latency_ms} ms · ${_actual_usd(r):.4f}")
        print()

    print("=== CONSULT (1 round, Phase 2A budget rebalance) ===")
    consult(
        members, question, budget,
        rounds=1,
        on_round_complete=_on_round_complete,
        table=table, project=project, original_ask=ORIGINAL_ASK,
    )

    if not rounds_collected:
        print("[error] no rounds completed", file=sys.stderr)
        return 1

    # Errored responses are excluded from the actual-cost total.
    actual_total = 0.0
    for round_responses in rounds_collected:
        for r in round_responses:
            if r.error:
                continue
            actual_total += _actual_usd(r)
    print(f"=== TOTAL ACTUAL: ${actual_total:.4f} ===")

    final_round = rounds_collected[-1]
    if all(r.error for r in final_round):
        # This path used to exit 1 silently; report why, like the
        # "no rounds completed" path above does.
        print("[error] every member failed in the final round; session not saved", file=sys.stderr)
        return 1

    manifest = SessionManifest(
        mode="files",
        artefact="agents/roadmaps/road-to-structural-optimization.md",
        original_ask=ORIGINAL_ASK,
        members=[f"{r.provider}/{r.model}" for r in final_round],
        rounds=len(rounds_collected),
        cost_usd_estimated=total_est,
        cost_usd_actual=actual_total,
        extra={"purpose": "Phase 2A always-budget overshoot adjudication"},
    )
    session_dir = save_session(manifest=manifest, responses=rounds_collected)
    # NOTE(review): relative_to() assumes save_session returns a path under REPO_ROOT — confirm.
    print(f"[saved] {session_dir.relative_to(REPO_ROOT)}/")
    # A partial failure still saves the session but is reported via the exit code.
    return 1 if any(r.error for round_r in rounds_collected for r in round_r) else 0
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|