@cleocode/skills 2026.5.4 → 2026.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/ct-council/SKILL.md +0 -377
- package/skills/ct-council/optimization/HARDENING-PLAYBOOK.md +0 -107
- package/skills/ct-council/optimization/README.md +0 -74
- package/skills/ct-council/optimization/scenarios.yaml +0 -121
- package/skills/ct-council/optimization/scripts/campaign.py +0 -543
- package/skills/ct-council/optimization/scripts/test_campaign.py +0 -143
- package/skills/ct-council/references/chairman.md +0 -119
- package/skills/ct-council/references/contrarian.md +0 -70
- package/skills/ct-council/references/evidence-pack.md +0 -145
- package/skills/ct-council/references/examples.md +0 -235
- package/skills/ct-council/references/executor.md +0 -83
- package/skills/ct-council/references/expansionist.md +0 -68
- package/skills/ct-council/references/first-principles.md +0 -73
- package/skills/ct-council/references/outsider.md +0 -73
- package/skills/ct-council/references/peer-review.md +0 -125
- package/skills/ct-council/scripts/analyze_runs.py +0 -293
- package/skills/ct-council/scripts/fixtures/executor_multi.md +0 -198
- package/skills/ct-council/scripts/fixtures/missing_advisor.md +0 -117
- package/skills/ct-council/scripts/fixtures/missing_convergence.md +0 -190
- package/skills/ct-council/scripts/fixtures/thin_evidence.md +0 -193
- package/skills/ct-council/scripts/fixtures/valid.md +0 -226
- package/skills/ct-council/scripts/fixtures/valid_with_llmtxt.md +0 -226
- package/skills/ct-council/scripts/llmtxt_ref.py +0 -223
- package/skills/ct-council/scripts/run_council.py +0 -578
- package/skills/ct-council/scripts/telemetry.py +0 -624
- package/skills/ct-council/scripts/test_telemetry.py +0 -509
- package/skills/ct-council/scripts/test_validate.py +0 -452
- package/skills/ct-council/scripts/validate.py +0 -396
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
# Council shakedown scenarios — campaign.py loads this at runtime.
|
|
2
|
-
#
|
|
3
|
-
# Each entry must have: id, number, title, dimension, shape, learn, briefing.
|
|
4
|
-
# The `id` is the slug used everywhere (CLI, manifest, runs/<scenario>).
|
|
5
|
-
# The `number` controls run order (lowest first; gaps are OK).
|
|
6
|
-
#
|
|
7
|
-
# To add a new scenario: append an entry below. No code changes required.
|
|
8
|
-
# To change a briefing: edit the `briefing` field. campaign.py picks it up
|
|
9
|
-
# on the next invocation.
|
|
10
|
-
#
|
|
11
|
-
# Keep `briefing` to multi-line text (use YAML's `|` literal block style).
|
|
12
|
-
# It's printed verbatim by `campaign.py next` to brief the orchestrator.
|
|
13
|
-
#
|
|
14
|
-
# Schema version: 1.0.0
|
|
15
|
-
|
|
16
|
-
schema_version: "1.0.0"
|
|
17
|
-
|
|
18
|
-
scenarios:
|
|
19
|
-
- id: baseline
|
|
20
|
-
number: 1
|
|
21
|
-
title: Narrow binary, dense evidence
|
|
22
|
-
dimension: Control run
|
|
23
|
-
shape: "Binary decision, 5-7 path:line / sha citations, no llmtxt:"
|
|
24
|
-
learn: Baseline cost / wall-clock / gate-pass distribution all subsequent runs compare against.
|
|
25
|
-
briefing: |
|
|
26
|
-
Pick a real, current cleocode (or active-project) decision with a clean binary shape:
|
|
27
|
-
- 'Should we X or stay with Y?'
|
|
28
|
-
- 'Is Z ready to ship?'
|
|
29
|
-
Evidence pack: 5-7 items, all path:line or sha citations from the live codebase. NO llmtxt:.
|
|
30
|
-
This run sets the campaign's baseline gate-pass distribution and token cost.
|
|
31
|
-
|
|
32
|
-
- id: external-doc-heavy
|
|
33
|
-
number: 2
|
|
34
|
-
title: External-doc heavy
|
|
35
|
-
dimension: "Live llmtxt: integration"
|
|
36
|
-
shape: "Binary, ≥3 of 7 evidence items as llmtxt:<slug>"
|
|
37
|
-
learn: Does the wrapper survive real subagent distribution under auth/rate-limit conditions?
|
|
38
|
-
briefing: |
|
|
39
|
-
Pick a question that genuinely depends on external docs (libraries, APIs, specs).
|
|
40
|
-
Evidence pack: ≥3 items as `llmtxt:<slug>` with concise inline rationale; 1-3 local citations.
|
|
41
|
-
If `llmtxt_ref.py` cannot fetch (auth/network), inline rationales are the ground-truth fallback.
|
|
42
|
-
|
|
43
|
-
- id: three-way
|
|
44
|
-
number: 3
|
|
45
|
-
title: Three-way trade-off
|
|
46
|
-
dimension: Chairman ranking, not binary approve
|
|
47
|
-
shape: "'Which of A / B / C?'"
|
|
48
|
-
learn: "Does the verdict template hold for N-way? Is `### Recommendation` flexible enough?"
|
|
49
|
-
briefing: |
|
|
50
|
-
Pose a genuinely 3-way question — three implementation choices, three vendors, three patterns.
|
|
51
|
-
The Chairman MUST produce a ranking (A >> B > C), not just 'pick A.' Watch for:
|
|
52
|
-
- Does the verdict cleanly state the rank?
|
|
53
|
-
- Does it justify the 2nd place's reason for not being 1st?
|
|
54
|
-
- Does at least one advisor argue for each option?
|
|
55
|
-
|
|
56
|
-
- id: sparse-ops
|
|
57
|
-
number: 4
|
|
58
|
-
title: Sparse / ops question
|
|
59
|
-
dimension: Advisors with no code to grep
|
|
60
|
-
shape: Configs + external docs only; no executable-code citations
|
|
61
|
-
learn: Do advisors honestly say "insufficient" or hallucinate to fill gaps?
|
|
62
|
-
briefing: |
|
|
63
|
-
Pick an ops/config question: CI matrix, package.json field, env var, deployment policy.
|
|
64
|
-
Evidence pack: only config files (yaml, json, toml) + external doc references. NO src/ citations.
|
|
65
|
-
In each subagent briefing, explicitly note: 'punish fabricated specificity, not honest abstention.'
|
|
66
|
-
Watch Outsider especially — sparse evidence is their highest-leverage scenario.
|
|
67
|
-
|
|
68
|
-
- id: contradictory
|
|
69
|
-
number: 5
|
|
70
|
-
title: Contradictory evidence
|
|
71
|
-
dimension: Contradiction handling
|
|
72
|
-
shape: Pack contains 2 items that disagree on purpose
|
|
73
|
-
learn: Does Outsider catch it? Does FP re-derive cleanly under conflicting overlay?
|
|
74
|
-
briefing: |
|
|
75
|
-
Plant a deliberate contradiction in the pack:
|
|
76
|
-
- Item N says X (e.g. an ADR / AGENTS.md rule)
|
|
77
|
-
- Item N+1 shows ¬X exists on disk (e.g. a directory listing or commit)
|
|
78
|
-
Do NOT signal the contradiction in the question — let the advisors find it.
|
|
79
|
-
Outsider's frame is the canonical antibody; FP must classify which side is load-bearing truth.
|
|
80
|
-
|
|
81
|
-
- id: non-cleo
|
|
82
|
-
number: 6
|
|
83
|
-
title: Non-CLEO project
|
|
84
|
-
dimension: Portability beyond cleocode conventions
|
|
85
|
-
shape: External small repo + bug report; council run inside it
|
|
86
|
-
learn: Does the skill work on any project, or has it accumulated cleocode-isms?
|
|
87
|
-
briefing: |
|
|
88
|
-
Clone a small external repo to /tmp (e.g. a popular OSS utility under <500 files).
|
|
89
|
-
Pose a real-looking bug report or design question against THAT repo's actual files.
|
|
90
|
-
Run the council with NO cleocode-specific evidence. Watch for:
|
|
91
|
-
- Persona files referencing 'CLEO' / 'BRAIN' / 'NEXUS' (cleocode-isms)
|
|
92
|
-
- Validator failures on conventions that don't apply outside cleocode
|
|
93
|
-
- llmtxt_ref / cleo CLI references that don't exist in the foreign repo
|
|
94
|
-
|
|
95
|
-
- id: mini
|
|
96
|
-
number: 7
|
|
97
|
-
title: Small-scope stress (3 items)
|
|
98
|
-
dimension: Overhead-vs-signal ratio
|
|
99
|
-
shape: Exactly 3 evidence items (the validator floor)
|
|
100
|
-
learn: Is a "mini-council" variant worth shipping? Can the gates fire on thin packs?
|
|
101
|
-
briefing: |
|
|
102
|
-
Pose a tightly-scoped question and discipline yourself to EXACTLY 3 evidence items.
|
|
103
|
-
Watch for:
|
|
104
|
-
- Gate-pass distribution (does G1 Rigor still fire reasonably?)
|
|
105
|
-
- Token cost (should be 60-70% of full-council baseline)
|
|
106
|
-
- Chairman confidence (does it stay ≥medium?)
|
|
107
|
-
If all three hold, document a 'mini-council' variant in SKILL.md.
|
|
108
|
-
|
|
109
|
-
- id: contention
|
|
110
|
-
number: 8
|
|
111
|
-
title: High-contention
|
|
112
|
-
dimension: Chairman reconciliation under genuine disagreement
|
|
113
|
-
shape: Question designed to produce a 3-vs-2 advisor split
|
|
114
|
-
learn: Does the Chairman template handle real contention rather than directional convergence?
|
|
115
|
-
briefing: |
|
|
116
|
-
Pose a question where 2-3 advisors will plausibly disagree with the other 2-3:
|
|
117
|
-
- Speed-vs-safety trade-offs (Executor/Expansionist vs Contrarian/FP)
|
|
118
|
-
- Autonomy-vs-control questions (Outsider / Contrarian split is common)
|
|
119
|
-
- The Council voting on its own composition / process
|
|
120
|
-
If the verdict converges directionally instead of splitting, the contention test failed —
|
|
121
|
-
consider a re-run with a sharper question.
|
|
@@ -1,543 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
campaign.py — programmatic tracker for Council hardening campaigns.
|
|
4
|
-
|
|
5
|
-
A campaign is an instance of the playbook: a sequence of shakedown runs with
|
|
6
|
-
shared telemetry and a cumulative findings log. Campaigns persist locally
|
|
7
|
-
(gitignored under `optimization/campaigns/<name>/`); the playbook itself
|
|
8
|
-
(`optimization/HARDENING-PLAYBOOK.md`) stays committed.
|
|
9
|
-
|
|
10
|
-
Subcommands:
|
|
11
|
-
|
|
12
|
-
new <name> Initialize a new campaign directory from the playbook.
|
|
13
|
-
status [--name <n>] Show campaign progress + exit-criteria scorecard.
|
|
14
|
-
next [--name <n>] Print the next scenario's full briefing.
|
|
15
|
-
done <scenario> <run-id> Mark a scenario complete (links the run dir).
|
|
16
|
-
log <failure> <fix> <reg> Append a hardening fix to findings.md.
|
|
17
|
-
list List all known campaigns under campaigns/.
|
|
18
|
-
active [--set <n>] Show or set the active campaign (used as default).
|
|
19
|
-
|
|
20
|
-
Usage:
|
|
21
|
-
|
|
22
|
-
python3 optimization/scripts/campaign.py new 2026-04-25-portability
|
|
23
|
-
python3 optimization/scripts/campaign.py next
|
|
24
|
-
python3 optimization/scripts/campaign.py done baseline 20260425T023423Z-0f82cea9
|
|
25
|
-
python3 optimization/scripts/campaign.py log "Executor mis-cite" "Pre-action verify rule" "yes"
|
|
26
|
-
python3 optimization/scripts/campaign.py status
|
|
27
|
-
"""
|
|
28
|
-
|
|
29
|
-
from __future__ import annotations
|
|
30
|
-
|
|
31
|
-
import argparse
|
|
32
|
-
import datetime as _dt
|
|
33
|
-
import json
|
|
34
|
-
import sys
|
|
35
|
-
from dataclasses import dataclass
|
|
36
|
-
from pathlib import Path
|
|
37
|
-
|
|
38
|
-
# Resolve skill root from this script's location.
|
|
39
|
-
SCRIPT_PATH = Path(__file__).resolve()
|
|
40
|
-
OPTIMIZATION_DIR = SCRIPT_PATH.parent.parent
|
|
41
|
-
SKILL_ROOT = OPTIMIZATION_DIR.parent
|
|
42
|
-
CAMPAIGNS_DIR = OPTIMIZATION_DIR / "campaigns"
|
|
43
|
-
PLAYBOOK_PATH = OPTIMIZATION_DIR / "HARDENING-PLAYBOOK.md"
|
|
44
|
-
ACTIVE_FILE = OPTIMIZATION_DIR / ".active-campaign" # gitignored
|
|
45
|
-
TELEMETRY_LOG = SKILL_ROOT / ".cleo" / "council-runs.jsonl"
|
|
46
|
-
SKILL_RUNS_DIR = SKILL_ROOT / ".runs"
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
# ─── Scenario catalogue (loaded from optimization/scenarios.yaml) ────────────
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
@dataclass(frozen=True)
class Scenario:
    """One shakedown scenario from the campaign catalogue.

    Instances are immutable (``frozen=True``). Every field is required and
    maps one-to-one onto a key of a catalogue entry.
    """

    # Slug used on the CLI and as the manifest key, e.g. "baseline".
    id: str
    # Run-order key: scenarios are sorted by this value, lowest first.
    number: int
    # Short human-readable name.
    title: str
    # What aspect of the council this scenario exercises.
    dimension: str
    # Expected form of the question / evidence pack.
    shape: str
    # What the run is meant to teach.
    learn: str
    # Multi-line guidance printed for the orchestrator.
    briefing: str
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
SCENARIOS_YAML_PATH = OPTIMIZATION_DIR / "scenarios.yaml"
|
|
64
|
-
SCENARIOS_JSON_PATH = OPTIMIZATION_DIR / "scenarios.json" # alternate format
|
|
65
|
-
|
|
66
|
-
# Hardcoded fallback used only if both scenarios.yaml and scenarios.json are
|
|
67
|
-
# missing or unusable (unparseable, or PyYAML isn't installed for the YAML). Keeps
|
|
68
|
-
# campaign.py runnable in clean-checkout / minimal-deps environments.
|
|
69
|
-
_FALLBACK_SCENARIOS: list[dict] = [
|
|
70
|
-
{
|
|
71
|
-
"id": "baseline",
|
|
72
|
-
"number": 1,
|
|
73
|
-
"title": "Narrow binary, dense evidence",
|
|
74
|
-
"dimension": "Control run",
|
|
75
|
-
"shape": "Binary decision, 5-7 path:line / sha citations, no llmtxt:",
|
|
76
|
-
"learn": "Baseline cost / wall-clock / gate-pass distribution all subsequent runs compare against.",
|
|
77
|
-
"briefing": (
|
|
78
|
-
"Pick a binary decision in the active project.\n"
|
|
79
|
-
"Evidence: 5-7 path:line or sha citations from the live codebase. No llmtxt:.\n"
|
|
80
|
-
"This run sets the campaign's baseline cost + gate-pass distribution.\n"
|
|
81
|
-
),
|
|
82
|
-
},
|
|
83
|
-
]
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def _load_scenarios() -> list[Scenario]:
    """Load scenarios from YAML (preferred), JSON (alternate), or fallback list.

    Order of precedence:
      1. optimization/scenarios.yaml (if pyyaml available + file present)
      2. optimization/scenarios.json (used when the YAML yielded no list)
      3. Hardcoded _FALLBACK_SCENARIOS (clean-checkout safety net)

    Entries that are not mappings, lack a required field, carry a
    non-integer ``number``, or repeat an already-seen ``id`` are skipped with
    a warning on stderr rather than aborting the load.

    Returns:
        The valid scenarios sorted by ``number`` (lowest first). Never empty:
        if nothing valid was loaded the hardcoded fallback is returned.
    """
    raw_entries: list[dict] | None = None

    if SCENARIOS_YAML_PATH.exists():
        try:
            import yaml  # type: ignore

            # The catalogue contains non-ASCII text, so decode explicitly
            # instead of relying on the platform's locale encoding.
            data = yaml.safe_load(SCENARIOS_YAML_PATH.read_text(encoding="utf-8"))
            if isinstance(data, dict) and isinstance(data.get("scenarios"), list):
                raw_entries = data["scenarios"]
        except ImportError:
            print(
                "ℹ️ scenarios.yaml exists but PyYAML isn't installed; "
                "falling back to scenarios.json or hardcoded list. "
                "Run `pip install pyyaml` to use YAML.",
                file=sys.stderr,
            )
        except Exception as e:
            print(f"⚠️ Could not parse {SCENARIOS_YAML_PATH}: {e}", file=sys.stderr)

    if raw_entries is None and SCENARIOS_JSON_PATH.exists():
        try:
            data = json.loads(SCENARIOS_JSON_PATH.read_text(encoding="utf-8"))
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"⚠️ Could not parse {SCENARIOS_JSON_PATH}: {e}", file=sys.stderr)
        else:
            if isinstance(data, dict) and isinstance(data.get("scenarios"), list):
                raw_entries = data["scenarios"]
            elif isinstance(data, list):
                raw_entries = data

    if raw_entries is None:
        raw_entries = _FALLBACK_SCENARIOS

    out: list[Scenario] = []
    required_fields = ["id", "number", "title", "dimension", "shape", "learn", "briefing"]
    for i, entry in enumerate(raw_entries, 1):
        if not isinstance(entry, dict):
            print(f"⚠️ Scenario #{i} is not a mapping; skipping.", file=sys.stderr)
            continue
        missing = [f for f in required_fields if f not in entry]
        if missing:
            print(f"⚠️ Scenario #{i} ({entry.get('id', '?')}) missing fields: {missing}; skipping.", file=sys.stderr)
            continue
        try:
            number = int(entry["number"])
        except (TypeError, ValueError):
            # This runs at import time; one bad entry must not break the CLI.
            print(
                f"⚠️ Scenario #{i} ({entry.get('id', '?')}) has a non-integer number "
                f"{entry['number']!r}; skipping.",
                file=sys.stderr,
            )
            continue
        # Completion is tracked per id, so a repeated id could never be
        # completed twice and the campaign would never count as finished.
        if any(existing.id == entry["id"] for existing in out):
            print(f"⚠️ Scenario #{i} duplicates id {entry['id']!r}; skipping.", file=sys.stderr)
            continue
        out.append(Scenario(
            id=entry["id"],
            number=number,
            title=entry["title"],
            dimension=entry["dimension"],
            shape=entry["shape"],
            learn=entry["learn"],
            briefing=entry["briefing"],
        ))
    out.sort(key=lambda s: s.number)
    if not out:
        print("⚠️ No valid scenarios loaded; using hardcoded fallback.", file=sys.stderr)
        out = [Scenario(**e) for e in _FALLBACK_SCENARIOS]
    return out
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
SCENARIOS: list[Scenario] = _load_scenarios()
|
|
152
|
-
SCENARIO_BY_ID: dict[str, Scenario] = {s.id: s for s in SCENARIOS}
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
# ─── Campaign helpers ───────────────────────────────────────────────────────
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
def _campaigns_dir() -> Path:
    """Return the campaigns directory, creating it (and parents) if absent."""
    target = CAMPAIGNS_DIR
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
def _read_active_campaign() -> str | None:
    """Return the name of the active campaign, or ``None`` if there is none.

    The active-campaign file wins when it exists (a blank file yields
    ``None``). Without that file, a sole campaign directory is the default.
    """
    if ACTIVE_FILE.exists():
        recorded = ACTIVE_FILE.read_text().strip()
        return recorded if recorded else None
    # No marker file: fall back to the campaign only when it is unambiguous.
    names = [entry.name for entry in _campaigns_dir().iterdir() if entry.is_dir()]
    return names[0] if len(names) == 1 else None
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
def _write_active_campaign(name: str) -> None:
    """Record *name* as the active campaign (one line, newline-terminated)."""
    ACTIVE_FILE.write_text(f"{name}\n")
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
def _resolve_campaign(name: str | None) -> Path:
    """Return the directory of the named (or active) campaign.

    Args:
        name: Explicit campaign name; a falsy value selects the active one.

    Returns:
        Path of the existing campaign directory.

    Exits via ``sys.exit`` when no campaign can be determined or the
    directory does not exist.
    """
    chosen = name if name else _read_active_campaign()
    if not chosen:
        sys.exit(
            "❌ No campaign specified and no active campaign set.\n"
            " Run: campaign.py new <name> OR campaign.py active --set <name>"
        )
    campaign_path = _campaigns_dir() / chosen
    if campaign_path.exists():
        return campaign_path
    sys.exit(f"❌ Campaign not found: {campaign_path}\n Existing: campaign.py list")
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
def _read_manifest(campaign_dir: Path) -> dict:
    """Load ``manifest.json`` for a campaign, tolerating a missing/bad file.

    Args:
        campaign_dir: Directory of the campaign.

    Returns:
        The parsed manifest mapping. When the file is absent, is not valid
        JSON, or does not hold a JSON object, an empty manifest
        (``name`` / ``completed`` / ``fixes``) is returned instead.
    """
    empty = {"name": campaign_dir.name, "completed": {}, "fixes": []}
    p = campaign_dir / "manifest.json"
    if not p.exists():
        return empty
    try:
        loaded = json.loads(p.read_text())
    except json.JSONDecodeError as e:
        # Still best-effort, but say so: the next write will replace the
        # unreadable file, and that should not happen silently.
        print(f"⚠️ Could not parse {p}: {e}; treating campaign as empty.", file=sys.stderr)
        return empty
    if not isinstance(loaded, dict):
        # Callers use .get() on the result; anything but an object is unusable.
        print(f"⚠️ {p} does not contain a JSON object; treating campaign as empty.", file=sys.stderr)
        return empty
    return loaded
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
def _write_manifest(campaign_dir: Path, manifest: dict) -> None:
    """Persist *manifest* as ``manifest.json`` inside *campaign_dir*.

    The JSON is pretty-printed with sorted keys and a trailing newline. It is
    written to a sibling temporary file first and then moved into place, so
    an interrupted write cannot leave a truncated manifest behind.

    Args:
        campaign_dir: Existing campaign directory.
        manifest: JSON-serialisable manifest mapping.
    """
    target = campaign_dir / "manifest.json"
    staging = campaign_dir / "manifest.json.tmp"
    staging.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n")
    # Path.replace overwrites the destination in a single rename step.
    staging.replace(target)
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
def _next_scenario(manifest: dict) -> Scenario | None:
    """Return the first scenario, in catalogue order, not yet completed.

    Args:
        manifest: Campaign manifest; its ``completed`` mapping is keyed by
            scenario id.

    Returns:
        The next pending ``Scenario``, or ``None`` when all are completed.
    """
    finished = set(manifest.get("completed", {}))
    return next((candidate for candidate in SCENARIOS if candidate.id not in finished), None)
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
# ─── Subcommands ────────────────────────────────────────────────────────────
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
def cmd_new(args) -> int:
    """Create a new campaign directory and make it the active campaign.

    Creates ``campaigns/<name>/`` with an empty ``runs/`` subdirectory, a
    fresh ``manifest.json``, a ``findings.md`` table header, and a ``plan.md``
    with one section per loaded scenario.

    Args:
        args: Parsed CLI namespace; reads ``args.name``.

    Returns:
        0 on success. Exits via ``sys.exit`` when the name is not a simple
        slug or the campaign already exists.
    """
    name = args.name.strip()
    # An empty name would resolve to the campaigns directory itself and be
    # misreported as "already exists"; a backslash is a path separator on
    # Windows. Reject both alongside the slash / leading-dot cases.
    if not name or "/" in name or "\\" in name or name.startswith("."):
        sys.exit("❌ Campaign name must be a simple slug (no slashes, no leading dot).")

    path = _campaigns_dir() / name
    if path.exists():
        sys.exit(f"❌ Campaign already exists: {path}")

    path.mkdir(parents=True)
    (path / "runs").mkdir()

    manifest = {
        "name": name,
        "schema_version": "1.0.0",
        "created_at": _dt.datetime.now(tz=_dt.timezone.utc).isoformat(timespec="seconds"),
        "playbook": str(PLAYBOOK_PATH.relative_to(SKILL_ROOT)),
        "telemetry_log": str(TELEMETRY_LOG.relative_to(SKILL_ROOT)),
        "completed": {},  # scenario_id → {run_id, completed_at}
        "fixes": [],  # list of {at, after_run, after_scenario, failure, fix, regression_test}
    }
    _write_manifest(path, manifest)

    findings_md = (
        f"# Findings — campaign `{name}`\n\n"
        "Failure-mode diff table — appended via `campaign.py log` between runs.\n"
        "Each row pairs a failure surfaced in run N with the fix shipped before run N+1.\n\n"
        "| Run | Scenario | Failure surfaced | Fix shipped | Regression test |\n"
        "|---|---|---|---|---|\n"
    )
    (path / "findings.md").write_text(findings_md)

    plan_parts = [
        f"# Plan — campaign `{name}`\n\n"
        "Generated from `optimization/HARDENING-PLAYBOOK.md`. "
        "Edit this file to add campaign-specific notes (skipped scenarios, custom questions, etc.) — "
        "the manifest tracks scenario completion separately.\n\n"
        "## Scenarios (run in order)\n\n"
    ]
    for s in SCENARIOS:
        plan_parts.append(
            f"### {s.number}. {s.id} — {s.title}\n\n"
            f"**Dimension:** {s.dimension}\n\n"
            f"**Shape:** {s.shape}\n\n"
            f"**Learn:** {s.learn}\n\n"
            "**Status:** _pending_\n\n"
        )
    (path / "plan.md").write_text("".join(plan_parts))

    _write_active_campaign(name)

    print(f"📁 Campaign initialized: {path}")
    print(f" Active campaign set to: {name}")
    print(" Next: campaign.py next")
    return 0
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
def cmd_next(args) -> int:
    """Print the briefing for the next pending scenario of a campaign.

    Args:
        args: Parsed CLI namespace; reads ``args.name`` (``None`` selects the
            active campaign).

    Returns:
        0 always; prints a completion notice when nothing is pending.
    """
    campaign_dir = _resolve_campaign(args.name)
    manifest = _read_manifest(campaign_dir)
    s = _next_scenario(manifest)
    # The catalogue is loaded at runtime, so report its real size rather
    # than a fixed count.
    total = len(SCENARIOS)
    if s is None:
        print(f"✅ All {total} scenarios completed for this campaign.")
        print(" Run: campaign.py status # for the exit-criteria scorecard")
        return 0

    # `number` is only a sort key and may have gaps; show the 1-based
    # position in run order so the "x/total" header stays meaningful.
    position = SCENARIOS.index(s) + 1
    print(f"# Next scenario — {position}/{total} · {s.id}")
    print()
    print(f"**Title:** {s.title}")
    print(f"**Dimension:** {s.dimension}")
    print(f"**Shape:** {s.shape}")
    print(f"**Learn:** {s.learn}")
    print()
    print("## Briefing")
    print()
    print(s.briefing)
    print("## Suggested commands")
    print()
    print(f" python3 scripts/run_council.py init '<your question>' --scenario {s.id} --subagent-mode")
    print(" # write evidence pack into <run-dir>/phase0.md")
    print(" # spawn 5 advisor agents → 5 peer review agents → write phase2_5.md + phase3.md → assemble output.md")
    print(" python3 scripts/run_council.py ingest <run-dir>")
    print(f" python3 optimization/scripts/campaign.py done {s.id} <run-dir-id>")
    print()
    return 0
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
def cmd_done(args) -> int:
    """Mark a scenario complete and link its run directory into the campaign.

    Records ``run_id`` and a UTC timestamp under the scenario's id in the
    manifest (overwriting any earlier record, with a warning), then tries to
    symlink the matching run directory into ``<campaign>/runs/``.

    Args:
        args: Parsed CLI namespace; reads ``args.scenario``, ``args.run_id``
            and ``args.name``.

    Returns:
        0 on success. Exits via ``sys.exit`` for an unknown scenario or an
        empty run id.
    """
    if args.scenario not in SCENARIO_BY_ID:
        sys.exit(f"❌ Unknown scenario: {args.scenario}\n Valid: {', '.join(s.id for s in SCENARIOS)}")
    # An empty id would make `SKILL_RUNS_DIR / run_id` the runs directory
    # itself, and would substring-match every telemetry record in `status`.
    if not args.run_id.strip():
        sys.exit("❌ Run id must not be empty.")
    campaign_dir = _resolve_campaign(args.name)
    manifest = _read_manifest(campaign_dir)

    if args.scenario in manifest.get("completed", {}):
        existing = manifest["completed"][args.scenario]
        print(f"⚠️ Scenario {args.scenario} already marked complete (run_id={existing['run_id']}). Overwriting.")

    manifest.setdefault("completed", {})[args.scenario] = {
        "run_id": args.run_id,
        "completed_at": _dt.datetime.now(tz=_dt.timezone.utc).isoformat(timespec="seconds"),
    }
    _write_manifest(campaign_dir, manifest)

    # Best-effort symlink the run dir into campaign_dir/runs/.
    # The skill's run dirs live at <skill-root>/.runs/<run-id> by convention.
    src = SKILL_RUNS_DIR / args.run_id
    if not src.exists():
        # The id may be only part of the run-dir name; accept a unique match.
        candidates = list(SKILL_RUNS_DIR.glob(f"*{args.run_id}*"))
        if len(candidates) == 1:
            src = candidates[0]
    if src.exists():
        link = campaign_dir / "runs" / src.name
        if not link.exists():
            try:
                link.symlink_to(src.resolve())
                print(f"🔗 Linked {link.name} → {src}")
            except OSError as e:
                print(f"⚠️ Symlink failed ({e}); run accessible at {src}")

    s = SCENARIO_BY_ID[args.scenario]
    next_s = _next_scenario(manifest)
    print(f"✅ Marked done: scenario #{s.number} {s.id}")
    if next_s:
        print(f" Next: campaign.py next # → scenario {next_s.id}")
    else:
        # Report the real catalogue size; it is loaded at runtime.
        print(f" All {len(SCENARIOS)} scenarios complete. Run: campaign.py status")
    return 0
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
def _md_cell(text) -> str:
    """Return *text* made safe for a single Markdown table cell.

    Pipes are backslash-escaped and line breaks collapsed to spaces so one
    value cannot spill into other columns or rows.
    """
    return " ".join(str(text).replace("|", "\\|").splitlines())


def cmd_log(args) -> int:
    """Record a hardening fix in the manifest and append it to findings.md.

    The fix is attributed to the most recently completed scenario (by
    ``completed_at``), if any. The manifest keeps the raw argument text; the
    Markdown row is escaped so the findings table stays well-formed.

    Args:
        args: Parsed CLI namespace; reads ``args.failure``, ``args.fix``,
            ``args.regression`` and ``args.name``.

    Returns:
        0 on success.
    """
    campaign_dir = _resolve_campaign(args.name)
    manifest = _read_manifest(campaign_dir)

    completed_count = len(manifest.get("completed", {}))
    last_scenario = None
    if completed_count > 0:
        # ISO-8601 timestamps sort chronologically as strings.
        last_scenario = sorted(
            manifest["completed"].items(),
            key=lambda kv: kv[1].get("completed_at", ""),
        )[-1][0]

    fix = {
        "at": _dt.datetime.now(tz=_dt.timezone.utc).isoformat(timespec="seconds"),
        "after_run": completed_count,
        "after_scenario": last_scenario,
        "failure": args.failure,
        "fix": args.fix,
        "regression_test": args.regression,
    }
    manifest.setdefault("fixes", []).append(fix)
    _write_manifest(campaign_dir, manifest)

    findings_path = campaign_dir / "findings.md"
    findings = findings_path.read_text() if findings_path.exists() else "| Run | Scenario | Failure surfaced | Fix shipped | Regression test |\n|---|---|---|---|---|\n"
    row = (
        f"| {completed_count} | {_md_cell(last_scenario or '—')} | {_md_cell(args.failure)} "
        f"| {_md_cell(args.fix)} | {_md_cell(args.regression)} |\n"
    )
    if not findings.endswith("\n"):
        findings += "\n"
    findings_path.write_text(findings + row)

    print(f"📝 Logged fix #{len(manifest['fixes'])} to findings.md")
    return 0
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
def cmd_status(args) -> int:
    """Print campaign progress, logged fixes and the exit-criteria scorecard.

    The scorecard is computed from telemetry records whose
    ``metrics.source_path`` contains the run id of a completed scenario.

    Args:
        args: Parsed CLI namespace; reads ``args.name`` (``None`` selects the
            active campaign).

    Returns:
        0 always.
    """
    campaign_dir = _resolve_campaign(args.name)
    manifest = _read_manifest(campaign_dir)
    completed = manifest.get("completed", {})
    fixes = manifest.get("fixes", [])

    print(f"# Campaign — {manifest.get('name')}")
    print(f"_Created: {manifest.get('created_at', '?')} · Path: {campaign_dir.relative_to(SKILL_ROOT)}_")
    print()
    print(f"**Progress:** {len(completed)}/{len(SCENARIOS)} scenarios complete")
    print(f"**Fixes shipped:** {len(fixes)}")
    print()

    print("## Scenario status")
    print()
    print("| # | Scenario | Status | Run ID | Completed |")
    print("|---|---|---|---|---|")
    for s in SCENARIOS:
        c = completed.get(s.id)
        if c:
            print(f"| {s.number} | `{s.id}` | ✅ done | `{c['run_id']}` | {c['completed_at']} |")
        else:
            print(f"| {s.number} | `{s.id}` | ☐ pending | — | — |")
    print()

    if fixes:
        print("## Hardening fixes shipped")
        print()
        print("| # | After run | After scenario | Failure | Fix | Regression |")
        print("|---|---|---|---|---|---|")
        for i, f in enumerate(fixes, 1):
            # .get throughout: a hand-edited manifest row missing a field
            # should not abort the whole report.
            print(f"| {i} | {f.get('after_run', '?')} | {f.get('after_scenario', '—')} | {f.get('failure', '?')} | {f.get('fix', '?')} | {f.get('regression_test', '?')} |")
        print()

    # Read telemetry from the skill-root jsonl. Filter to runs done in this campaign.
    if TELEMETRY_LOG.exists():
        # Drop empty / non-string ids: "" is a substring of every path and
        # would pull unrelated runs into the scorecard.
        run_ids = {
            rid
            for rid in (c.get("run_id") for c in completed.values())
            if isinstance(rid, str) and rid
        }
        records = []
        for line in TELEMETRY_LOG.read_text().splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError:
                continue  # tolerate partially written / corrupt lines
            # Match by source_path containing run_id.
            sp = (rec.get("metrics") or {}).get("source_path") or ""
            if any(rid in sp for rid in run_ids):
                records.append(rec)

        if records:
            print(f"## Exit-criteria scorecard ({len(records)} ingested runs)")
            print()
            target_n = len(SCENARIOS)
            valid = sum(1 for r in records if (r.get("validation") or {}).get("valid"))
            print(f"- Validate pass rate: {valid}/{len(records)} {'✅' if valid == len(records) else '❌'}")

            from collections import defaultdict
            advisor_passes = defaultdict(list)
            for r in records:
                for advisor, body in (r.get("advisors") or {}).items():
                    if isinstance(body, dict):
                        advisor_passes[advisor].append(body.get("gate_pass_count", 0))
            avg_str = ", ".join(f"{a}={sum(v)/len(v):.2f}" for a, v in sorted(advisor_passes.items()))
            min_avg = min((sum(v)/len(v) for v in advisor_passes.values()), default=0)
            print(f"- Advisor avg gate-pass (≥3.0 target): {avg_str} {'✅' if min_avg >= 3.0 else '❌'}")

            convergence_raised = sum(1 for r in records if (r.get("convergence") or {}).get("flag") is True)
            print(f"- Convergence flags raised: {convergence_raised} (target ≤1) {'✅' if convergence_raised <= 1 else '❌'}")

            high_or_above = sum(1 for r in records if (r.get("chairman") or {}).get("confidence") in ("high", "medium-high"))
            # The target was a fixed 6, i.e. 6 of the 8 default scenarios.
            # Scale it as 75% rounded up so it stays reachable for smaller
            # catalogues (still 6 for 8).
            # NOTE(review): confirm the 75% ratio against the playbook.
            confidence_target = (3 * target_n + 3) // 4
            print(f"- High/medium-high confidence: {high_or_above}/{len(records)} (target ≥{confidence_target}/{target_n}) {'✅' if (high_or_above >= confidence_target or len(records) < target_n) else '❌'}")

            tokens = [(r.get("metrics") or {}).get("tokens") for r in records if (r.get("metrics") or {}).get("tokens")]
            if tokens and len(tokens) > 1:
                # Spread = (max - min) as a percentage of the mean.
                spread = ((max(tokens) - min(tokens)) / (sum(tokens) / len(tokens))) * 100
                print(f"- Token spread: {spread:.1f}% (target ≤20%) {'✅' if spread <= 20 else '❌'}")

            print()

    if len(completed) == len(SCENARIOS):
        print("🎉 Campaign complete. Consider promoting durable findings into `references/*.md` and archiving this campaign.")
    else:
        print(f" Next: campaign.py next # {len(SCENARIOS) - len(completed)} scenarios remaining")
    return 0
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
def cmd_list(args) -> int:
    """Print one line per campaign directory, marking the active one with ``*``.

    Args:
        args: Parsed CLI namespace (unused).

    Returns:
        0 always.
    """
    campaign_dirs = sorted(entry for entry in _campaigns_dir().iterdir() if entry.is_dir())
    if len(campaign_dirs) == 0:
        print(f"(no campaigns under {CAMPAIGNS_DIR.relative_to(SKILL_ROOT)})")
        return 0

    current = _read_active_campaign()
    # Pad names to the longest one so the columns line up.
    name_width = max(len(entry.name) for entry in campaign_dirs)
    for entry in campaign_dirs:
        info = _read_manifest(entry)
        done_count = len(info.get("completed", {}))
        if entry.name == current:
            marker = "*"
        else:
            marker = " "
        print(f"{marker} {entry.name:<{name_width}} {done_count}/{len(SCENARIOS)} done · {info.get('created_at', '?')}")
    if current:
        print(f"\n_Active: {current}_")
    return 0
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
def cmd_active(args) -> int:
    """Show the active campaign, or set it when ``--set`` was given.

    Args:
        args: Parsed CLI namespace; reads ``args.set_name``.

    Returns:
        0 on success. Exits via ``sys.exit`` when the campaign to set does
        not exist.
    """
    requested = args.set_name
    if not requested:
        # Show mode.
        current = _read_active_campaign()
        print(current if current else "(no active campaign)")
        return 0

    # Set mode: only an existing campaign may become active.
    if not (_campaigns_dir() / requested).exists():
        sys.exit(f"❌ Campaign not found: {requested}")
    _write_active_campaign(requested)
    print(f"✓ Active campaign set to: {requested}")
    return 0
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
# ─── Entry ──────────────────────────────────────────────────────────────────
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
def main(argv=None):
    """Parse the command line and dispatch to the matching subcommand.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, so existing ``main()`` callers are
            unaffected; passing a list lets the CLI be driven from code.

    Exits via ``sys.exit`` with the subcommand handler's return code.
    """
    parser = argparse.ArgumentParser(description="Council hardening campaign manager.")
    sub = parser.add_subparsers(dest="cmd", required=True)

    p_new = sub.add_parser("new", help="Initialize a new campaign.")
    p_new.add_argument("name", help="Campaign slug, e.g. 2026-04-25-portability")
    p_new.set_defaults(func=cmd_new)

    p_status = sub.add_parser("status", help="Show campaign progress + scorecard.")
    p_status.add_argument("--name", default=None, help="Campaign name (defaults to active).")
    p_status.set_defaults(func=cmd_status)

    p_next = sub.add_parser("next", help="Print next scenario's briefing.")
    p_next.add_argument("--name", default=None)
    p_next.set_defaults(func=cmd_next)

    p_done = sub.add_parser("done", help="Mark a scenario complete.")
    p_done.add_argument("scenario", help=f"Scenario id ({', '.join(s.id for s in SCENARIOS)})")
    p_done.add_argument("run_id", help="Run dir id (e.g. 20260425T023423Z-0f82cea9)")
    p_done.add_argument("--name", default=None)
    p_done.set_defaults(func=cmd_done)

    p_log = sub.add_parser("log", help="Append a hardening fix to findings.md.")
    p_log.add_argument("failure", help="One-line failure description")
    p_log.add_argument("fix", help="One-line fix description")
    p_log.add_argument("regression", help="yes / no / n-a — was a regression test added?")
    p_log.add_argument("--name", default=None)
    p_log.set_defaults(func=cmd_log)

    p_list = sub.add_parser("list", help="List all campaigns.")
    p_list.set_defaults(func=cmd_list)

    p_active = sub.add_parser("active", help="Show or set the active campaign.")
    p_active.add_argument("--set", dest="set_name", default=None, help="Set the active campaign.")
    p_active.set_defaults(func=cmd_active)

    args = parser.parse_args(argv)
    # Each handler returns an int exit status.
    sys.exit(args.func(args))
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
if __name__ == "__main__":
|
|
543
|
-
main()
|