@cleocode/skills 2026.5.4 → 2026.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/package.json +1 -1
  2. package/skills/ct-council/SKILL.md +0 -377
  3. package/skills/ct-council/optimization/HARDENING-PLAYBOOK.md +0 -107
  4. package/skills/ct-council/optimization/README.md +0 -74
  5. package/skills/ct-council/optimization/scenarios.yaml +0 -121
  6. package/skills/ct-council/optimization/scripts/campaign.py +0 -543
  7. package/skills/ct-council/optimization/scripts/test_campaign.py +0 -143
  8. package/skills/ct-council/references/chairman.md +0 -119
  9. package/skills/ct-council/references/contrarian.md +0 -70
  10. package/skills/ct-council/references/evidence-pack.md +0 -145
  11. package/skills/ct-council/references/examples.md +0 -235
  12. package/skills/ct-council/references/executor.md +0 -83
  13. package/skills/ct-council/references/expansionist.md +0 -68
  14. package/skills/ct-council/references/first-principles.md +0 -73
  15. package/skills/ct-council/references/outsider.md +0 -73
  16. package/skills/ct-council/references/peer-review.md +0 -125
  17. package/skills/ct-council/scripts/analyze_runs.py +0 -293
  18. package/skills/ct-council/scripts/fixtures/executor_multi.md +0 -198
  19. package/skills/ct-council/scripts/fixtures/missing_advisor.md +0 -117
  20. package/skills/ct-council/scripts/fixtures/missing_convergence.md +0 -190
  21. package/skills/ct-council/scripts/fixtures/thin_evidence.md +0 -193
  22. package/skills/ct-council/scripts/fixtures/valid.md +0 -226
  23. package/skills/ct-council/scripts/fixtures/valid_with_llmtxt.md +0 -226
  24. package/skills/ct-council/scripts/llmtxt_ref.py +0 -223
  25. package/skills/ct-council/scripts/run_council.py +0 -578
  26. package/skills/ct-council/scripts/telemetry.py +0 -624
  27. package/skills/ct-council/scripts/test_telemetry.py +0 -509
  28. package/skills/ct-council/scripts/test_validate.py +0 -452
  29. package/skills/ct-council/scripts/validate.py +0 -396
package/skills/ct-council/optimization/scenarios.yaml
@@ -1,121 +0,0 @@
- # Council shakedown scenarios — campaign.py loads this at runtime.
- #
- # Each entry must have: id, number, title, dimension, shape, learn, briefing.
- # The `id` is the slug used everywhere (CLI, manifest, runs/<scenario>).
- # The `number` controls run order (lowest first; gaps are OK).
- #
- # To add a new scenario: append an entry below. No code changes required.
- # To change a briefing: edit the `briefing` field. campaign.py picks it up
- # on the next invocation.
- #
- # Keep `briefing` as multi-line text (use YAML's `|` literal block style).
- # It's printed verbatim by `campaign.py next` to brief the orchestrator.
- #
- # Schema version: 1.0.0
-
- schema_version: "1.0.0"
-
- scenarios:
-   - id: baseline
-     number: 1
-     title: Narrow binary, dense evidence
-     dimension: Control run
-     shape: "Binary decision, 5-7 path:line / sha citations, no llmtxt:"
-     learn: Baseline cost / wall-clock / gate-pass distribution all subsequent runs compare against.
-     briefing: |
-       Pick a real, current cleocode (or active-project) decision with a clean binary shape:
-       - 'Should we X or stay with Y?'
-       - 'Is Z ready to ship?'
-       Evidence pack: 5-7 items, all path:line or sha citations from the live codebase. NO llmtxt:.
-       This run sets the campaign's baseline gate-pass distribution and token cost.
-
-   - id: external-doc-heavy
-     number: 2
-     title: External-doc heavy
-     dimension: "Live llmtxt: integration"
-     shape: "Binary, ≥3 of 7 evidence items as llmtxt:<slug>"
-     learn: Does the wrapper survive real subagent distribution under auth/rate-limit conditions?
-     briefing: |
-       Pick a question that genuinely depends on external docs (libraries, APIs, specs).
-       Evidence pack: ≥3 items as `llmtxt:<slug>` with concise inline rationale; 1-3 local citations.
-       If `llmtxt_ref.py` cannot fetch (auth/network), inline rationales are the ground-truth fallback.
-
-   - id: three-way
-     number: 3
-     title: Three-way trade-off
-     dimension: Chairman ranking, not binary approve
-     shape: "'Which of A / B / C?'"
-     learn: "Does the verdict template hold for N-way? Is `### Recommendation` flexible enough?"
-     briefing: |
-       Pose a genuinely 3-way question — three implementation choices, three vendors, three patterns.
-       The Chairman MUST produce a ranking (A >> B > C), not just 'pick A.' Watch for:
-       - Does the verdict cleanly state the rank?
-       - Does it justify the 2nd place's reason for not being 1st?
-       - Does at least one advisor argue for each option?
-
-   - id: sparse-ops
-     number: 4
-     title: Sparse / ops question
-     dimension: Advisors with no code to grep
-     shape: Configs + external docs only; no executable-code citations
-     learn: Do advisors honestly say "insufficient" or hallucinate to fill gaps?
-     briefing: |
-       Pick an ops/config question: CI matrix, package.json field, env var, deployment policy.
-       Evidence pack: only config files (yaml, json, toml) + external doc references. NO src/ citations.
-       In each subagent briefing, explicitly note: 'punish fabricated specificity, not honest abstention.'
-       Watch Outsider especially — sparse evidence is their highest-leverage scenario.
-
-   - id: contradictory
-     number: 5
-     title: Contradictory evidence
-     dimension: Contradiction handling
-     shape: Pack contains 2 items that disagree on purpose
-     learn: Does Outsider catch it? Does FP re-derive cleanly under conflicting overlay?
-     briefing: |
-       Plant a deliberate contradiction in the pack:
-       - Item N says X (e.g. an ADR / AGENTS.md rule)
-       - Item N+1 shows ¬X exists on disk (e.g. a directory listing or commit)
-       Do NOT signal the contradiction in the question — let the advisors find it.
-       Outsider's frame is the canonical antibody; FP must classify which side is load-bearing truth.
-
-   - id: non-cleo
-     number: 6
-     title: Non-CLEO project
-     dimension: Portability beyond cleocode conventions
-     shape: External small repo + bug report; council run inside it
-     learn: Does the skill work on any project, or has it accumulated cleocode-isms?
-     briefing: |
-       Clone a small external repo to /tmp (e.g. a popular OSS utility under <500 files).
-       Pose a real-looking bug report or design question against THAT repo's actual files.
-       Run the council with NO cleocode-specific evidence. Watch for:
-       - Persona files referencing 'CLEO' / 'BRAIN' / 'NEXUS' (cleocode-isms)
-       - Validator failures on conventions that don't apply outside cleocode
-       - llmtxt_ref / cleo CLI references that don't exist in the foreign repo
-
-   - id: mini
-     number: 7
-     title: Small-scope stress (3 items)
-     dimension: Overhead-vs-signal ratio
-     shape: Exactly 3 evidence items (the validator floor)
-     learn: Is a "mini-council" variant worth shipping? Can the gates fire on thin packs?
-     briefing: |
-       Pose a tightly-scoped question and discipline yourself to EXACTLY 3 evidence items.
-       Watch for:
-       - Gate-pass distribution (does G1 Rigor still fire reasonably?)
-       - Token cost (should be 60-70% of full-council baseline)
-       - Chairman confidence (does it stay ≥medium?)
-       If all three hold, document a 'mini-council' variant in SKILL.md.
-
-   - id: contention
-     number: 8
-     title: High-contention
-     dimension: Chairman reconciliation under genuine disagreement
-     shape: Question designed to produce a 3-vs-2 advisor split
-     learn: Does the Chairman template handle real contention rather than directional convergence?
-     briefing: |
-       Pose a question where 2-3 advisors will plausibly disagree with the other 2-3:
-       - Speed-vs-safety trade-offs (Executor/Expansionist vs Contrarian/FP)
-       - Autonomy-vs-control questions (Outsider / Contrarian split is common)
-       - The Council voting on its own composition / process
-       If the verdict converges directionally instead of splitting, the contention test failed —
-       consider a re-run with a sharper question.
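The header comments in the removed file describe an append-only workflow: a new scenario is just one more mapping with the seven required fields. As a sketch of the equivalent structure the loader in campaign.py (below) accepts from scenarios.json or its hardcoded fallback, here is a hypothetical ninth entry; the id, title, and all text are invented for illustration and were never part of the package:

```python
# Hypothetical scenario entry (NOT part of the package), matching the
# documented schema: id, number, title, dimension, shape, learn, briefing.
hypothetical_scenario = {
    "id": "long-horizon",   # slug used by the CLI, manifest, and runs/<scenario>
    "number": 9,            # runs after `contention`; gaps in numbering are OK
    "title": "Long-horizon planning",
    "dimension": "Multi-step decisions",
    "shape": "Binary, evidence gathered across several milestones",
    "learn": "Illustrative only: does the council stay coherent over a long plan?",
    "briefing": (
        "Pose a question whose evidence spans several milestones.\n"
        "Watch whether advisors keep citing the evidence pack late in the run.\n"
    ),
}
```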
package/skills/ct-council/optimization/scripts/campaign.py
@@ -1,543 +0,0 @@
- #!/usr/bin/env python3
- """
- campaign.py — programmatic tracker for Council hardening campaigns.
-
- A campaign is an instance of the playbook: a sequence of shakedown runs with
- shared telemetry and a cumulative findings log. Campaigns persist locally
- (gitignored under `optimization/campaigns/<name>/`); the playbook itself
- (`optimization/HARDENING-PLAYBOOK.md`) stays committed.
-
- Subcommands:
-
-     new <name>                 Initialize a new campaign directory from the playbook.
-     status [--name <n>]        Show campaign progress + exit-criteria scorecard.
-     next [--name <n>]          Print the next scenario's full briefing.
-     done <scenario> <run-id>   Mark a scenario complete (links the run dir).
-     log <failure> <fix> <reg>  Append a hardening fix to findings.md.
-     list                       List all known campaigns under campaigns/.
-     active [--set <n>]         Show or set the active campaign (used as default).
-
- Usage:
-
-     python3 optimization/scripts/campaign.py new 2026-04-25-portability
-     python3 optimization/scripts/campaign.py next
-     python3 optimization/scripts/campaign.py done baseline 20260425T023423Z-0f82cea9
-     python3 optimization/scripts/campaign.py log "Executor mis-cite" "Pre-action verify rule" "yes"
-     python3 optimization/scripts/campaign.py status
- """
-
- from __future__ import annotations
-
- import argparse
- import datetime as _dt
- import json
- import sys
- from dataclasses import dataclass
- from pathlib import Path
-
- # Resolve skill root from this script's location.
- SCRIPT_PATH = Path(__file__).resolve()
- OPTIMIZATION_DIR = SCRIPT_PATH.parent.parent
- SKILL_ROOT = OPTIMIZATION_DIR.parent
- CAMPAIGNS_DIR = OPTIMIZATION_DIR / "campaigns"
- PLAYBOOK_PATH = OPTIMIZATION_DIR / "HARDENING-PLAYBOOK.md"
- ACTIVE_FILE = OPTIMIZATION_DIR / ".active-campaign"  # gitignored
- TELEMETRY_LOG = SKILL_ROOT / ".cleo" / "council-runs.jsonl"
- SKILL_RUNS_DIR = SKILL_ROOT / ".runs"
-
-
- # ─── Scenario catalogue (loaded from optimization/scenarios.yaml) ────────────
-
-
- @dataclass(frozen=True)
- class Scenario:
-     id: str        # e.g. "baseline", "external-doc-heavy"
-     number: int    # campaign run-order key (lowest first)
-     title: str
-     dimension: str
-     shape: str
-     learn: str
-     briefing: str  # multi-line guidance for the orchestrator
-
-
- SCENARIOS_YAML_PATH = OPTIMIZATION_DIR / "scenarios.yaml"
- SCENARIOS_JSON_PATH = OPTIMIZATION_DIR / "scenarios.json"  # alternate format
-
- # Hardcoded fallback used only if both scenarios.yaml and scenarios.json are
- # missing (or unparseable) AND the YAML library isn't available. Keeps
- # campaign.py runnable in clean-checkout / minimal-deps environments.
- _FALLBACK_SCENARIOS: list[dict] = [
-     {
-         "id": "baseline",
-         "number": 1,
-         "title": "Narrow binary, dense evidence",
-         "dimension": "Control run",
-         "shape": "Binary decision, 5-7 path:line / sha citations, no llmtxt:",
-         "learn": "Baseline cost / wall-clock / gate-pass distribution all subsequent runs compare against.",
-         "briefing": (
-             "Pick a binary decision in the active project.\n"
-             "Evidence: 5-7 path:line or sha citations from the live codebase. No llmtxt:.\n"
-             "This run sets the campaign's baseline cost + gate-pass distribution.\n"
-         ),
-     },
- ]
-
-
- def _load_scenarios() -> list[Scenario]:
-     """Load scenarios from YAML (preferred), JSON (alternate), or fallback list.
-
-     Order of precedence:
-       1. optimization/scenarios.yaml (if pyyaml available + file present)
-       2. optimization/scenarios.json (always-available fallback for editing)
-       3. Hardcoded _FALLBACK_SCENARIOS (clean-checkout safety net)
-     """
-     raw_entries: list[dict] | None = None
-
-     if SCENARIOS_YAML_PATH.exists():
-         try:
-             import yaml  # type: ignore
-             data = yaml.safe_load(SCENARIOS_YAML_PATH.read_text())
-             if isinstance(data, dict) and isinstance(data.get("scenarios"), list):
-                 raw_entries = data["scenarios"]
-         except ImportError:
-             print(
-                 "ℹ️ scenarios.yaml exists but PyYAML isn't installed; "
-                 "falling back to scenarios.json or hardcoded list. "
-                 "Run `pip install pyyaml` to use YAML.",
-                 file=sys.stderr,
-             )
-         except Exception as e:
-             print(f"⚠️ Could not parse {SCENARIOS_YAML_PATH}: {e}", file=sys.stderr)
-
-     if raw_entries is None and SCENARIOS_JSON_PATH.exists():
-         try:
-             data = json.loads(SCENARIOS_JSON_PATH.read_text())
-             if isinstance(data, dict) and isinstance(data.get("scenarios"), list):
-                 raw_entries = data["scenarios"]
-             elif isinstance(data, list):
-                 raw_entries = data
-         except json.JSONDecodeError as e:
-             print(f"⚠️ Could not parse {SCENARIOS_JSON_PATH}: {e}", file=sys.stderr)
-
-     if raw_entries is None:
-         raw_entries = _FALLBACK_SCENARIOS
-
-     out: list[Scenario] = []
-     required_fields = ["id", "number", "title", "dimension", "shape", "learn", "briefing"]
-     for i, entry in enumerate(raw_entries, 1):
-         if not isinstance(entry, dict):
-             print(f"⚠️ Scenario #{i} is not a mapping; skipping.", file=sys.stderr)
-             continue
-         missing = [f for f in required_fields if f not in entry]
-         if missing:
-             print(f"⚠️ Scenario #{i} ({entry.get('id', '?')}) missing fields: {missing}; skipping.", file=sys.stderr)
-             continue
-         out.append(Scenario(
-             id=entry["id"],
-             number=int(entry["number"]),
-             title=entry["title"],
-             dimension=entry["dimension"],
-             shape=entry["shape"],
-             learn=entry["learn"],
-             briefing=entry["briefing"],
-         ))
-     out.sort(key=lambda s: s.number)
-     if not out:
-         print("⚠️ No valid scenarios loaded; using hardcoded fallback.", file=sys.stderr)
-         out = [Scenario(**e) for e in _FALLBACK_SCENARIOS]
-     return out
-
-
- SCENARIOS: list[Scenario] = _load_scenarios()
- SCENARIO_BY_ID: dict[str, Scenario] = {s.id: s for s in SCENARIOS}
-
-
- # ─── Campaign helpers ───────────────────────────────────────────────────────
-
-
- def _campaigns_dir() -> Path:
-     CAMPAIGNS_DIR.mkdir(parents=True, exist_ok=True)
-     return CAMPAIGNS_DIR
-
-
- def _read_active_campaign() -> str | None:
-     if ACTIVE_FILE.exists():
-         return ACTIVE_FILE.read_text().strip() or None
-     # If exactly one campaign exists, use it as default.
-     dirs = [p for p in _campaigns_dir().iterdir() if p.is_dir()]
-     if len(dirs) == 1:
-         return dirs[0].name
-     return None
-
-
- def _write_active_campaign(name: str) -> None:
-     ACTIVE_FILE.write_text(name + "\n")
-
-
- def _resolve_campaign(name: str | None) -> Path:
-     name = name or _read_active_campaign()
-     if not name:
-         sys.exit(
-             "❌ No campaign specified and no active campaign set.\n"
-             "   Run: campaign.py new <name>   OR   campaign.py active --set <name>"
-         )
-     path = _campaigns_dir() / name
-     if not path.exists():
-         sys.exit(f"❌ Campaign not found: {path}\n   Existing: campaign.py list")
-     return path
-
-
- def _read_manifest(campaign_dir: Path) -> dict:
-     p = campaign_dir / "manifest.json"
-     if not p.exists():
-         return {"name": campaign_dir.name, "completed": {}, "fixes": []}
-     try:
-         return json.loads(p.read_text())
-     except json.JSONDecodeError:
-         return {"name": campaign_dir.name, "completed": {}, "fixes": []}
-
-
- def _write_manifest(campaign_dir: Path, manifest: dict) -> None:
-     (campaign_dir / "manifest.json").write_text(
-         json.dumps(manifest, indent=2, sort_keys=True) + "\n"
-     )
-
-
- def _next_scenario(manifest: dict) -> Scenario | None:
-     completed_ids = set(manifest.get("completed", {}).keys())
-     for s in SCENARIOS:
-         if s.id not in completed_ids:
-             return s
-     return None
-
-
- # ─── Subcommands ────────────────────────────────────────────────────────────
-
-
- def cmd_new(args) -> int:
-     name = args.name.strip()
-     if "/" in name or name.startswith("."):
-         sys.exit("❌ Campaign name must be a simple slug (no slashes, no leading dot).")
-
-     path = _campaigns_dir() / name
-     if path.exists():
-         sys.exit(f"❌ Campaign already exists: {path}")
-
-     path.mkdir(parents=True)
-     (path / "runs").mkdir()
-
-     manifest = {
-         "name": name,
-         "schema_version": "1.0.0",
-         "created_at": _dt.datetime.now(tz=_dt.timezone.utc).isoformat(timespec="seconds"),
-         "playbook": str(PLAYBOOK_PATH.relative_to(SKILL_ROOT)),
-         "telemetry_log": str(TELEMETRY_LOG.relative_to(SKILL_ROOT)),
-         "completed": {},  # scenario_id → {run_id, completed_at}
-         "fixes": [],      # list of {at, failure, fix, regression_test}
-     }
-     _write_manifest(path, manifest)
-
-     findings_md = (
-         f"# Findings — campaign `{name}`\n\n"
-         "Failure-mode diff table — appended via `campaign.py log` between runs.\n"
-         "Each row pairs a failure surfaced in run N with the fix shipped before run N+1.\n\n"
-         "| Run | Scenario | Failure surfaced | Fix shipped | Regression test |\n"
-         "|---|---|---|---|---|\n"
-     )
-     (path / "findings.md").write_text(findings_md)
-
-     plan_md = (
-         f"# Plan — campaign `{name}`\n\n"
-         "Generated from `optimization/HARDENING-PLAYBOOK.md`. "
-         "Edit this file to add campaign-specific notes (skipped scenarios, custom questions, etc.) — "
-         "the manifest tracks scenario completion separately.\n\n"
-         "## Scenarios (run in order)\n\n"
-     )
-     for s in SCENARIOS:
-         plan_md += f"### {s.number}. {s.id} — {s.title}\n\n"
-         plan_md += f"**Dimension:** {s.dimension}\n\n"
-         plan_md += f"**Shape:** {s.shape}\n\n"
-         plan_md += f"**Learn:** {s.learn}\n\n"
-         plan_md += f"**Status:** _pending_\n\n"
-     (path / "plan.md").write_text(plan_md)
-
-     _write_active_campaign(name)
-
-     print(f"📁 Campaign initialized: {path}")
-     print(f"   Active campaign set to: {name}")
-     print(f"   Next: campaign.py next")
-     return 0
-
-
- def cmd_next(args) -> int:
-     campaign_dir = _resolve_campaign(args.name)
-     manifest = _read_manifest(campaign_dir)
-     s = _next_scenario(manifest)
-     if s is None:
-         print("✅ All 8 scenarios completed for this campaign.")
-         print("   Run: campaign.py status   # for the exit-criteria scorecard")
-         return 0
-
-     print(f"# Next scenario — {s.number}/{len(SCENARIOS)} · {s.id}")
-     print()
-     print(f"**Title:** {s.title}")
-     print(f"**Dimension:** {s.dimension}")
-     print(f"**Shape:** {s.shape}")
-     print(f"**Learn:** {s.learn}")
-     print()
-     print("## Briefing")
-     print()
-     print(s.briefing)
-     print("## Suggested commands")
-     print()
-     print(f"    python3 scripts/run_council.py init '<your question>' --scenario {s.id} --subagent-mode")
-     print(f"    # write evidence pack into <run-dir>/phase0.md")
-     print(f"    # spawn 5 advisor agents → 5 peer review agents → write phase2_5.md + phase3.md → assemble output.md")
-     print(f"    python3 scripts/run_council.py ingest <run-dir>")
-     print(f"    python3 optimization/scripts/campaign.py done {s.id} <run-dir-id>")
-     print()
-     return 0
-
-
- def cmd_done(args) -> int:
-     if args.scenario not in SCENARIO_BY_ID:
-         sys.exit(f"❌ Unknown scenario: {args.scenario}\n   Valid: {', '.join(s.id for s in SCENARIOS)}")
-     campaign_dir = _resolve_campaign(args.name)
-     manifest = _read_manifest(campaign_dir)
-
-     if args.scenario in manifest.get("completed", {}):
-         existing = manifest["completed"][args.scenario]
-         print(f"⚠️ Scenario {args.scenario} already marked complete (run_id={existing['run_id']}). Overwriting.")
-
-     manifest.setdefault("completed", {})[args.scenario] = {
-         "run_id": args.run_id,
-         "completed_at": _dt.datetime.now(tz=_dt.timezone.utc).isoformat(timespec="seconds"),
-     }
-     _write_manifest(campaign_dir, manifest)
-
-     # Best-effort symlink the run dir into campaign_dir/runs/.
-     # The skill's run dirs live at <skill-root>/.runs/<run-id> by convention.
-     src = SKILL_RUNS_DIR / args.run_id
-     if not src.exists():
-         # User may have passed a full run-dir name with timestamp prefix.
-         candidates = list(SKILL_RUNS_DIR.glob(f"*{args.run_id}*"))
-         if len(candidates) == 1:
-             src = candidates[0]
-     if src.exists():
-         link = campaign_dir / "runs" / src.name
-         if not link.exists():
-             try:
-                 link.symlink_to(src.resolve())
-                 print(f"🔗 Linked {link.name} → {src}")
-             except OSError as e:
-                 print(f"⚠️ Symlink failed ({e}); run accessible at {src}")
-
-     s = SCENARIO_BY_ID[args.scenario]
-     next_s = _next_scenario(manifest)
-     print(f"✅ Marked done: scenario #{s.number} {s.id}")
-     if next_s:
-         print(f"   Next: campaign.py next   # → scenario {next_s.id}")
-     else:
-         print(f"   All 8 scenarios complete. Run: campaign.py status")
-     return 0
-
-
- def cmd_log(args) -> int:
-     campaign_dir = _resolve_campaign(args.name)
-     manifest = _read_manifest(campaign_dir)
-
-     completed_count = len(manifest.get("completed", {}))
-     last_scenario = None
-     if completed_count > 0:
-         last_scenario = sorted(
-             manifest["completed"].items(),
-             key=lambda kv: kv[1].get("completed_at", ""),
-         )[-1][0]
-
-     fix = {
-         "at": _dt.datetime.now(tz=_dt.timezone.utc).isoformat(timespec="seconds"),
-         "after_run": completed_count,
-         "after_scenario": last_scenario,
-         "failure": args.failure,
-         "fix": args.fix,
-         "regression_test": args.regression,
-     }
-     manifest.setdefault("fixes", []).append(fix)
-     _write_manifest(campaign_dir, manifest)
-
-     findings_path = campaign_dir / "findings.md"
-     findings = findings_path.read_text() if findings_path.exists() else "| Run | Scenario | Failure surfaced | Fix shipped | Regression test |\n|---|---|---|---|---|\n"
-     row = f"| {completed_count} | {last_scenario or '—'} | {args.failure} | {args.fix} | {args.regression} |\n"
-     if not findings.endswith("\n"):
-         findings += "\n"
-     findings_path.write_text(findings + row)
-
-     print(f"📝 Logged fix #{len(manifest['fixes'])} to findings.md")
-     return 0
-
-
- def cmd_status(args) -> int:
-     campaign_dir = _resolve_campaign(args.name)
-     manifest = _read_manifest(campaign_dir)
-     completed = manifest.get("completed", {})
-     fixes = manifest.get("fixes", [])
-
-     print(f"# Campaign — {manifest.get('name')}")
-     print(f"_Created: {manifest.get('created_at', '?')} · Path: {campaign_dir.relative_to(SKILL_ROOT)}_")
-     print()
-     print(f"**Progress:** {len(completed)}/{len(SCENARIOS)} scenarios complete")
-     print(f"**Fixes shipped:** {len(fixes)}")
-     print()
-
-     print("## Scenario status")
-     print()
-     print("| # | Scenario | Status | Run ID | Completed |")
-     print("|---|---|---|---|---|")
-     for s in SCENARIOS:
-         c = completed.get(s.id)
-         if c:
-             print(f"| {s.number} | `{s.id}` | ✅ done | `{c['run_id']}` | {c['completed_at']} |")
-         else:
-             print(f"| {s.number} | `{s.id}` | ☐ pending | — | — |")
-     print()
-
-     if fixes:
-         print("## Hardening fixes shipped")
-         print()
-         print("| # | After run | After scenario | Failure | Fix | Regression |")
-         print("|---|---|---|---|---|---|")
-         for i, f in enumerate(fixes, 1):
-             print(f"| {i} | {f.get('after_run', '?')} | {f.get('after_scenario', '—')} | {f['failure']} | {f['fix']} | {f.get('regression_test', '?')} |")
-         print()
-
-     # Read telemetry from the skill-root jsonl. Filter to runs done in this campaign.
-     if TELEMETRY_LOG.exists():
-         run_ids = {c["run_id"] for c in completed.values()}
-         records = []
-         for line in TELEMETRY_LOG.read_text().splitlines():
-             line = line.strip()
-             if not line:
-                 continue
-             try:
-                 rec = json.loads(line)
-             except json.JSONDecodeError:
-                 continue
-             # Match by source_path containing run_id.
-             sp = (rec.get("metrics") or {}).get("source_path") or ""
-             if any(rid in sp for rid in run_ids):
-                 records.append(rec)
-
-         if records:
-             print(f"## Exit-criteria scorecard ({len(records)} ingested runs)")
-             print()
-             target_n = len(SCENARIOS)
-             valid = sum(1 for r in records if (r.get("validation") or {}).get("valid"))
-             print(f"- Validate pass rate: {valid}/{len(records)} {'✅' if valid == len(records) else '❌'}")
-
-             from collections import defaultdict
-             advisor_passes = defaultdict(list)
-             for r in records:
-                 for advisor, body in (r.get("advisors") or {}).items():
-                     advisor_passes[advisor].append(body.get("gate_pass_count", 0))
-             avg_str = ", ".join(f"{a}={sum(v)/len(v):.2f}" for a, v in sorted(advisor_passes.items()))
-             min_avg = min((sum(v)/len(v) for v in advisor_passes.values()), default=0)
-             print(f"- Advisor avg gate-pass (≥3.0 target): {avg_str} {'✅' if min_avg >= 3.0 else '❌'}")
-
-             convergence_raised = sum(1 for r in records if (r.get("convergence") or {}).get("flag") is True)
-             print(f"- Convergence flags raised: {convergence_raised} (target ≤1) {'✅' if convergence_raised <= 1 else '❌'}")
-
-             high_or_above = sum(1 for r in records if (r.get("chairman") or {}).get("confidence") in ("high", "medium-high"))
-             print(f"- High/medium-high confidence: {high_or_above}/{len(records)} (target ≥6/{target_n}) {'✅' if (high_or_above >= 6 or len(records) < target_n) else '❌'}")
-
-             tokens = [(r.get("metrics") or {}).get("tokens") for r in records if (r.get("metrics") or {}).get("tokens")]
-             if tokens and len(tokens) > 1:
-                 spread = ((max(tokens) - min(tokens)) / (sum(tokens) / len(tokens))) * 100
-                 print(f"- Token spread: {spread:.1f}% (target ≤20%) {'✅' if spread <= 20 else '❌'}")
-
-             print()
-
-     if len(completed) == len(SCENARIOS):
-         print("🎉 Campaign complete. Consider promoting durable findings into `references/*.md` and archiving this campaign.")
-     else:
-         print(f"   Next: campaign.py next   # {len(SCENARIOS) - len(completed)} scenarios remaining")
-     return 0
-
-
- def cmd_list(args) -> int:
-     dirs = sorted(p for p in _campaigns_dir().iterdir() if p.is_dir())
-     if not dirs:
-         print(f"(no campaigns under {CAMPAIGNS_DIR.relative_to(SKILL_ROOT)})")
-         return 0
-     active = _read_active_campaign()
-     width = max(len(d.name) for d in dirs)
-     for d in dirs:
-         manifest = _read_manifest(d)
-         completed = len(manifest.get("completed", {}))
-         marker = "*" if d.name == active else " "
-         print(f"{marker} {d.name:<{width}} {completed}/{len(SCENARIOS)} done · {manifest.get('created_at', '?')}")
-     if active:
-         print(f"\n_Active: {active}_")
-     return 0
-
-
- def cmd_active(args) -> int:
-     if args.set_name:
-         path = _campaigns_dir() / args.set_name
-         if not path.exists():
-             sys.exit(f"❌ Campaign not found: {args.set_name}")
-         _write_active_campaign(args.set_name)
-         print(f"✓ Active campaign set to: {args.set_name}")
-         return 0
-     active = _read_active_campaign()
-     if active:
-         print(active)
-     else:
-         print("(no active campaign)")
-     return 0
-
-
- # ─── Entry ──────────────────────────────────────────────────────────────────
-
-
- def main():
-     parser = argparse.ArgumentParser(description="Council hardening campaign manager.")
-     sub = parser.add_subparsers(dest="cmd", required=True)
-
-     p_new = sub.add_parser("new", help="Initialize a new campaign.")
-     p_new.add_argument("name", help="Campaign slug, e.g. 2026-04-25-portability")
-     p_new.set_defaults(func=cmd_new)
-
-     p_status = sub.add_parser("status", help="Show campaign progress + scorecard.")
-     p_status.add_argument("--name", default=None, help="Campaign name (defaults to active).")
-     p_status.set_defaults(func=cmd_status)
-
-     p_next = sub.add_parser("next", help="Print next scenario's briefing.")
-     p_next.add_argument("--name", default=None)
-     p_next.set_defaults(func=cmd_next)
-
-     p_done = sub.add_parser("done", help="Mark a scenario complete.")
-     p_done.add_argument("scenario", help=f"Scenario id ({', '.join(s.id for s in SCENARIOS)})")
-     p_done.add_argument("run_id", help="Run dir id (e.g. 20260425T023423Z-0f82cea9)")
-     p_done.add_argument("--name", default=None)
-     p_done.set_defaults(func=cmd_done)
-
-     p_log = sub.add_parser("log", help="Append a hardening fix to findings.md.")
-     p_log.add_argument("failure", help="One-line failure description")
-     p_log.add_argument("fix", help="One-line fix description")
-     p_log.add_argument("regression", help="yes / no / n-a — was a regression test added?")
-     p_log.add_argument("--name", default=None)
-     p_log.set_defaults(func=cmd_log)
-
-     p_list = sub.add_parser("list", help="List all campaigns.")
-     p_list.set_defaults(func=cmd_list)
-
-     p_active = sub.add_parser("active", help="Show or set the active campaign.")
-     p_active.add_argument("--set", dest="set_name", default=None, help="Set the active campaign.")
-     p_active.set_defaults(func=cmd_active)
-
-     args = parser.parse_args()
-     sys.exit(args.func(args))
-
-
- if __name__ == "__main__":
-     main()
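For orientation, the on-disk state the removed campaign.py maintained is small: one manifest.json per campaign, created by cmd_new and updated by cmd_done and cmd_log. A sketch of its shape follows; every concrete value is invented for illustration (the slug and run id echo the docstring's own examples), and the key names come directly from the code above:

```python
# Sketch of optimization/campaigns/<name>/manifest.json as maintained by
# the campaign.py shown above. All values here are illustrative, not real data.
example_manifest = {
    "name": "2026-04-25-portability",
    "schema_version": "1.0.0",
    "created_at": "2026-04-25T02:30:00+00:00",
    "playbook": "optimization/HARDENING-PLAYBOOK.md",
    "telemetry_log": ".cleo/council-runs.jsonl",
    # scenario_id → completion record, written by cmd_done
    "completed": {
        "baseline": {
            "run_id": "20260425T023423Z-0f82cea9",
            "completed_at": "2026-04-25T03:10:00+00:00",
        },
    },
    # appended by cmd_log; mirrored as table rows in findings.md
    "fixes": [
        {
            "at": "2026-04-25T03:25:00+00:00",
            "after_run": 1,
            "after_scenario": "baseline",
            "failure": "Executor mis-cite",
            "fix": "Pre-action verify rule",
            "regression_test": "yes",
        },
    ],
}
```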