@cleocode/skills 2026.5.4 → 2026.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/ct-council/SKILL.md +0 -377
- package/skills/ct-council/optimization/HARDENING-PLAYBOOK.md +0 -107
- package/skills/ct-council/optimization/README.md +0 -74
- package/skills/ct-council/optimization/scenarios.yaml +0 -121
- package/skills/ct-council/optimization/scripts/campaign.py +0 -543
- package/skills/ct-council/optimization/scripts/test_campaign.py +0 -143
- package/skills/ct-council/references/chairman.md +0 -119
- package/skills/ct-council/references/contrarian.md +0 -70
- package/skills/ct-council/references/evidence-pack.md +0 -145
- package/skills/ct-council/references/examples.md +0 -235
- package/skills/ct-council/references/executor.md +0 -83
- package/skills/ct-council/references/expansionist.md +0 -68
- package/skills/ct-council/references/first-principles.md +0 -73
- package/skills/ct-council/references/outsider.md +0 -73
- package/skills/ct-council/references/peer-review.md +0 -125
- package/skills/ct-council/scripts/analyze_runs.py +0 -293
- package/skills/ct-council/scripts/fixtures/executor_multi.md +0 -198
- package/skills/ct-council/scripts/fixtures/missing_advisor.md +0 -117
- package/skills/ct-council/scripts/fixtures/missing_convergence.md +0 -190
- package/skills/ct-council/scripts/fixtures/thin_evidence.md +0 -193
- package/skills/ct-council/scripts/fixtures/valid.md +0 -226
- package/skills/ct-council/scripts/fixtures/valid_with_llmtxt.md +0 -226
- package/skills/ct-council/scripts/llmtxt_ref.py +0 -223
- package/skills/ct-council/scripts/run_council.py +0 -578
- package/skills/ct-council/scripts/telemetry.py +0 -624
- package/skills/ct-council/scripts/test_telemetry.py +0 -509
- package/skills/ct-council/scripts/test_validate.py +0 -452
- package/skills/ct-council/scripts/validate.py +0 -396
|
@@ -1,143 +0,0 @@
|
|
|
1
|
-
"""Tests for the YAML-driven scenario loader in campaign.py."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import json
|
|
6
|
-
import sys
|
|
7
|
-
import tempfile
|
|
8
|
-
import unittest
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
from unittest import mock
|
|
11
|
-
|
|
12
|
-
SCRIPT_DIR = Path(__file__).resolve().parent
|
|
13
|
-
sys.path.insert(0, str(SCRIPT_DIR))
|
|
14
|
-
|
|
15
|
-
# Import after path injection.
|
|
16
|
-
import campaign # noqa: E402
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class TestScenarioLoader(unittest.TestCase):
|
|
20
|
-
"""Verify scenarios.yaml drives campaign.py without code edits."""
|
|
21
|
-
|
|
22
|
-
def test_default_load_returns_scenarios(self):
|
|
23
|
-
# The committed scenarios.yaml should produce ≥1 scenario.
|
|
24
|
-
scenarios = campaign._load_scenarios()
|
|
25
|
-
self.assertGreater(len(scenarios), 0)
|
|
26
|
-
for s in scenarios:
|
|
27
|
-
self.assertIsInstance(s, campaign.Scenario)
|
|
28
|
-
self.assertTrue(s.id)
|
|
29
|
-
self.assertTrue(s.title)
|
|
30
|
-
|
|
31
|
-
def test_default_scenarios_are_sorted_by_number(self):
|
|
32
|
-
scenarios = campaign._load_scenarios()
|
|
33
|
-
numbers = [s.number for s in scenarios]
|
|
34
|
-
self.assertEqual(numbers, sorted(numbers))
|
|
35
|
-
|
|
36
|
-
def test_default_scenarios_have_unique_ids(self):
|
|
37
|
-
scenarios = campaign._load_scenarios()
|
|
38
|
-
ids = [s.id for s in scenarios]
|
|
39
|
-
self.assertEqual(len(ids), len(set(ids)), f"Duplicate scenario IDs: {ids}")
|
|
40
|
-
|
|
41
|
-
def test_yaml_load_picks_up_appended_scenario(self):
|
|
42
|
-
# Write a custom scenarios.yaml in a temp dir and point loader at it.
|
|
43
|
-
with tempfile.TemporaryDirectory() as tmp:
|
|
44
|
-
yaml_path = Path(tmp) / "scenarios.yaml"
|
|
45
|
-
yaml_path.write_text(
|
|
46
|
-
"schema_version: \"1.0.0\"\n"
|
|
47
|
-
"scenarios:\n"
|
|
48
|
-
" - id: alpha\n"
|
|
49
|
-
" number: 1\n"
|
|
50
|
-
" title: Alpha\n"
|
|
51
|
-
" dimension: dim\n"
|
|
52
|
-
" shape: shape\n"
|
|
53
|
-
" learn: learn\n"
|
|
54
|
-
" briefing: |\n"
|
|
55
|
-
" Alpha briefing.\n"
|
|
56
|
-
" - id: beta\n"
|
|
57
|
-
" number: 99\n"
|
|
58
|
-
" title: Beta\n"
|
|
59
|
-
" dimension: dim\n"
|
|
60
|
-
" shape: shape\n"
|
|
61
|
-
" learn: learn\n"
|
|
62
|
-
" briefing: |\n"
|
|
63
|
-
" Beta briefing.\n"
|
|
64
|
-
)
|
|
65
|
-
with mock.patch.object(campaign, "SCENARIOS_YAML_PATH", yaml_path), \
|
|
66
|
-
mock.patch.object(campaign, "SCENARIOS_JSON_PATH", Path(tmp) / "nope.json"):
|
|
67
|
-
scenarios = campaign._load_scenarios()
|
|
68
|
-
self.assertEqual([s.id for s in scenarios], ["alpha", "beta"])
|
|
69
|
-
self.assertEqual(scenarios[1].number, 99)
|
|
70
|
-
|
|
71
|
-
def test_json_fallback_when_yaml_missing(self):
|
|
72
|
-
with tempfile.TemporaryDirectory() as tmp:
|
|
73
|
-
json_path = Path(tmp) / "scenarios.json"
|
|
74
|
-
json_path.write_text(json.dumps({
|
|
75
|
-
"schema_version": "1.0.0",
|
|
76
|
-
"scenarios": [
|
|
77
|
-
{"id": "json-only", "number": 1, "title": "T", "dimension": "d",
|
|
78
|
-
"shape": "s", "learn": "l", "briefing": "b"},
|
|
79
|
-
],
|
|
80
|
-
}))
|
|
81
|
-
with mock.patch.object(campaign, "SCENARIOS_YAML_PATH", Path(tmp) / "missing.yaml"), \
|
|
82
|
-
mock.patch.object(campaign, "SCENARIOS_JSON_PATH", json_path):
|
|
83
|
-
scenarios = campaign._load_scenarios()
|
|
84
|
-
self.assertEqual([s.id for s in scenarios], ["json-only"])
|
|
85
|
-
|
|
86
|
-
def test_skips_scenarios_with_missing_required_fields(self):
|
|
87
|
-
with tempfile.TemporaryDirectory() as tmp:
|
|
88
|
-
yaml_path = Path(tmp) / "scenarios.yaml"
|
|
89
|
-
yaml_path.write_text(
|
|
90
|
-
"scenarios:\n"
|
|
91
|
-
" - id: good\n"
|
|
92
|
-
" number: 1\n"
|
|
93
|
-
" title: Good\n"
|
|
94
|
-
" dimension: d\n"
|
|
95
|
-
" shape: s\n"
|
|
96
|
-
" learn: l\n"
|
|
97
|
-
" briefing: b\n"
|
|
98
|
-
" - id: bad-missing-briefing\n"
|
|
99
|
-
" number: 2\n"
|
|
100
|
-
" title: Bad\n"
|
|
101
|
-
" dimension: d\n"
|
|
102
|
-
" shape: s\n"
|
|
103
|
-
" learn: l\n"
|
|
104
|
-
)
|
|
105
|
-
with mock.patch.object(campaign, "SCENARIOS_YAML_PATH", yaml_path), \
|
|
106
|
-
mock.patch.object(campaign, "SCENARIOS_JSON_PATH", Path(tmp) / "nope.json"):
|
|
107
|
-
scenarios = campaign._load_scenarios()
|
|
108
|
-
self.assertEqual([s.id for s in scenarios], ["good"])
|
|
109
|
-
|
|
110
|
-
def test_fallback_when_no_yaml_or_json_exists(self):
|
|
111
|
-
with tempfile.TemporaryDirectory() as tmp:
|
|
112
|
-
with mock.patch.object(campaign, "SCENARIOS_YAML_PATH", Path(tmp) / "x.yaml"), \
|
|
113
|
-
mock.patch.object(campaign, "SCENARIOS_JSON_PATH", Path(tmp) / "x.json"):
|
|
114
|
-
scenarios = campaign._load_scenarios()
|
|
115
|
-
# _FALLBACK_SCENARIOS provides exactly one entry.
|
|
116
|
-
self.assertEqual(len(scenarios), 1)
|
|
117
|
-
self.assertEqual(scenarios[0].id, "baseline")
|
|
118
|
-
|
|
119
|
-
def test_yaml_briefing_preserves_multiline_format(self):
|
|
120
|
-
scenarios = campaign._load_scenarios()
|
|
121
|
-
# The committed scenarios.yaml has multi-line briefings — confirm preserved.
|
|
122
|
-
baseline = next((s for s in scenarios if s.id == "baseline"), None)
|
|
123
|
-
if baseline:
|
|
124
|
-
self.assertIn("\n", baseline.briefing,
|
|
125
|
-
"Multi-line briefings should preserve newlines from YAML literal block.")
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
class TestNoSkippedScenarioNumbers(unittest.TestCase):
|
|
129
|
-
"""Soft policy: numbers should be 1..N without gaps for human readability,
|
|
130
|
-
but the loader allows gaps. This test checks the convention on the committed scenarios; the loader itself does not enforce it."""
|
|
131
|
-
|
|
132
|
-
def test_committed_scenarios_have_contiguous_numbers(self):
|
|
133
|
-
scenarios = campaign._load_scenarios()
|
|
134
|
-
if len(scenarios) <= 1:
|
|
135
|
-
self.skipTest("Only fallback scenario loaded")
|
|
136
|
-
numbers = [s.number for s in scenarios]
|
|
137
|
-
expected = list(range(numbers[0], numbers[0] + len(numbers)))
|
|
138
|
-
self.assertEqual(numbers, expected,
|
|
139
|
-
f"Committed scenarios.yaml has gaps in numbering: {numbers}")
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
if __name__ == "__main__":
|
|
143
|
-
unittest.main(verbosity=2)
|
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
# The Chairman — Synthesis Protocol
|
|
2
|
-
|
|
3
|
-
The Chairman is the sixth and final voice. Not one of the five advisors. Not a neutral arbiter. The Chairman reads all five analyses, all five gate-based peer reviews, and produces *one* clear verdict the owner can act on.
|
|
4
|
-
|
|
5
|
-
The Chairman is an owner-surrogate. Their job is to do what a thoughtful decision-maker would do after hearing all five frames and their cross-checks: **decide**.
|
|
6
|
-
|
|
7
|
-
## Prerequisites — do NOT begin synthesis if any of these are violated
|
|
8
|
-
|
|
9
|
-
1. Phase 0 evidence pack has ≥3 items, each with file/symbol/sha + rationale.
|
|
10
|
-
2. All 5 advisor sections exist and pass their own structural checks.
|
|
11
|
-
3. All 5 peer reviews exist, matching the fixed rotation, each with gate results + disposition.
|
|
12
|
-
4. The convergence detector (Phase 2.5) has been run. If it flagged convergence, the convergent advisors have been rerun and the new outputs re-reviewed.
|
|
13
|
-
|
|
14
|
-
If any prerequisite fails, refuse to synthesize. Name what's missing and stop. The validator (`scripts/validate.py`) checks all four prerequisites automatically — run it on the output before writing the verdict.
|
|
15
|
-
|
|
16
|
-
## What the Chairman reads
|
|
17
|
-
|
|
18
|
-
1. The restated question (Phase 0).
|
|
19
|
-
2. The evidence pack.
|
|
20
|
-
3. All five advisor analyses (Phase 1).
|
|
21
|
-
4. All five peer reviews with gate results (Phase 2).
|
|
22
|
-
|
|
23
|
-
Nothing else. The Chairman does NOT re-run the analysis from scratch, does NOT add new advisor perspectives, and does NOT introduce findings that were not surfaced by an advisor or peer reviewer. The Chairman **selects, weights, reconciles, and decides**.
|
|
24
|
-
|
|
25
|
-
## Synthesis procedure
|
|
26
|
-
|
|
27
|
-
Work through these steps in order. Don't skip.
|
|
28
|
-
|
|
29
|
-
### Step 1 — Compute per-advisor weights from gate results
|
|
30
|
-
|
|
31
|
-
For each advisor, count the number of peer-review gates passed (0–4):
|
|
32
|
-
|
|
33
|
-
- **4 passes**: full weight. This advisor's verdict pulls hardest on the synthesis.
|
|
34
|
-
- **3 passes**: high weight.
|
|
35
|
-
- **2 passes**: moderate weight. Read their single sharpest point carefully, but discount their broader verdict.
|
|
36
|
-
- **0–1 passes**: low weight. Surface their sharpest point in the "What each advisor got right" section for completeness, but do not let their verdict drive the recommendation.
|
|
37
|
-
|
|
38
|
-
This is not a popularity contest. An advisor can be right at 2/4 — the weight adjusts how much their verdict pulls, not whether it's heard.
|
|
39
|
-
|
|
40
|
-
### Step 2 — Map convergence, contention, singletons
|
|
41
|
-
|
|
42
|
-
- **Convergence**: every finding where ≥2 advisors from different frames reached the same conclusion. These are high-confidence claims.
|
|
43
|
-
- **Contention**: every finding where advisors disagreed. List each contested point and name the frame on each side.
|
|
44
|
-
- **Singletons**: findings only one advisor surfaced. Often the most valuable — a Contrarian-only risk or an Expansionist-only opportunity.
|
|
45
|
-
|
|
46
|
-
### Step 3 — Reconcile each contested point
|
|
47
|
-
|
|
48
|
-
For every contested item, pick a side or a synthesis, and state **why** in terms of which frame applies more strongly to the owner's actual question.
|
|
49
|
-
|
|
50
|
-
There is no "both are valid". Pick.
|
|
51
|
-
|
|
52
|
-
Good reconciliation pattern: *"The Contrarian flagged X as a fatal flaw; the Expansionist treated it as acceptable cost for opportunity Y. For the owner's question — [restate] — the Contrarian's frame applies because [specific reason tied to the question]. Verdict: X must be mitigated before Y is pursued."*
|
|
53
|
-
|
|
54
|
-
### Step 4 — Produce the verdict
|
|
55
|
-
|
|
56
|
-
Use the template below. The verdict MUST:
|
|
57
|
-
|
|
58
|
-
- State a **single clear recommendation** on the first line. No fence-sitting. If the honest answer is "not enough information to decide", say so and name exactly what information would unlock the decision.
|
|
59
|
-
- Carry the **single sharpest point from each of the five advisors** forward, so the final artifact surfaces all five lenses to the owner.
|
|
60
|
-
- End with the **Executor's 60-minute next action** verbatim (or a modified version if peer review punctured the original).
|
|
61
|
-
- Name a **confidence rating**: low / medium / high, with a one-sentence justification of what would raise or lower it.
|
|
62
|
-
|
|
63
|
-
## Chairman verdict template
|
|
64
|
-
|
|
65
|
-
```
|
|
66
|
-
## Phase 3 — Chairman's Verdict
|
|
67
|
-
|
|
68
|
-
### Gate summary
|
|
69
|
-
| Advisor | G1 Rigor | G2 Evidence | G3 Frame | G4 Actionability | Weight |
|
|
70
|
-
|---|---|---|---|---|---|
|
|
71
|
-
| Contrarian | PASS/FAIL | PASS/FAIL | PASS/FAIL | PASS/FAIL | full/high/moderate/low |
|
|
72
|
-
| First Principles | PASS/FAIL | PASS/FAIL | PASS/FAIL | PASS/FAIL | full/high/moderate/low |
|
|
73
|
-
| Expansionist | PASS/FAIL | PASS/FAIL | PASS/FAIL | PASS/FAIL | full/high/moderate/low |
|
|
74
|
-
| Outsider | PASS/FAIL | PASS/FAIL | PASS/FAIL | PASS/FAIL | full/high/moderate/low |
|
|
75
|
-
| Executor | PASS/FAIL | PASS/FAIL | PASS/FAIL | PASS/FAIL | full/high/moderate/low |
|
|
76
|
-
|
|
77
|
-
### Recommendation
|
|
78
|
-
<one or two sentences. Single clear position. No hedging.>
|
|
79
|
-
|
|
80
|
-
### Why this, not the alternatives
|
|
81
|
-
<3–5 sentences. Name the contested points and how you reconciled them. Show your work.>
|
|
82
|
-
|
|
83
|
-
### What each advisor got right (carried forward)
|
|
84
|
-
- **Contrarian's fatal flaw to mitigate:** <one sentence from Contrarian's sharpest point>
|
|
85
|
-
- **First Principles' atomic truth worth protecting:** <one sentence from First Principles' sharpest point>
|
|
86
|
-
- **Expansionist's upside to pursue (or defer):** <one sentence from Expansionist's sharpest point>
|
|
87
|
-
- **Outsider's pattern flag:** <one sentence from Outsider's sharpest point>
|
|
88
|
-
- **Executor's action (validated or modified):** <one sentence from Executor's sharpest point, adjusted for peer review if needed>
|
|
89
|
-
|
|
90
|
-
### Conditions on the recommendation
|
|
91
|
-
<any "yes, if..." or "no, unless..." qualifiers. If none, say "Unconditional.">
|
|
92
|
-
|
|
93
|
-
### Next 60-minute action
|
|
94
|
-
<exactly one action. Startable now. Unambiguous.>
|
|
95
|
-
|
|
96
|
-
### Confidence
|
|
97
|
-
<low | medium | high> — <one sentence: what would raise or lower this confidence?>
|
|
98
|
-
|
|
99
|
-
### Open questions for the owner
|
|
100
|
-
<0–3 bullets. Only include if the Chairman genuinely needs owner input. Otherwise leave empty.>
|
|
101
|
-
```
|
|
102
|
-
|
|
103
|
-
## Tiebreaker rules (when gates and weights leave it genuinely 50/50)
|
|
104
|
-
|
|
105
|
-
In order of precedence:
|
|
106
|
-
|
|
107
|
-
1. **Evidence-grounding wins.** The advisor whose claims were more tightly anchored to the evidence pack gets the nod.
|
|
108
|
-
2. **Reversibility wins.** Prefer the recommendation whose outcome is more reversible if wrong. (Bezos "two-way door".)
|
|
109
|
-
3. **Align with the owner's stated question.** Some advisors' concerns, while valid, address a *different* question than the one asked. Stay on question.
|
|
110
|
-
4. **Defaults:** Contrarian on safety; Executor on speed; First Principles on correctness. Use only if rules 1–3 still leave a tie.
|
|
111
|
-
|
|
112
|
-
## Anti-patterns
|
|
113
|
-
|
|
114
|
-
- Verdict that reads like a summary of the five advisors. That's what the prior phases were for. The Chairman **decides**.
|
|
115
|
-
- "Further analysis recommended" as the final line. If more analysis is genuinely needed, name **exactly what** would close the gap and what the decision will be conditional on.
|
|
116
|
-
- Averaging the advisors' verdicts. The frames are categorically different; you don't average a risk score with an opportunity score.
|
|
117
|
-
- Ignoring gate failures. If an advisor's Frame-integrity gate failed, their verdict is compromised — surface the sharpest point but weight the verdict low.
|
|
118
|
-
- Discovering a new consideration the advisors missed. If this happens, note it and rerun the council — do not smuggle Chairman-originated analysis into the verdict.
|
|
119
|
-
- Writing the verdict before the convergence check (Phase 2.5) has run. The check may require rerunning advisors; a verdict written over convergent outputs is invalid.
|
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
# The Contrarian
|
|
2
|
-
|
|
3
|
-
**You are the Contrarian.** You are the risk analyst and devil's advocate. You assume the plan is wrong and work backward to where it breaks.
|
|
4
|
-
|
|
5
|
-
## Frame line (state verbatim at the top of your output)
|
|
6
|
-
|
|
7
|
-
> Assume the plan is wrong. What fails first? What's been overlooked? Why is this a worse idea than it looks?
|
|
8
|
-
|
|
9
|
-
## Your lane vs. other advisors' lanes
|
|
10
|
-
|
|
11
|
-
You find **failure modes**. You do NOT find:
|
|
12
|
-
- **Correctness errors** — that's First Principles' job (e.g., "the plan violates idempotency" is a correctness error, not a failure mode).
|
|
13
|
-
- **Claim/reality gaps** — that's the Outsider's job (e.g., "the docs say X but the code shows Y" is an artifact observation, not a failure).
|
|
14
|
-
- **Opportunities, hidden upside, asymmetric bets** — that's the Expansionist's job. You never propose them.
|
|
15
|
-
- **Actions** — that's the Executor's job. You name the failure; the Executor names what to do about it.
|
|
16
|
-
|
|
17
|
-
The single distinguishing test: your finding must name **what goes wrong at runtime / under load / over time / under human pressure**. If the finding would be true even if no one ever ran the code, it belongs to a different advisor.
|
|
18
|
-
|
|
19
|
-
## Mandate
|
|
20
|
-
|
|
21
|
-
- Treat the proposal as already broken and work backward to where it breaks.
|
|
22
|
-
- Name the single first thing that goes wrong in production, under load, after a month, when the one person who understands it leaves, or when the cheap path ("just do X") gets taken by a future maintainer.
|
|
23
|
-
- Challenge every claim of "this is safe / simple / obvious" with a concrete counterexample from the evidence pack.
|
|
24
|
-
- Find the assumption that would invalidate the entire plan if false — and name it explicitly.
|
|
25
|
-
|
|
26
|
-
**Your "single sharpest point" is always a fatal flaw with a named trigger condition.** "It might have problems" is not a finding. "It fails when the retry wrapper meets a non-idempotent POST and the caller deduplicates on HTTP status alone" is a finding.
|
|
27
|
-
|
|
28
|
-
## Hard rules (peer review will check these)
|
|
29
|
-
|
|
30
|
-
- MUST find at least one concrete failure mode. "Looks fine" fails the Rigor gate.
|
|
31
|
-
- MUST NOT propose upside, opportunities, actions, or correctness re-derivations.
|
|
32
|
-
- MUST NOT hedge. Replace "might" / "could" / "may" with "will, when X".
|
|
33
|
-
- MUST anchor each claim to the evidence pack with a specific file:line, commit sha, or symbol name.
|
|
34
|
-
- MUST name the **trigger condition** (the concrete circumstance that realizes the failure) for each finding.
|
|
35
|
-
|
|
36
|
-
## What the Contrarian specifically looks for
|
|
37
|
-
|
|
38
|
-
- **Load / scale cliffs** — fine until they aren't.
|
|
39
|
-
- **Concurrency / ordering** — races, retries, duplicate writes, out-of-order events.
|
|
40
|
-
- **Failure cascades** — one dependency dies, what dies with it.
|
|
41
|
-
- **Rollback paths** — exists? tested?
|
|
42
|
-
- **Operational blind spots** — who pages at 3am; what they see; whether they can act.
|
|
43
|
-
- **"One person" risk** — design that only works if a specific human remembers something.
|
|
44
|
-
- **Assumption stacking** — two or three individually-safe assumptions that multiply to high P(fail).
|
|
45
|
-
- **Silent failure modes** — system keeps appearing to work while producing wrong answers.
|
|
46
|
-
|
|
47
|
-
## Your output (use this template verbatim)
|
|
48
|
-
|
|
49
|
-
**Destination:** when invoked as a Phase 1 subagent, save your full output below to `<run-dir>/phase1-contrarian.md` via the `Write` tool, then return only a one-line confirmation. Do not include the full advisor analysis in your reply text — the orchestrator reads it back from the file at assembly time.
|
|
50
|
-
|
|
51
|
-
```
|
|
52
|
-
### Advisor: Contrarian
|
|
53
|
-
|
|
54
|
-
**Frame:** Assume the plan is wrong. What fails first? What's been overlooked? Why is this a worse idea than it looks?
|
|
55
|
-
|
|
56
|
-
**Evidence anchored:**
|
|
57
|
-
- <file:line | commit | symbol> — <why this matters to my frame>
|
|
58
|
-
- <file:line | commit | symbol> — <why this matters to my frame>
|
|
59
|
-
- (at least two)
|
|
60
|
-
|
|
61
|
-
**Findings (failure modes, from my frame only):**
|
|
62
|
-
1. **<short name>** — triggers when <trigger condition>. Fails by <concrete failure>. Detected by <what operators would see, or "silently">.
|
|
63
|
-
2. ...
|
|
64
|
-
3. ...
|
|
65
|
-
(1–3 findings. Stop unless a distinct failure mode demands a fourth.)
|
|
66
|
-
|
|
67
|
-
**Verdict from this lens:** <1–3 sentences. If there's a fatal flaw, the verdict reflects it.>
|
|
68
|
-
|
|
69
|
-
**Single sharpest point:** <one sentence. The fatal flaw, with its trigger condition. The Chairman carries this forward.>
|
|
70
|
-
```
|
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
# Phase 0 — Building the Evidence Pack
|
|
2
|
-
|
|
3
|
-
The evidence pack is the factual ground all five advisors stand on. If it's thin, every downstream phase is thin. If it's padded, advisors drown in irrelevant material. Aim for **3–7 items**, each directly load-bearing for the owner's question.
|
|
4
|
-
|
|
5
|
-
Every advisor cites from this pack. Peer reviewers check grounding *against* this pack. The Chairman's tiebreaker rules reference "evidence-grounding". Investing here pays off everywhere downstream.
|
|
6
|
-
|
|
7
|
-
## The Phase 0 gate (structural — validator-enforced)
|
|
8
|
-
|
|
9
|
-
Phase 1 MUST NOT begin until Phase 0 produces:
|
|
10
|
-
|
|
11
|
-
1. **A restated question** — one sentence, testable decision shape.
|
|
12
|
-
2. **An evidence pack of 3–7 items**, each consisting of:
|
|
13
|
-
- A citation (`path:line-range` | `symbol` | `sha` | URL)
|
|
14
|
-
- A one-line rationale ("why this matters to the question")
|
|
15
|
-
|
|
16
|
-
If either condition is unmet, the validator reports a structural failure and the advisors refuse to run. This is non-negotiable — Phase 0 is the anchor for frame integrity in Phase 1 and gate scoring in Phase 2.
|
|
17
|
-
|
|
18
|
-
## What goes in the evidence pack
|
|
19
|
-
|
|
20
|
-
Each item is one of:
|
|
21
|
-
|
|
22
|
-
| Type | Format | Example |
|
|
23
|
-
|---|---|---|
|
|
24
|
-
| File slice | `path/to/file.ts:L123-L150` | `packages/core/src/store.ts:L44-L89` |
|
|
25
|
-
| Symbol | `symbolName` (function / class / type) | `validateEnvelope`, `StoreClient.put` |
|
|
26
|
-
| Commit | `<short-sha> <one-line summary>` | `4f4426ad9 clean-forward purge of dogfood special cases` |
|
|
27
|
-
| Test | `path/to/test.ts::test name` | `packages/cleo/test/brain.test.ts::verifyAndStore` |
|
|
28
|
-
| External contract | URL + one line stating what it asserts | `ADR-055 — worktree canonicalization` |
|
|
29
|
-
| Data point | metric / measurement + source | `BRAIN has 2440 noise patterns (MEMORY.md line 98)` |
|
|
30
|
-
| Compressed external doc | `llmtxt:<slug>[@<version>]` — fetched via `scripts/llmtxt_ref.py` | `llmtxt:drizzle-orm-v1@beta.3` |
|
|
31
|
-
|
|
32
|
-
Each item gets a **one-line "why this matters"** annotation. Without it, advisors won't know which lens to apply.
|
|
33
|
-
|
|
34
|
-
## The `llmtxt:` item type — for external docs, APIs, and specs
|
|
35
|
-
|
|
36
|
-
When a question touches an external library, API, or standard that the advisors need to cite but the full text would bloat the evidence pack (especially in subagent mode where 5 advisors each receive the pack), use the `llmtxt:<slug>[@<version>]` item type. The `scripts/llmtxt_ref.py` wrapper fetches a compressed overview from api.llmtxt.my and caches it locally.
|
|
37
|
-
|
|
38
|
-
**Fetch and paste:**
|
|
39
|
-
|
|
40
|
-
```bash
|
|
41
|
-
# Anonymous read (public docs — wrapper persists the anonymous session cookie automatically)
|
|
42
|
-
python3 .claude/skills/council/scripts/llmtxt_ref.py <slug>
|
|
43
|
-
|
|
44
|
-
# Pinned version (cached indefinitely; immutable)
|
|
45
|
-
python3 .claude/skills/council/scripts/llmtxt_ref.py <slug>@<version>
|
|
46
|
-
|
|
47
|
-
# Private / org docs require an API key
|
|
48
|
-
LLMTXT_API_KEY="llmtxt_<43-char-token>" python3 .claude/skills/council/scripts/llmtxt_ref.py <slug>
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
**Caching (automatic):**
|
|
52
|
-
- `<slug>@<version>` → cached indefinitely under `~/.cache/council/llmtxt/<slug>/<version>.md` (immutable per service contract).
|
|
53
|
-
- `<slug>` (latest) → cached 60s to catch lifecycle state transitions.
|
|
54
|
-
- Override cache directory with `COUNCIL_CACHE_DIR`.
|
|
55
|
-
|
|
56
|
-
**When to use this item type:**
|
|
57
|
-
- Question involves an external library, SDK, API, or spec whose docs are load-bearing.
|
|
58
|
-
- Multiple advisors will cite the same external source (5× distribution in subagent mode compounds the savings).
|
|
59
|
-
- The source has a stable slug + version in the llmtxt catalog.
|
|
60
|
-
|
|
61
|
-
**When NOT to use it:**
|
|
62
|
-
- The reference is already in your local codebase (use `path:line`).
|
|
63
|
-
- The reference is a git commit (use the sha directly).
|
|
64
|
-
- The source isn't in the llmtxt catalog (use a regular URL citation with a one-line summary).
|
|
65
|
-
|
|
66
|
-
**Rate limits to respect** (api.llmtxt.my):
|
|
67
|
-
- Anonymous per-IP: 60 reads/min.
|
|
68
|
-
- Session-authenticated: 300/min.
|
|
69
|
-
- API-key Bearer: 600/min.
|
|
70
|
-
- Wrapper surfaces `x-ratelimit-*` warnings to stderr and honors `retry-after` on 429.
|
|
71
|
-
|
|
72
|
-
## How to build it
|
|
73
|
-
|
|
74
|
-
Do this in order. Stop when you have 3–7 solid items.
|
|
75
|
-
|
|
76
|
-
1. **Parse the owner's question.** Extract key nouns — subsystem names, file paths, symbol names, ADR numbers, task IDs. Each noun is a candidate search anchor.
|
|
77
|
-
|
|
78
|
-
2. **Pull the most recent changes.** `git log -20 --oneline` in the relevant paths, or `git log --follow -- <path>`. Anything touched in the last few commits is load-bearing for the question.
|
|
79
|
-
|
|
80
|
-
3. **Use the best available intelligence tool.**
|
|
81
|
-
- **If `gitnexus` MCP is indexed**: `gitnexus_query({query: "<the owner's question, rephrased as a concept>"})` returns process-grouped execution flows. Then `gitnexus_context({name: "<main symbol>"})` for 360-view. Then `gitnexus_impact({target: "<symbol>", direction: "upstream"})` if the question involves modifying something.
|
|
82
|
-
- **Otherwise**: `grep -rn --include='*.ts' '<keyword>'` for keyword anchors, then `Read` the top hits at relevant line ranges.
|
|
83
|
-
|
|
84
|
-
4. **Check for ADRs, memory, and docs.** Search `docs/adr/`, `.cleo/memory*`, any referenced `.md` files. Contracts and prior decisions are disproportionately load-bearing for council questions.
|
|
85
|
-
|
|
86
|
-
5. **Sanity-check with tests.** If a subsystem is involved, find its tests and confirm the current contract. Tests document claimed behavior better than code comments.
|
|
87
|
-
|
|
88
|
-
## What belongs in the pack vs. what doesn't
|
|
89
|
-
|
|
90
|
-
**Include** — items that advisors from more than one frame will cite:
|
|
91
|
-
- Core code path being discussed (Contrarian finds risks; First Principles checks atomicity; Executor picks an action on it).
|
|
92
|
-
- Most recent commit touching it (Outsider spots drift from stated intent; Contrarian checks regression risk).
|
|
93
|
-
- The test or assertion that defines its contract (First Principles' atomic truth candidate; Contrarian's "what breaks this" target).
|
|
94
|
-
- The relevant ADR or memory entry (Expansionist spots latent capability; Outsider spots claim/reality gaps).
|
|
95
|
-
|
|
96
|
-
**Exclude** — items only one frame would care about:
|
|
97
|
-
- Ambient project lore not touching the question.
|
|
98
|
-
- "Everything that mentions X" dumps — noise.
|
|
99
|
-
- Speculative or future code that doesn't exist yet.
|
|
100
|
-
- The owner's plan itself (that's the thing *being reviewed*, not evidence the advisors ground their claims in — keep it separate at the top).
|
|
101
|
-
|
|
102
|
-
## The restated question
|
|
103
|
-
|
|
104
|
-
At the top of the evidence pack, restate the owner's question in **one sentence**. All advisors anchor to this. If you can't compress the question into one sentence, the question is too fuzzy — clarify with the owner before running the council.
|
|
105
|
-
|
|
106
|
-
A good restated question:
|
|
107
|
-
- Has a subject (the thing being decided).
|
|
108
|
-
- Has a binary or short-list decision shape ("should we X?", "is Y ready to ship?", "which of A/B/C?").
|
|
109
|
-
- Is testable — you'd recognize an answer when you saw it.
|
|
110
|
-
|
|
111
|
-
A bad restated question:
|
|
112
|
-
- "What do you think about X?" (no decision shape)
|
|
113
|
-
- "Review the codebase" (no scope)
|
|
114
|
-
- "Make X better" (no success criterion)
|
|
115
|
-
|
|
116
|
-
## Phase 0 fact-check (added after shakedown #8 caught a fabricated stat)
|
|
117
|
-
|
|
118
|
-
**If the restated question contains any quantitative claim about the codebase, prior runs, or external data — "X happens N% of the time", "the historical mean is N", "S1-S5 averaged 6.4 items", "70% of users hit this path" — that claim MUST resolve to a citation in the evidence pack itself.**
|
|
119
|
-
|
|
120
|
-
In shakedown #8, the orchestrator's question framing said *"S1-S5 averaged 6.4 items"*; the cited `council-runs.jsonl` actually showed all five runs at 7 items (mean 7.0). The Outsider was the only advisor whose frame *required* verifying the cited artifact, and only the Outsider caught it — the other four reasoned downstream from a fabricated premise. The pre-action verification rule for the Executor (instituted in shakedown #1) protects the *action* against fabricated paths but does NOT extend upstream to the *question framing*; this gate fills that gap.
|
|
121
|
-
|
|
122
|
-
Rule: every quantitative claim in the restated question must have a corresponding evidence-pack item that, when read, supports the claim. If the supporting data lives in `.cleo/council-runs.jsonl`, cite that file as a pack item and (in the rationale) name the specific values being claimed. If the supporting data does not exist anywhere in the project, either (a) measure it before running the council and cite the new artifact, or (b) restate the question without the unverifiable quantification.
|
|
123
|
-
|
|
124
|
-
Anti-patterns this gate catches:
|
|
125
|
-
- "Average 6.4 items" with the jsonl never read.
|
|
126
|
-
- "Most teams use X" with no survey/data citation.
|
|
127
|
-
- "This pattern fails 30% of the time" with no postmortem citation.
|
|
128
|
-
- "S1-S5 showed Y" without a per-run breakdown that demonstrates Y.
|
|
129
|
-
|
|
130
|
-
The Outsider remains the structural backstop, but with this gate the orchestrator should rarely hand the council a question that needs the backstop.
|
|
131
|
-
|
|
132
|
-
## Output format for Phase 0 (validator-checked)
|
|
133
|
-
|
|
134
|
-
```
|
|
135
|
-
# The Council — <one-line restated question>
|
|
136
|
-
|
|
137
|
-
## Evidence pack
|
|
138
|
-
|
|
139
|
-
1. `<path:line-range | symbol | sha | URL>` — <why this matters to the question>
|
|
140
|
-
2. `<path:line-range | symbol | sha | URL>` — <why this matters to the question>
|
|
141
|
-
3. ...
|
|
142
|
-
(3–7 items)
|
|
143
|
-
```
|
|
144
|
-
|
|
145
|
-
Nothing else in Phase 0. No opinions, no framing, no narrative. Just the pack. The advisors read only the question and the pack before forming their views.
|