@cleocode/skills 2026.4.161 → 2026.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/ct-council/SKILL.md +377 -0
- package/skills/ct-council/optimization/HARDENING-PLAYBOOK.md +107 -0
- package/skills/ct-council/optimization/README.md +74 -0
- package/skills/ct-council/optimization/scenarios.yaml +121 -0
- package/skills/ct-council/optimization/scripts/campaign.py +543 -0
- package/skills/ct-council/optimization/scripts/test_campaign.py +143 -0
- package/skills/ct-council/references/chairman.md +119 -0
- package/skills/ct-council/references/contrarian.md +70 -0
- package/skills/ct-council/references/evidence-pack.md +145 -0
- package/skills/ct-council/references/examples.md +235 -0
- package/skills/ct-council/references/executor.md +83 -0
- package/skills/ct-council/references/expansionist.md +68 -0
- package/skills/ct-council/references/first-principles.md +73 -0
- package/skills/ct-council/references/outsider.md +73 -0
- package/skills/ct-council/references/peer-review.md +125 -0
- package/skills/ct-council/scripts/analyze_runs.py +293 -0
- package/skills/ct-council/scripts/fixtures/executor_multi.md +198 -0
- package/skills/ct-council/scripts/fixtures/missing_advisor.md +117 -0
- package/skills/ct-council/scripts/fixtures/missing_convergence.md +190 -0
- package/skills/ct-council/scripts/fixtures/thin_evidence.md +193 -0
- package/skills/ct-council/scripts/fixtures/valid.md +226 -0
- package/skills/ct-council/scripts/fixtures/valid_with_llmtxt.md +226 -0
- package/skills/ct-council/scripts/llmtxt_ref.py +223 -0
- package/skills/ct-council/scripts/run_council.py +578 -0
- package/skills/ct-council/scripts/telemetry.py +624 -0
- package/skills/ct-council/scripts/test_telemetry.py +509 -0
- package/skills/ct-council/scripts/test_validate.py +452 -0
- package/skills/ct-council/scripts/validate.py +396 -0
|
@@ -0,0 +1,624 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
telemetry.py — extract one JSONL record from a Council run output.
|
|
4
|
+
|
|
5
|
+
Reads a council-output.md, validates it (using validate.py), and emits a
|
|
6
|
+
single JSON record describing the run: question, per-advisor gate-pass
|
|
7
|
+
rates, peer-review disposition distribution, convergence flag, Chairman
|
|
8
|
+
confidence, evidence-pack size, and (optional) externally-supplied
|
|
9
|
+
tokens / wall-clock metrics.
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
# Emit JSON to stdout (do not append to log).
|
|
13
|
+
python3 telemetry.py <output.md>
|
|
14
|
+
|
|
15
|
+
# Append one JSON line to .cleo/council-runs.jsonl (default log path).
|
|
16
|
+
python3 telemetry.py --append <output.md>
|
|
17
|
+
|
|
18
|
+
# Append to a specific log path.
|
|
19
|
+
python3 telemetry.py --log path/to/runs.jsonl <output.md>
|
|
20
|
+
|
|
21
|
+
# Stamp tokens / wall-clock from the orchestrator.
|
|
22
|
+
python3 telemetry.py --tokens 41250 --wall-clock 73.4 --append <output.md>
|
|
23
|
+
|
|
24
|
+
Exit codes:
|
|
25
|
+
0 — record emitted successfully
|
|
26
|
+
1 — validation failed (no record emitted)
|
|
27
|
+
2 — file not found / unreadable
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
import argparse
|
|
33
|
+
import datetime as _dt
|
|
34
|
+
import hashlib
|
|
35
|
+
import json
|
|
36
|
+
import re
|
|
37
|
+
import sys
|
|
38
|
+
from dataclasses import dataclass, field, asdict
|
|
39
|
+
from pathlib import Path
|
|
40
|
+
|
|
41
|
+
# Re-use the validator's helpers — single source of truth for parsing.
|
|
42
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
|
43
|
+
from validate import ( # noqa: E402
|
|
44
|
+
ADVISORS,
|
|
45
|
+
PEER_REVIEW_ROTATION,
|
|
46
|
+
PEER_REVIEW_GATES,
|
|
47
|
+
Validator,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Version stamp written into every emitted record so downstream consumers of
# the JSONL log can detect schema changes.
SCHEMA_VERSION = "1.0.0"
# Default JSONL log location used by --append when --log is not given.
DEFAULT_LOG_PATH = Path(".cleo/council-runs.jsonl")

# Short keys for the four peer-review gates; paired positionally with the
# full labels imported from validate.py as PEER_REVIEW_GATES.
GATE_KEYS = ["G1", "G2", "G3", "G4"]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class AdvisorRecord:
    """Telemetry facts gathered for one advisor's Phase 1 section."""

    # G1..G4 → "PASS" | "FAIL" | "MISSING"; copied from the peer review of
    # this advisor by _annotate_advisor_with_gates.
    gates: dict[str, str] = field(default_factory=dict)  # G1..G4 → PASS|FAIL
    # Number of "PASS" values in `gates` (0..4).
    gate_pass_count: int = 0
    # Weight label derived from gate_pass_count via _weight_from_pass_count.
    weight: str = "low"  # full | high | moderate | low
    # First line of the advisor's "Single sharpest point", if present.
    sharpest: str | None = None
    reviewer: str | None = None  # who graded this advisor
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class PeerReviewRecord:
    """One reviewer→reviewee peer-review outcome.

    `gates` was previously attached ad hoc through the instance `__dict__` by
    `_extract_peer_review`; declaring it as a real field keeps `repr`,
    `asdict`, and type checkers honest while remaining fully compatible with
    the existing `__dict__.setdefault("gates", {})` call sites (the field's
    default dict is simply reused).
    """

    reviewer: str
    reviewee: str
    disposition: str | None = None  # Accept | Modify | Reject
    gates_passed: int = 0
    # G1..G4 → "PASS" | "FAIL" for gate lines that parsed.
    gates: dict[str, str] = field(default_factory=dict)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class TelemetryRecord:
    """One complete telemetry record for a Council run (the JSONL row shape)."""

    schema_version: str
    run_id: str            # 16-hex-char sha256 prefix of the source markdown
    timestamp: str         # UTC ISO-8601, seconds precision
    question: str
    validation: dict       # {"valid", "structural_violations", "warnings"}
    evidence_pack: dict    # {"count", "has_llmtxt"}
    advisors: dict[str, dict]
    peer_reviews: list[dict]
    convergence: dict      # {"flag", "rerun_advisors"}
    chairman: dict
    metrics: dict          # tokens / wall-clock / size / caller-supplied extras
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# ─── helpers (mirror validate.py's line-based, fence-aware section scan) ────
|
|
90
|
+
|
|
91
|
+
# ATX markdown header: captures the '#' run (level) and the trimmed title text.
_HEADER_RE = re.compile(r"^(#{1,6})\s+(.+?)\s*$")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _section_body(md: str, header_regex: str) -> str | None:
    """Body under the first header matching header_regex; ignores ``` fences.

    The section runs from the line after the matched header up to (but not
    including) the next header at the same or shallower level, or to EOF.
    Returns None when no header matches.
    """
    lines = md.split("\n")
    inside_code_fence = False
    body_start: int | None = None
    matched_level: int | None = None
    body_end = len(lines)

    for idx, raw in enumerate(lines):
        # Toggle fence state on any ``` line; headers inside fences don't count.
        if raw.lstrip().startswith("```"):
            inside_code_fence = not inside_code_fence
            continue
        if inside_code_fence:
            continue
        header = _HEADER_RE.match(raw)
        if header is None:
            continue
        depth = len(header.group(1))
        title = header.group(2).strip()
        if body_start is None:
            # Still hunting for the opening header.
            if re.match(header_regex, title):
                body_start = idx + 1
                matched_level = depth
        elif depth <= matched_level:
            # A same-or-shallower header closes the section.
            body_end = idx
            break

    if body_start is None:
        return None
    return "\n".join(lines[body_start:body_end])
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _weight_from_pass_count(n: int) -> str:
|
|
128
|
+
if n == 4:
|
|
129
|
+
return "full"
|
|
130
|
+
if n == 3:
|
|
131
|
+
return "high"
|
|
132
|
+
if n == 2:
|
|
133
|
+
return "moderate"
|
|
134
|
+
return "low"
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _extract_question(md: str) -> str:
|
|
138
|
+
m = re.search(r"^#\s+The Council\s+—\s+(.+)$", md, re.MULTILINE)
|
|
139
|
+
return m.group(1).strip() if m else ""
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _extract_evidence_pack(md: str) -> dict:
    """Count numbered evidence items and detect llmtxt-sourced entries."""
    section = _section_body(md, r"^Evidence pack$")
    if section is None:
        return {"count": 0, "has_llmtxt": False}
    # Each item runs from a '1.'-style marker to the next marker (or EOF).
    entries = re.findall(
        r"^\s*\d+\.\s+(.+?)(?=^\s*\d+\.\s+|\Z)", section, re.MULTILINE | re.DOTALL
    )
    return {
        "count": len(entries),
        "has_llmtxt": any("llmtxt:" in entry for entry in entries),
    }
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _extract_sharpest(advisor_body: str) -> str | None:
|
|
154
|
+
m = re.search(
|
|
155
|
+
r"\*\*Single sharpest point:\*\*\s*(.+?)(?=\n\*\*|\Z)",
|
|
156
|
+
advisor_body,
|
|
157
|
+
re.DOTALL,
|
|
158
|
+
)
|
|
159
|
+
if not m:
|
|
160
|
+
return None
|
|
161
|
+
return m.group(1).strip().splitlines()[0].strip() if m.group(1).strip() else None
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _extract_peer_review(md: str, reviewer: str, reviewee: str) -> PeerReviewRecord:
    """Parse one '<reviewer> reviewing <reviewee>' section into a record.

    Returns a bare record (no disposition, zero gates) when the section is
    absent, so callers never have to special-case a missing review.
    """
    pr = PeerReviewRecord(reviewer=reviewer, reviewee=reviewee)
    body = _section_body(md, rf"^{re.escape(reviewer)} reviewing {re.escape(reviewee)}$")
    if body is None:
        return pr

    # Fix: the gate map was only created lazily inside the loop (via
    # setdefault on the first matching gate line), so a review with zero
    # parseable gates carried no map at all. Create it once up front —
    # _annotate_advisor_with_gates reads it via .get("gates", {}), so the
    # observable verdicts are unchanged either way.
    gates: dict[str, str] = pr.__dict__.setdefault("gates", {})

    # Gate parsing: tolerate both "G1 Rigor" and the full label.
    for key, full in zip(GATE_KEYS, PEER_REVIEW_GATES):
        gate_re = rf"-\s+{re.escape(full)}:\s+(PASS|FAIL)\s+—"
        m = re.search(gate_re, body)
        if m:
            gates[key] = m.group(1)
            if m.group(1) == "PASS":
                pr.gates_passed += 1

    disp_match = re.search(r"\*\*Disposition:\*\*\s+(Accept|Modify|Reject)\b", body)
    if disp_match:
        pr.disposition = disp_match.group(1)

    return pr
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _extract_advisor(md: str, advisor: str) -> AdvisorRecord:
    """Build an AdvisorRecord from the 'Advisor: <name>' section.

    Only `sharpest` is filled here; gate data is attached later from the
    peer review of this advisor. A missing section yields a default record.
    """
    record = AdvisorRecord()
    section = _section_body(md, rf"^Advisor:\s+{re.escape(advisor)}$")
    if section is not None:
        record.sharpest = _extract_sharpest(section)
    return record
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _annotate_advisor_with_gates(rec: AdvisorRecord, pr: PeerReviewRecord) -> None:
    """Attach the peer-review gate verdicts (the reviewee's gates) onto the advisor.

    Gates the review did not grade are recorded as "MISSING"; the pass count
    and weight label are then recomputed from the full verdict map.
    """
    rec.reviewer = pr.reviewer
    verdicts: dict[str, str] = pr.__dict__.get("gates", {})
    for gate_key in GATE_KEYS:
        rec.gates[gate_key] = verdicts.get(gate_key, "MISSING")
    rec.gate_pass_count = sum(1 for verdict in rec.gates.values() if verdict == "PASS")
    rec.weight = _weight_from_pass_count(rec.gate_pass_count)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _extract_convergence(md: str) -> dict:
    """Heuristically read the Phase 2.5 convergence outcome.

    Returns {"flag": bool | None, "rerun_advisors": [...]}; flag is None when
    the section is missing or the prose matches neither pattern. Authors
    typically write "convergence flag raised" or "no convergence flag".
    """
    section = _section_body(md, r"^Phase 2\.5\s*[—-]\s*Convergence check$")
    if section is None:
        return {"flag": None, "rerun_advisors": []}
    text = section.lower()
    raised = bool(re.search(r"\bconvergence\b.*\b(raised|fired|triggered)\b", text))
    cleared = bool(re.search(r"\bno convergence\b|\bproceeding to phase 3\b|\bdistinct subjects\b", text))
    # A "cleared" phrasing always wins; otherwise a "raised" verb sets the flag.
    if cleared:
        flag = False
    elif raised:
        flag = True
    else:
        flag = None
    rerun_names = re.findall(r"reran\s+(\w[\w \-]*?)\b", text)
    return {"flag": flag, "rerun_advisors": [name.strip() for name in rerun_names]}
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _extract_chairman(md: str) -> dict:
    """Summarize the Phase 3 Chairman section into telemetry fields.

    Returns the confidence level (lowercased label or None), presence flags
    for the Recommendation / Next-action subsections, and a bullet count of
    the open questions. All regexes scan the Phase 3 body only.
    """
    body = _section_body(md, r"^Phase 3\s*[—-]\s*Chairman['’]s verdict$")
    if body is None:
        # Missing section: emit the all-empty shape so the record stays uniform.
        return {
            "confidence": None,
            "recommendation_present": False,
            "next_action_present": False,
            "open_questions_count": 0,
        }
    rec_match = re.search(r"###\s+Recommendation\s*\n(.+?)(?=\n###|\Z)", body, re.DOTALL)
    rec_present = bool(rec_match and rec_match.group(1).strip())

    # The action body must be non-trivial (≥15 chars) to count as present.
    action_match = re.search(r"###\s+Next 60-minute action\s*\n(.+?)(?=\n###|\Z)", body, re.DOTALL)
    action_present = bool(action_match and len(action_match.group(1).strip()) >= 15)

    conf = None
    conf_match = re.search(r"###\s+Confidence\s*\n(.+?)(?=\n###|\Z)", body, re.DOTALL)
    if conf_match:
        conf_text = conf_match.group(1).strip().lower()
        # Order matters: medium-high before high (substring match would
        # otherwise report "high" for "medium-high").
        for level in ("medium-high", "medium-low", "high", "medium", "low"):
            if level in conf_text:
                conf = level
                break

    open_q_match = re.search(r"###\s+Open questions for the owner\s*\n(.+?)(?=\n###|\Z)", body, re.DOTALL)
    open_q_count = 0
    if open_q_match:
        # Count markdown bullets ('-' or '*') in the subsection body.
        open_q_count = len(re.findall(r"^\s*[-*]\s+", open_q_match.group(1), re.MULTILINE))

    return {
        "confidence": conf,
        "recommendation_present": rec_present,
        "next_action_present": action_present,
        "open_questions_count": open_q_count,
    }
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
# ─── public API ─────────────────────────────────────────────────────────────
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def extract_record(
    md: str,
    *,
    source_path: str | None = None,
    tokens: int | None = None,
    wall_clock: float | None = None,
    extra: dict | None = None,
) -> TelemetryRecord:
    """Parse a full council-output.md into a TelemetryRecord.

    Runs validate.Validator over the markdown, then assembles per-advisor
    gate data, peer-review dispositions, convergence and chairman summaries.
    `tokens`, `wall_clock`, and `extra` are orchestrator-supplied metrics
    merged into the `metrics` dict; `extra` keys override the built-ins.
    Does not itself reject invalid runs — the caller checks
    `record.validation["valid"]`.
    """
    v = Validator(md)
    violations = v.validate()
    structural = sum(1 for x in violations if x.kind == "structural")
    warnings = sum(1 for x in violations if x.kind == "warning")

    question = _extract_question(md)
    ep = _extract_evidence_pack(md)

    advisors: dict[str, AdvisorRecord] = {a: _extract_advisor(md, a) for a in ADVISORS}

    # Each rotation entry grades one reviewee; the reviewee's advisor record
    # gets the gate verdicts attached as a side effect.
    peer_reviews: list[PeerReviewRecord] = []
    for reviewer, reviewee in PEER_REVIEW_ROTATION:
        pr = _extract_peer_review(md, reviewer, reviewee)
        peer_reviews.append(pr)
        _annotate_advisor_with_gates(advisors[reviewee], pr)

    convergence = _extract_convergence(md)
    chairman = _extract_chairman(md)

    # run_id is content-addressed: identical markdown → identical id.
    payload = md.encode("utf-8")
    run_id = hashlib.sha256(payload).hexdigest()[:16]

    record = TelemetryRecord(
        schema_version=SCHEMA_VERSION,
        run_id=run_id,
        timestamp=_dt.datetime.now(tz=_dt.timezone.utc).isoformat(timespec="seconds"),
        question=question,
        validation={
            "valid": structural == 0,
            "structural_violations": structural,
            "warnings": warnings,
        },
        evidence_pack=ep,
        advisors={
            name: {
                "gates": rec.gates,
                "gate_pass_count": rec.gate_pass_count,
                "weight": rec.weight,
                "sharpest": rec.sharpest,
                "reviewer": rec.reviewer,
            }
            for name, rec in advisors.items()
        },
        peer_reviews=[
            {
                "reviewer": pr.reviewer,
                "reviewee": pr.reviewee,
                "disposition": pr.disposition,
                "gates_passed": pr.gates_passed,
            }
            for pr in peer_reviews
        ],
        convergence=convergence,
        chairman=chairman,
        metrics={
            "tokens": tokens,
            "wall_clock_seconds": wall_clock,
            "evidence_pack_count": ep["count"],
            "source_path": source_path,
            "size_bytes": len(payload),
            # Caller-supplied extras win over the built-in keys above.
            **(extra or {}),
        },
    )
    return record
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def append_jsonl(record: TelemetryRecord, log_path: Path) -> None:
    """Serialize record as one JSON line and append it to log_path.

    Creates missing parent directories; the log is append-only.
    """
    log_path.parent.mkdir(parents=True, exist_ok=True)
    line = json.dumps(asdict(record), ensure_ascii=False)
    with log_path.open("a", encoding="utf-8") as handle:
        handle.write(line + "\n")
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
# ─── Verdict + TL;DR generation (post-shakedown UX fix) ─────────────────────
|
|
341
|
+
#
|
|
342
|
+
# The full output.md (Phase 0 + 5 advisors + 5 peer reviews + 2.5 + 3) is the
|
|
343
|
+
# audit trail — ~300-400 lines. The owner consumes the *Chairman verdict*,
|
|
344
|
+
# which is the last ~60 lines. Forcing the reader to scroll through the full
|
|
345
|
+
# transcript to reach the recommendation is a UX failure, not a content
|
|
346
|
+
# problem.
|
|
347
|
+
#
|
|
348
|
+
# These functions extract two leaner deliverables from a validated output.md:
|
|
349
|
+
# - verdict.md — the Chairman section with the question prepended (~60-80 lines)
|
|
350
|
+
# - tldr.md — recommendation + action + confidence (~10-15 lines)
|
|
351
|
+
# The full output.md is preserved as-is for the audit trail.
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def _extract_chairman_section(md: str) -> str | None:
    """Return the literal Phase 3 markdown body (everything under '## Phase 3 — ...')."""
    return _section_body(md, r"^Phase 3\s*[—-]\s*Chairman['’]s verdict$")
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def _extract_phase3_subsection(body: str, header: str) -> str | None:
|
|
361
|
+
"""Pull a specific `### <header>` subsection's body from a Phase 3 body."""
|
|
362
|
+
m = re.search(
|
|
363
|
+
rf"###\s+{re.escape(header)}\s*\n(.+?)(?=\n###|\Z)",
|
|
364
|
+
body,
|
|
365
|
+
re.DOTALL,
|
|
366
|
+
)
|
|
367
|
+
if not m:
|
|
368
|
+
return None
|
|
369
|
+
return m.group(1).strip()
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def render_verdict(md: str) -> str:
    """Render verdict.md from a full output.md — Chairman section + question header.

    Output is structurally a standalone decision document: H1 question, gate
    summary, recommendation, conditions, action, confidence. Suitable for
    direct hand-off to the owner without scrolling past upstream artifacts.

    Raises ValueError when the Phase 3 section is missing.
    """
    question = _extract_question(md) or "<question missing>"
    chairman = _extract_chairman_section(md)
    if chairman is None:
        raise ValueError("output.md missing Phase 3 — Chairman's verdict section")
    header = f"# Council Verdict — {question}\n\n## Phase 3 — Chairman's verdict\n"
    return header + chairman.rstrip() + "\n"
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def render_tldr(md: str) -> str:
    """Render tldr.md — 10-15 line summary suitable for PR comments / chat.

    Pulls only the load-bearing fields: recommendation, next action, confidence,
    and a count of open questions / conditions. Not a substitute for the full
    verdict — a *pointer* to it. Missing fields render as "<missing>" rather
    than raising, so a partial output.md still yields a usable summary.
    """
    question = _extract_question(md) or "<question missing>"
    chairman_body = _extract_chairman_section(md) or ""

    rec = _extract_phase3_subsection(chairman_body, "Recommendation") or "<missing>"
    action = _extract_phase3_subsection(chairman_body, "Next 60-minute action") or "<missing>"
    conf = _extract_phase3_subsection(chairman_body, "Confidence") or "<missing>"
    conditions = _extract_phase3_subsection(chairman_body, "Conditions on the recommendation") or ""
    open_q = _extract_phase3_subsection(chairman_body, "Open questions for the owner") or ""

    # Trim each to a single first paragraph / line for concision.
    rec_first = _first_paragraph(rec)
    action_first = _first_paragraph(action)
    # Confidence: just the level (first word) + first clause, not the full justification.
    # Alternation order matters — "medium-high" must be tried before "high"/"medium".
    conf_level_match = re.match(r"\s*(medium-high|medium-low|high|medium|low)\b", conf, re.IGNORECASE)
    if conf_level_match:
        conf_first = conf_level_match.group(1).lower()
    else:
        conf_first = _first_paragraph(conf)[:80]

    # Conditions are numbered items; open questions may be numbered or bulleted.
    cond_count = len(re.findall(r"^\s*\d+\.\s+", conditions, re.MULTILINE))
    open_q_count = len(re.findall(r"^\s*\d+\.\s+|^\s*[-*]\s+", open_q, re.MULTILINE))
    # "none" when the section is empty or explicitly opens with "none".
    open_q_marker = "none" if (not open_q.strip() or "none" in open_q.lower()[:80]) else f"{open_q_count}"

    lines = [
        f"# Council TL;DR — {question}",
        "",
        f"**Recommendation** — {rec_first}",
        "",
        f"**Next 60-minute action** — {action_first}",
        "",
        f"**Confidence** — {conf_first}",
        "",
        f"**Conditions:** {cond_count} · **Open questions:** {open_q_marker}",
        "",
        "_Full verdict: `verdict.md` · Full transcript: `output.md`_",
        "",
    ]
    return "\n".join(lines)
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
# ─── Phase 2.5 structured extractor (T-shakedown-1 verdict) ─────────────────
|
|
434
|
+
#
|
|
435
|
+
# The Phase 2.5 convergence detector currently emits free prose; downstream
|
|
436
|
+
# telemetry has to grep regex (`_extract_convergence` above). This extractor
|
|
437
|
+
# replaces that with a structured artifact built directly from the per-advisor
|
|
438
|
+
# `phase1-<advisor>.md` files in a run directory, before output.md is even
|
|
439
|
+
# assembled. Output schema:
|
|
440
|
+
#
|
|
441
|
+
# {
|
|
442
|
+
# "schema_version": "...",
|
|
443
|
+
# "run_id": "<8-char from run.json>",
|
|
444
|
+
# "sharpest_points": [{"advisor": str, "sentence": str}, ...],
|
|
445
|
+
# "pairwise_same": [[i, j], ...],
|
|
446
|
+
# "flag_mechanical": bool,
|
|
447
|
+
# "method": "exact-normalized | jaccard>=0.6 | 3-clique"
|
|
448
|
+
# }
|
|
449
|
+
#
|
|
450
|
+
# `flag_mechanical=True` iff a 3-clique exists in the pairwise-same graph —
|
|
451
|
+
# matching the protocol's "≥3 semantically the same finding" rule.
|
|
452
|
+
|
|
453
|
+
# Schema stamp for the structured Phase 2.5 verdict JSON.
PHASE_2_5_SCHEMA_VERSION = "1.0.0"
# Token-set Jaccard similarity at/above which two sharpest points count as
# "semantically the same" for the pairwise graph.
JACCARD_THRESHOLD = 0.6

# Advisor display name → the phase1-<slug>.md filename stem in a run directory.
ADVISOR_FILE_SLUGS = {
    "Contrarian": "contrarian",
    "First Principles": "first-principles",
    "Expansionist": "expansionist",
    "Outsider": "outsider",
    "Executor": "executor",
}
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def _normalize_sentence(s: str) -> str:
|
|
466
|
+
return re.sub(r"\s+", " ", s.strip().lower())
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def _tokenize(s: str) -> set[str]:
|
|
470
|
+
"""Token bag for Jaccard — lowercase, alphanum, words ≥3 chars to drop noise."""
|
|
471
|
+
return {t.lower() for t in re.findall(r"\w+", s) if len(t) >= 3}
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def _jaccard(a: set[str], b: set[str]) -> float:
|
|
475
|
+
if not a or not b:
|
|
476
|
+
return 0.0
|
|
477
|
+
return len(a & b) / len(a | b)
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def _has_3_clique(pairs: list[list[int]], n: int) -> bool:
|
|
481
|
+
"""Detect a 3-clique in the undirected same-finding graph."""
|
|
482
|
+
edges = {(min(i, j), max(i, j)) for i, j in pairs}
|
|
483
|
+
for i in range(n):
|
|
484
|
+
for j in range(i + 1, n):
|
|
485
|
+
if (i, j) not in edges:
|
|
486
|
+
continue
|
|
487
|
+
for k in range(j + 1, n):
|
|
488
|
+
if (i, k) in edges and (j, k) in edges:
|
|
489
|
+
return True
|
|
490
|
+
return False
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def _read_sharpest(run_dir: Path, advisor: str) -> str | None:
    """Locate the advisor's `**Single sharpest point:**` marker at the start of a line.

    Anchoring on `^` is load-bearing — the marker text can appear inline inside
    other sections (e.g. the Executor's action body referencing the marker as a
    parse target). The persona's output template always places the marker at
    the start of its own line, so a multiline-mode start-of-line anchor
    distinguishes the structural marker from inline mentions.

    Returns None when the phase1 file is missing or carries no marker.
    """
    slug = ADVISOR_FILE_SLUGS[advisor]
    p = run_dir / f"phase1-{slug}.md"
    if not p.exists():
        return None
    # Fix: read_text() previously used the platform-default encoding, which
    # mis-decodes em-dashes/curly quotes on e.g. Windows cp1252. Run artifacts
    # are presumably written as UTF-8 (the rest of this module writes UTF-8) —
    # make the decode explicit.
    body = p.read_text(encoding="utf-8")
    m = re.search(
        r"^\*\*Single sharpest point:\*\*\s*(.+?)(?=\n\*\*|\n##|\Z)",
        body,
        re.DOTALL | re.MULTILINE,
    )
    if not m:
        return None
    return _first_paragraph(m.group(1))
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def _first_paragraph(s: str) -> str:
|
|
518
|
+
parts = [p.strip() for p in s.strip().split("\n\n") if p.strip()]
|
|
519
|
+
if not parts:
|
|
520
|
+
return s.strip()
|
|
521
|
+
return re.sub(r"\s+", " ", parts[0])
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def extract_phase_2_5(run_dir: Path) -> dict:
    """Read phase1-*.md files in run_dir, compute structured Phase-2.5 verdict.

    Pairs of advisors whose "Single sharpest point" sentences match — either
    exactly after normalization or at token-Jaccard ≥ JACCARD_THRESHOLD — form
    edges of an undirected graph; `flag_mechanical` is True iff that graph
    contains a triangle (the protocol's "≥3 semantically the same" rule).
    Advisors with a missing file or marker appear in `missing_advisors` and
    never match.
    """
    run_meta_path = run_dir / "run.json"
    run_id = None
    if run_meta_path.exists():
        try:
            # Best-effort metadata: a corrupt run.json was already tolerated,
            # but an unreadable/mis-encoded one crashed the whole verdict —
            # treat all three failure modes the same. Explicit UTF-8 avoids
            # platform-default decode surprises.
            run_id = json.loads(run_meta_path.read_text(encoding="utf-8")).get("run_id")
        except (OSError, UnicodeDecodeError, json.JSONDecodeError):
            run_id = None

    sharpest_points: list[dict] = []
    for advisor in ADVISORS:
        sentence = _read_sharpest(run_dir, advisor)
        sharpest_points.append({"advisor": advisor, "sentence": sentence or ""})

    n = len(sharpest_points)
    norm_strings = [_normalize_sentence(p["sentence"]) for p in sharpest_points]
    token_sets = [_tokenize(p["sentence"]) for p in sharpest_points]

    pairwise: list[list[int]] = []
    pair_methods: dict[tuple[int, int], str] = {}
    for i in range(n):
        for j in range(i + 1, n):
            if not norm_strings[i] or not norm_strings[j]:
                continue  # a missing advisor output can never match anything
            if norm_strings[i] == norm_strings[j]:
                pairwise.append([i, j])
                pair_methods[(i, j)] = "exact-normalized"
                continue
            score = _jaccard(token_sets[i], token_sets[j])
            if score >= JACCARD_THRESHOLD:
                pairwise.append([i, j])
                pair_methods[(i, j)] = f"jaccard={score:.2f}"

    flag_mechanical = _has_3_clique(pairwise, n)

    return {
        "schema_version": PHASE_2_5_SCHEMA_VERSION,
        "run_id": run_id,
        "run_dir": str(run_dir),
        "sharpest_points": sharpest_points,
        "pairwise_same": pairwise,
        "pair_methods": {f"{i},{j}": m for (i, j), m in pair_methods.items()},
        "flag_mechanical": flag_mechanical,
        "jaccard_threshold": JACCARD_THRESHOLD,
        "missing_advisors": [
            sp["advisor"] for sp in sharpest_points if not sp["sentence"]
        ],
    }
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
def main():
    """CLI entry point — parse args, emit/append telemetry (see module docstring)."""
    parser = argparse.ArgumentParser(description="Emit telemetry from a Council run output.")
    parser.add_argument("path", help="Path to the Council run markdown OR — with --phase-2-5 — a run directory.")
    parser.add_argument("--append", action="store_true", help="Append to the JSONL log (default off — stdout only).")
    parser.add_argument("--log", default=str(DEFAULT_LOG_PATH), help=f"JSONL log path (default: {DEFAULT_LOG_PATH}).")
    parser.add_argument("--tokens", type=int, default=None, help="Total tokens consumed (orchestrator-supplied).")
    parser.add_argument("--wall-clock", type=float, default=None, help="Wall-clock seconds (orchestrator-supplied).")
    parser.add_argument("--allow-invalid", action="store_true", help="Emit a record even if validation fails.")
    parser.add_argument("--phase-2-5", action="store_true", help="Treat <path> as a run directory; emit structured Phase 2.5 verdict (sharpest points + pairwise-same + clique flag) to stdout. No JSONL append in this mode.")
    args = parser.parse_args()

    path = Path(args.path)
    if not path.exists():
        print(f"❌ Path not found: {path}", file=sys.stderr)
        sys.exit(2)

    if args.phase_2_5:
        if not path.is_dir():
            print(f"❌ --phase-2-5 expects a run directory, got: {path}", file=sys.stderr)
            sys.exit(2)
        verdict = extract_phase_2_5(path)
        print(json.dumps(verdict, ensure_ascii=False, indent=2))
        return

    # Fix: the module contract promises exit code 2 for "file not found /
    # unreadable", but a failing read_text() previously escaped as a raw
    # traceback. Explicit UTF-8 also keeps decoding platform-independent.
    try:
        md = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as err:
        print(f"❌ Could not read {path}: {err}", file=sys.stderr)
        sys.exit(2)

    record = extract_record(
        md,
        source_path=str(path),
        tokens=args.tokens,
        wall_clock=args.wall_clock,
    )

    # Exit 1 on structural validation failure unless the caller opts in.
    if not record.validation["valid"] and not args.allow_invalid:
        print(
            f"❌ Validation failed ({record.validation['structural_violations']} violations). "
            f"Run validate.py for details, or pass --allow-invalid to log anyway.",
            file=sys.stderr,
        )
        sys.exit(1)

    json_str = json.dumps(asdict(record), ensure_ascii=False, indent=2)
    print(json_str)

    if args.append:
        append_jsonl(record, Path(args.log))
        print(f"📊 Appended one record to {args.log}", file=sys.stderr)
|