@cleocode/skills 2026.4.161 → 2026.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/package.json +1 -1
  2. package/skills/ct-council/SKILL.md +377 -0
  3. package/skills/ct-council/optimization/HARDENING-PLAYBOOK.md +107 -0
  4. package/skills/ct-council/optimization/README.md +74 -0
  5. package/skills/ct-council/optimization/scenarios.yaml +121 -0
  6. package/skills/ct-council/optimization/scripts/campaign.py +543 -0
  7. package/skills/ct-council/optimization/scripts/test_campaign.py +143 -0
  8. package/skills/ct-council/references/chairman.md +119 -0
  9. package/skills/ct-council/references/contrarian.md +70 -0
  10. package/skills/ct-council/references/evidence-pack.md +145 -0
  11. package/skills/ct-council/references/examples.md +235 -0
  12. package/skills/ct-council/references/executor.md +83 -0
  13. package/skills/ct-council/references/expansionist.md +68 -0
  14. package/skills/ct-council/references/first-principles.md +73 -0
  15. package/skills/ct-council/references/outsider.md +73 -0
  16. package/skills/ct-council/references/peer-review.md +125 -0
  17. package/skills/ct-council/scripts/analyze_runs.py +293 -0
  18. package/skills/ct-council/scripts/fixtures/executor_multi.md +198 -0
  19. package/skills/ct-council/scripts/fixtures/missing_advisor.md +117 -0
  20. package/skills/ct-council/scripts/fixtures/missing_convergence.md +190 -0
  21. package/skills/ct-council/scripts/fixtures/thin_evidence.md +193 -0
  22. package/skills/ct-council/scripts/fixtures/valid.md +226 -0
  23. package/skills/ct-council/scripts/fixtures/valid_with_llmtxt.md +226 -0
  24. package/skills/ct-council/scripts/llmtxt_ref.py +223 -0
  25. package/skills/ct-council/scripts/run_council.py +578 -0
  26. package/skills/ct-council/scripts/telemetry.py +624 -0
  27. package/skills/ct-council/scripts/test_telemetry.py +509 -0
  28. package/skills/ct-council/scripts/test_validate.py +452 -0
  29. package/skills/ct-council/scripts/validate.py +396 -0
@@ -0,0 +1,624 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ telemetry.py — extract one JSONL record from a Council run output.
4
+
5
+ Reads a council-output.md, validates it (using validate.py), and emits a
6
+ single JSON record describing the run: question, per-advisor gate-pass
7
+ rates, peer-review disposition distribution, convergence flag, Chairman
8
+ confidence, evidence-pack size, and (optional) externally-supplied
9
+ tokens / wall-clock metrics.
10
+
11
+ Usage:
12
+ # Emit JSON to stdout (do not append to log).
13
+ python3 telemetry.py <output.md>
14
+
15
+ # Append one JSON line to .cleo/council-runs.jsonl (default log path).
16
+ python3 telemetry.py --append <output.md>
17
+
18
+ # Append to a specific log path.
19
+ python3 telemetry.py --log path/to/runs.jsonl <output.md>
20
+
21
+ # Stamp tokens / wall-clock from the orchestrator.
22
+ python3 telemetry.py --tokens 41250 --wall-clock 73.4 --append <output.md>
23
+
24
+ Exit codes:
25
+ 0 — record emitted successfully
26
+ 1 — validation failed (no record emitted)
27
+ 2 — file not found / unreadable
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import argparse
33
+ import datetime as _dt
34
+ import hashlib
35
+ import json
36
+ import re
37
+ import sys
38
+ from dataclasses import dataclass, field, asdict
39
+ from pathlib import Path
40
+
41
+ # Re-use the validator's helpers — single source of truth for parsing.
42
+ sys.path.insert(0, str(Path(__file__).resolve().parent))
43
+ from validate import ( # noqa: E402
44
+ ADVISORS,
45
+ PEER_REVIEW_ROTATION,
46
+ PEER_REVIEW_GATES,
47
+ Validator,
48
+ )
49
+
50
+
51
# Bump when the shape of the emitted JSONL record changes.
SCHEMA_VERSION = "1.0.0"
# Where --append writes when --log is not given.
DEFAULT_LOG_PATH = Path(".cleo/council-runs.jsonl")

# Short keys for the four peer-review gates; zipped positionally against the
# full labels imported as PEER_REVIEW_GATES.
GATE_KEYS = ["G1", "G2", "G3", "G4"]
55
+
56
+
57
@dataclass
class AdvisorRecord:
    """Per-advisor telemetry; gate fields are filled in later by
    _annotate_advisor_with_gates from the peer review that graded this advisor."""
    gates: dict[str, str] = field(default_factory=dict)  # G1..G4 → PASS|FAIL
    gate_pass_count: int = 0
    weight: str = "low"  # full | high | moderate | low
    sharpest: str | None = None  # first line of "**Single sharpest point:**"
    reviewer: str | None = None  # who graded this advisor
64
+
65
+
66
@dataclass
class PeerReviewRecord:
    """One peer review (reviewer → reviewee): gate verdicts and disposition."""
    reviewer: str
    reviewee: str
    disposition: str | None = None  # Accept | Modify | Reject
    gates_passed: int = 0
    # G1..G4 → PASS|FAIL. Previously this was stuffed into __dict__ by
    # _extract_peer_review via setdefault because no field existed, which kept
    # it invisible to dataclasses.asdict(). Declaring it (appended with a
    # default, so positional construction stays compatible) makes the
    # setdefault call a no-op against this dict and lets it round-trip.
    gates: dict[str, str] = field(default_factory=dict)
72
+
73
+
74
@dataclass
class TelemetryRecord:
    """One telemetry row — serialized via dataclasses.asdict into the JSONL log."""
    schema_version: str
    run_id: str  # 16 hex chars, content hash of the source markdown
    timestamp: str  # UTC ISO-8601, seconds precision
    question: str
    validation: dict  # valid flag + structural/warning counts
    evidence_pack: dict  # count + has_llmtxt
    advisors: dict[str, dict]
    peer_reviews: list[dict]
    convergence: dict
    chairman: dict
    metrics: dict  # tokens, wall clock, size, source path, extras
87
+
88
+
89
+ # ─── helpers (mirror validate.py's line-based, fence-aware section scan) ────
90
+
91
# ATX markdown heading: group 1 is the '#' run (level), group 2 the title.
_HEADER_RE = re.compile(r"^(#{1,6})\s+(.+?)\s*$")
92
+
93
+
94
def _section_body(md: str, header_regex: str) -> str | None:
    """Body under the first header matching header_regex; ignores ``` fences."""
    lines = md.split("\n")
    fenced = False
    begin: int | None = None
    begin_level = 0
    end = len(lines)

    for idx, raw in enumerate(lines):
        # Fence delimiters toggle state and are never treated as headers.
        if raw.lstrip().startswith("```"):
            fenced = not fenced
            continue
        if fenced:
            continue
        hit = _HEADER_RE.match(raw)
        if hit is None:
            continue
        depth = len(hit.group(1))
        title = hit.group(2).strip()
        if begin is None:
            # Still hunting for the opening header.
            if re.match(header_regex, title):
                begin, begin_level = idx + 1, depth
        elif depth <= begin_level:
            # A header at the same or shallower level ends the section.
            end = idx
            break

    if begin is None:
        return None
    return "\n".join(lines[begin:end])
125
+
126
+
127
+ def _weight_from_pass_count(n: int) -> str:
128
+ if n == 4:
129
+ return "full"
130
+ if n == 3:
131
+ return "high"
132
+ if n == 2:
133
+ return "moderate"
134
+ return "low"
135
+
136
+
137
+ def _extract_question(md: str) -> str:
138
+ m = re.search(r"^#\s+The Council\s+—\s+(.+)$", md, re.MULTILINE)
139
+ return m.group(1).strip() if m else ""
140
+
141
+
142
def _extract_evidence_pack(md: str) -> dict:
    """Count numbered items under 'Evidence pack' and flag llmtxt-sourced ones."""
    body = _section_body(md, r"^Evidence pack$")
    if body is None:
        return {"count": 0, "has_llmtxt": False}
    # One match per numbered list item; DOTALL lets an item wrap across lines.
    entries = re.findall(
        r"^\s*\d+\.\s+(.+?)(?=^\s*\d+\.\s+|\Z)", body, re.MULTILINE | re.DOTALL
    )
    return {
        "count": len(entries),
        "has_llmtxt": any("llmtxt:" in entry for entry in entries),
    }
151
+
152
+
153
+ def _extract_sharpest(advisor_body: str) -> str | None:
154
+ m = re.search(
155
+ r"\*\*Single sharpest point:\*\*\s*(.+?)(?=\n\*\*|\Z)",
156
+ advisor_body,
157
+ re.DOTALL,
158
+ )
159
+ if not m:
160
+ return None
161
+ return m.group(1).strip().splitlines()[0].strip() if m.group(1).strip() else None
162
+
163
+
164
def _extract_peer_review(md: str, reviewer: str, reviewee: str) -> PeerReviewRecord:
    """Parse the '<reviewer> reviewing <reviewee>' section into a PeerReviewRecord.

    Returns an empty record (no disposition, zero gates) when the section is
    absent, so callers never special-case a missing review.
    """
    pr = PeerReviewRecord(reviewer=reviewer, reviewee=reviewee)
    body = _section_body(md, rf"^{re.escape(reviewer)} reviewing {re.escape(reviewee)}$")
    if body is None:
        return pr

    # Gate lines carry the full label from PEER_REVIEW_GATES, formatted as
    # "- <label>: PASS — ..." / "- <label>: FAIL — ...".
    verdicts: dict[str, str] = {}
    for key, full in zip(GATE_KEYS, PEER_REVIEW_GATES):
        hit = re.search(rf"-\s+{re.escape(full)}:\s+(PASS|FAIL)\s+—", body)
        if hit is None:
            continue
        verdicts[key] = hit.group(1)
        if hit.group(1) == "PASS":
            pr.gates_passed += 1
    if verdicts:
        # Stash verdicts on the instance; _annotate_advisor_with_gates reads
        # them back through pr.__dict__.
        pr.__dict__.setdefault("gates", {}).update(verdicts)

    disposition = re.search(r"\*\*Disposition:\*\*\s+(Accept|Modify|Reject)\b", body)
    if disposition is not None:
        pr.disposition = disposition.group(1)

    return pr
184
+
185
+
186
def _extract_advisor(md: str, advisor: str) -> AdvisorRecord:
    """Build an AdvisorRecord from the 'Advisor: <name>' section (empty if absent)."""
    record = AdvisorRecord()
    body = _section_body(md, rf"^Advisor:\s+{re.escape(advisor)}$")
    if body is not None:
        record.sharpest = _extract_sharpest(body)
    return record
193
+
194
+
195
def _annotate_advisor_with_gates(rec: AdvisorRecord, pr: PeerReviewRecord) -> None:
    """Attach the peer-review gate verdicts (the reviewee's gates) onto the advisor."""
    rec.reviewer = pr.reviewer
    verdicts: dict[str, str] = pr.__dict__.get("gates", {})
    # Every gate key appears in the output; unparsed gates read "MISSING".
    rec.gates = {key: verdicts.get(key, "MISSING") for key in GATE_KEYS}
    rec.gate_pass_count = sum(v == "PASS" for v in rec.gates.values())
    rec.weight = _weight_from_pass_count(rec.gate_pass_count)
203
+
204
+
205
def _extract_convergence(md: str) -> dict:
    """Best-effort parse of Phase 2.5 into {"flag": bool | None, "rerun_advisors": [...]}.

    flag is True when convergence language is detected, False when the section
    explicitly clears it, and None when the section is missing or ambiguous.
    """
    body = _section_body(md, r"^Phase 2\.5\s*[—-]\s*Convergence check$")
    if body is None:
        return {"flag": None, "rerun_advisors": []}
    text = body.lower()
    # Heuristic: "convergence flag" + a positive verb. Authors typically write
    # "convergence flag raised" or "no convergence flag".
    raised = bool(re.search(r"\bconvergence\b.*\b(raised|fired|triggered)\b", text))
    cleared = bool(re.search(r"\bno convergence\b|\bproceeding to phase 3\b|\bdistinct subjects\b", text))
    # "cleared" wins over "raised"; neither present → None (unknown).
    flag = True if (raised and not cleared) else (False if cleared else None)
    # NOTE(review): the lazy quantifier stops at the first word boundary, so
    # only the first word after "reran" is captured, and names come back
    # lowercased (because `text` was lowercased above) — confirm downstream
    # consumers expect that before tightening this pattern.
    rerun = re.findall(r"reran\s+(\w[\w \-]*?)\b", text)
    return {"flag": flag, "rerun_advisors": [r.strip() for r in rerun]}
217
+
218
+
219
def _extract_chairman(md: str) -> dict:
    """Summarize Phase 3: confidence level, presence flags, open-question count."""
    body = _section_body(md, r"^Phase 3\s*[—-]\s*Chairman['’]s verdict$")
    if body is None:
        return {
            "confidence": None,
            "recommendation_present": False,
            "next_action_present": False,
            "open_questions_count": 0,
        }

    def subsection(header: str) -> str | None:
        # '### <header>' body, up to the next '###' or end of the section.
        hit = re.search(rf"###\s+{header}\s*\n(.+?)(?=\n###|\Z)", body, re.DOTALL)
        return hit.group(1) if hit else None

    rec_text = subsection("Recommendation")
    rec_present = bool(rec_text and rec_text.strip())

    action_text = subsection("Next 60-minute action")
    # A real action needs some substance, not a placeholder word.
    action_present = bool(action_text and len(action_text.strip()) >= 15)

    conf = None
    conf_text = subsection("Confidence")
    if conf_text is not None:
        lowered = conf_text.strip().lower()
        # Compound levels first so "high" cannot shadow "medium-high".
        for level in ("medium-high", "medium-low", "high", "medium", "low"):
            if level in lowered:
                conf = level
                break

    open_q_count = 0
    open_q_text = subsection("Open questions for the owner")
    if open_q_text is not None:
        open_q_count = len(re.findall(r"^\s*[-*]\s+", open_q_text, re.MULTILINE))

    return {
        "confidence": conf,
        "recommendation_present": rec_present,
        "next_action_present": action_present,
        "open_questions_count": open_q_count,
    }
255
+
256
+
257
+ # ─── public API ─────────────────────────────────────────────────────────────
258
+
259
+
260
def extract_record(
    md: str,
    *,
    source_path: str | None = None,
    tokens: int | None = None,
    wall_clock: float | None = None,
    extra: dict | None = None,
) -> TelemetryRecord:
    """Parse a full council-output.md into one TelemetryRecord.

    md: complete markdown text of the run output.
    source_path: recorded verbatim in metrics (never read from).
    tokens / wall_clock: orchestrator-supplied usage figures, passed through.
    extra: merged into the metrics dict last, so its keys win on collision.

    Always returns a record — validation problems are recorded in
    record.validation rather than raised; the caller decides how to react.
    """
    # Validate first; structural vs warning counts feed the "valid" flag.
    v = Validator(md)
    violations = v.validate()
    structural = sum(1 for x in violations if x.kind == "structural")
    warnings = sum(1 for x in violations if x.kind == "warning")

    question = _extract_question(md)
    ep = _extract_evidence_pack(md)

    advisors: dict[str, AdvisorRecord] = {a: _extract_advisor(md, a) for a in ADVISORS}

    # Peer reviews follow the fixed rotation; each review also stamps its gate
    # verdicts onto the advisor that was reviewed.
    peer_reviews: list[PeerReviewRecord] = []
    for reviewer, reviewee in PEER_REVIEW_ROTATION:
        pr = _extract_peer_review(md, reviewer, reviewee)
        peer_reviews.append(pr)
        _annotate_advisor_with_gates(advisors[reviewee], pr)

    convergence = _extract_convergence(md)
    chairman = _extract_chairman(md)

    # run_id is content-addressed: identical markdown → identical id.
    payload = md.encode("utf-8")
    run_id = hashlib.sha256(payload).hexdigest()[:16]

    record = TelemetryRecord(
        schema_version=SCHEMA_VERSION,
        run_id=run_id,
        timestamp=_dt.datetime.now(tz=_dt.timezone.utc).isoformat(timespec="seconds"),
        question=question,
        validation={
            "valid": structural == 0,
            "structural_violations": structural,
            "warnings": warnings,
        },
        evidence_pack=ep,
        advisors={
            name: {
                "gates": rec.gates,
                "gate_pass_count": rec.gate_pass_count,
                "weight": rec.weight,
                "sharpest": rec.sharpest,
                "reviewer": rec.reviewer,
            }
            for name, rec in advisors.items()
        },
        peer_reviews=[
            {
                "reviewer": pr.reviewer,
                "reviewee": pr.reviewee,
                "disposition": pr.disposition,
                "gates_passed": pr.gates_passed,
            }
            for pr in peer_reviews
        ],
        convergence=convergence,
        chairman=chairman,
        metrics={
            "tokens": tokens,
            "wall_clock_seconds": wall_clock,
            "evidence_pack_count": ep["count"],
            "source_path": source_path,
            "size_bytes": len(payload),
            **(extra or {}),
        },
    )
    return record
332
+
333
+
334
def append_jsonl(record: TelemetryRecord, log_path: Path) -> None:
    """Append *record* as one JSON line to *log_path*, creating parent dirs."""
    log_path.parent.mkdir(parents=True, exist_ok=True)
    line = json.dumps(asdict(record), ensure_ascii=False)
    with log_path.open("a", encoding="utf-8") as fh:
        fh.write(line + "\n")
338
+
339
+
340
+ # ─── Verdict + TL;DR generation (post-shakedown UX fix) ─────────────────────
341
+ #
342
+ # The full output.md (Phase 0 + 5 advisors + 5 peer reviews + 2.5 + 3) is the
343
+ # audit trail — ~300-400 lines. The owner consumes the *Chairman verdict*,
344
+ # which is the last ~60 lines. Forcing the reader to scroll through the full
345
+ # transcript to reach the recommendation is a UX failure, not a content
346
+ # problem.
347
+ #
348
+ # These functions extract two leaner deliverables from a validated output.md:
349
+ # - verdict.md — the Chairman section with the question prepended (~60-80 lines)
350
+ # - tldr.md — recommendation + action + confidence (~10-15 lines)
351
+ # The full output.md is preserved as-is for the audit trail.
352
+
353
+
354
def _extract_chairman_section(md: str) -> str | None:
    """Return the literal Phase 3 markdown body (everything under '## Phase 3 — ...')."""
    return _section_body(md, r"^Phase 3\s*[—-]\s*Chairman['’]s verdict$")
358
+
359
+
360
+ def _extract_phase3_subsection(body: str, header: str) -> str | None:
361
+ """Pull a specific `### <header>` subsection's body from a Phase 3 body."""
362
+ m = re.search(
363
+ rf"###\s+{re.escape(header)}\s*\n(.+?)(?=\n###|\Z)",
364
+ body,
365
+ re.DOTALL,
366
+ )
367
+ if not m:
368
+ return None
369
+ return m.group(1).strip()
370
+
371
+
372
def render_verdict(md: str) -> str:
    """Render verdict.md from a full output.md — Chairman section + question header.

    Output is structurally a standalone decision document: H1 question, gate
    summary, recommendation, conditions, action, confidence. Suitable for
    direct hand-off to the owner without scrolling past upstream artifacts.
    """
    question = _extract_question(md) or "<question missing>"
    chairman = _extract_chairman_section(md)
    if chairman is None:
        raise ValueError("output.md missing Phase 3 — Chairman's verdict section")
    parts = [
        f"# Council Verdict — {question}",
        "",
        "## Phase 3 — Chairman's verdict",
        chairman.rstrip(),
    ]
    return "\n".join(parts) + "\n"
384
+
385
+
386
def render_tldr(md: str) -> str:
    """Render tldr.md — 10-15 line summary suitable for PR comments / chat.

    Pulls only the load-bearing fields: recommendation, next action, confidence,
    and a count of open questions / conditions. Not a substitute for the full
    verdict — a *pointer* to it.
    """
    question = _extract_question(md) or "<question missing>"
    chairman_body = _extract_chairman_section(md) or ""

    # Missing subsections degrade to placeholders rather than raising —
    # a TL;DR should render even from a partially malformed verdict.
    rec = _extract_phase3_subsection(chairman_body, "Recommendation") or "<missing>"
    action = _extract_phase3_subsection(chairman_body, "Next 60-minute action") or "<missing>"
    conf = _extract_phase3_subsection(chairman_body, "Confidence") or "<missing>"
    conditions = _extract_phase3_subsection(chairman_body, "Conditions on the recommendation") or ""
    open_q = _extract_phase3_subsection(chairman_body, "Open questions for the owner") or ""

    # Trim each to a single first paragraph / line for concision.
    rec_first = _first_paragraph(rec)
    action_first = _first_paragraph(action)
    # Confidence: just the level (first word) + first clause, not the full justification.
    # Compound levels listed first so "high" cannot shadow "medium-high".
    conf_level_match = re.match(r"\s*(medium-high|medium-low|high|medium|low)\b", conf, re.IGNORECASE)
    if conf_level_match:
        conf_first = conf_level_match.group(1).lower()
    else:
        conf_first = _first_paragraph(conf)[:80]

    cond_count = len(re.findall(r"^\s*\d+\.\s+", conditions, re.MULTILINE))
    open_q_count = len(re.findall(r"^\s*\d+\.\s+|^\s*[-*]\s+", open_q, re.MULTILINE))
    # Sniff "none" in the first 80 chars (lowercased) so "None material." collapses.
    open_q_marker = "none" if (not open_q.strip() or "none" in open_q.lower()[:80]) else f"{open_q_count}"

    lines = [
        f"# Council TL;DR — {question}",
        "",
        f"**Recommendation** — {rec_first}",
        "",
        f"**Next 60-minute action** — {action_first}",
        "",
        f"**Confidence** — {conf_first}",
        "",
        f"**Conditions:** {cond_count} · **Open questions:** {open_q_marker}",
        "",
        "_Full verdict: `verdict.md` · Full transcript: `output.md`_",
        "",
    ]
    return "\n".join(lines)
431
+
432
+
433
+ # ─── Phase 2.5 structured extractor (T-shakedown-1 verdict) ─────────────────
434
+ #
435
+ # The Phase 2.5 convergence detector currently emits free prose; downstream
436
+ # telemetry has to grep regex (`_extract_convergence` above). This extractor
437
+ # replaces that with a structured artifact built directly from the per-advisor
438
+ # `phase1-<advisor>.md` files in a run directory, before output.md is even
439
+ # assembled. Output schema:
440
+ #
441
+ # {
442
+ # "schema_version": "...",
443
+ # "run_id": "<8-char from run.json>",
444
+ # "sharpest_points": [{"advisor": str, "sentence": str}, ...],
445
+ # "pairwise_same": [[i, j], ...],
446
+ # "flag_mechanical": bool,
447
+ # "method": "exact-normalized | jaccard>=0.6 | 3-clique"
448
+ # }
449
+ #
450
+ # `flag_mechanical=True` iff a 3-clique exists in the pairwise-same graph —
451
+ # matching the protocol's "≥3 semantically the same finding" rule.
452
+
453
# Schema version of the structured Phase 2.5 verdict emitted by extract_phase_2_5.
PHASE_2_5_SCHEMA_VERSION = "1.0.0"
# Minimum token-set similarity for two sharpest points to count as "the same".
JACCARD_THRESHOLD = 0.6

# Advisor display name → phase1-<slug>.md filename stem inside a run directory.
ADVISOR_FILE_SLUGS = {
    "Contrarian": "contrarian",
    "First Principles": "first-principles",
    "Expansionist": "expansionist",
    "Outsider": "outsider",
    "Executor": "executor",
}
463
+
464
+
465
+ def _normalize_sentence(s: str) -> str:
466
+ return re.sub(r"\s+", " ", s.strip().lower())
467
+
468
+
469
+ def _tokenize(s: str) -> set[str]:
470
+ """Token bag for Jaccard — lowercase, alphanum, words ≥3 chars to drop noise."""
471
+ return {t.lower() for t in re.findall(r"\w+", s) if len(t) >= 3}
472
+
473
+
474
+ def _jaccard(a: set[str], b: set[str]) -> float:
475
+ if not a or not b:
476
+ return 0.0
477
+ return len(a & b) / len(a | b)
478
+
479
+
480
+ def _has_3_clique(pairs: list[list[int]], n: int) -> bool:
481
+ """Detect a 3-clique in the undirected same-finding graph."""
482
+ edges = {(min(i, j), max(i, j)) for i, j in pairs}
483
+ for i in range(n):
484
+ for j in range(i + 1, n):
485
+ if (i, j) not in edges:
486
+ continue
487
+ for k in range(j + 1, n):
488
+ if (i, k) in edges and (j, k) in edges:
489
+ return True
490
+ return False
491
+
492
+
493
def _read_sharpest(run_dir: Path, advisor: str) -> str | None:
    """Locate the advisor's `**Single sharpest point:**` marker at the start of a line.

    Anchoring on `^` is load-bearing — the marker text can appear inline inside
    other sections (e.g. the Executor's action body referencing the marker as a
    parse target). The persona's output template always places the marker at
    the start of its own line, so a multiline-mode start-of-line anchor
    distinguishes the structural marker from inline mentions.

    Returns the first paragraph of the marker's value, or None when the
    advisor's phase1 file or the marker itself is missing.
    """
    slug = ADVISOR_FILE_SLUGS[advisor]
    p = run_dir / f"phase1-{slug}.md"
    if not p.exists():
        return None
    # Pin the codec: council artifacts are UTF-8 (em-dashes throughout); the
    # platform-default codec (e.g. cp1252 on Windows) could raise or mojibake.
    body = p.read_text(encoding="utf-8")
    m = re.search(
        r"^\*\*Single sharpest point:\*\*\s*(.+?)(?=\n\*\*|\n##|\Z)",
        body,
        re.DOTALL | re.MULTILINE,
    )
    if not m:
        return None
    return _first_paragraph(m.group(1))
515
+
516
+
517
+ def _first_paragraph(s: str) -> str:
518
+ parts = [p.strip() for p in s.strip().split("\n\n") if p.strip()]
519
+ if not parts:
520
+ return s.strip()
521
+ return re.sub(r"\s+", " ", parts[0])
522
+
523
+
524
def extract_phase_2_5(run_dir: Path) -> dict:
    """Read phase1-*.md files in run_dir, compute structured Phase-2.5 verdict.

    Returns the schema documented above: per-advisor sharpest points, the
    pairwise same-finding graph with per-pair match method, and
    `flag_mechanical` (True iff a 3-clique exists — the "≥3 semantically the
    same finding" rule). Advisors whose file or marker is missing are listed
    in `missing_advisors` and never pair with anyone.
    """
    run_meta_path = run_dir / "run.json"
    run_id = None
    if run_meta_path.exists():
        try:
            # run.json is written as UTF-8; pin the codec so a non-UTF-8
            # platform default cannot corrupt or reject the read.
            run_id = json.loads(run_meta_path.read_text(encoding="utf-8")).get("run_id")
        except json.JSONDecodeError:
            run_id = None

    sharpest_points: list[dict] = []
    for advisor in ADVISORS:
        sentence = _read_sharpest(run_dir, advisor)
        sharpest_points.append({"advisor": advisor, "sentence": sentence or ""})

    n = len(sharpest_points)
    norm_strings = [_normalize_sentence(p["sentence"]) for p in sharpest_points]
    token_sets = [_tokenize(p["sentence"]) for p in sharpest_points]

    # Build the pairwise "same finding" graph: exact match after whitespace/case
    # normalization first, token-set Jaccard as the fuzzy fallback.
    pairwise: list[list[int]] = []
    pair_methods: dict[tuple[int, int], str] = {}
    for i in range(n):
        for j in range(i + 1, n):
            if not norm_strings[i] or not norm_strings[j]:
                continue
            if norm_strings[i] == norm_strings[j]:
                pairwise.append([i, j])
                pair_methods[(i, j)] = "exact-normalized"
                continue
            score = _jaccard(token_sets[i], token_sets[j])
            if score >= JACCARD_THRESHOLD:
                pairwise.append([i, j])
                pair_methods[(i, j)] = f"jaccard={score:.2f}"

    flag_mechanical = _has_3_clique(pairwise, n)

    return {
        "schema_version": PHASE_2_5_SCHEMA_VERSION,
        "run_id": run_id,
        "run_dir": str(run_dir),
        "sharpest_points": sharpest_points,
        "pairwise_same": pairwise,
        "pair_methods": {f"{i},{j}": m for (i, j), m in pair_methods.items()},
        "flag_mechanical": flag_mechanical,
        "jaccard_threshold": JACCARD_THRESHOLD,
        "missing_advisors": [
            sp["advisor"] for sp in sharpest_points if not sp["sentence"]
        ],
    }
573
+
574
+
575
def main():
    """CLI entry point — emit one telemetry record, or a Phase 2.5 verdict.

    Exit codes (per the module docstring): 0 success, 1 validation failure,
    2 path missing or unreadable.
    """
    parser = argparse.ArgumentParser(description="Emit telemetry from a Council run output.")
    parser.add_argument("path", help="Path to the Council run markdown OR — with --phase-2-5 — a run directory.")
    parser.add_argument("--append", action="store_true", help="Append to the JSONL log (default off — stdout only).")
    parser.add_argument("--log", default=str(DEFAULT_LOG_PATH), help=f"JSONL log path (default: {DEFAULT_LOG_PATH}).")
    parser.add_argument("--tokens", type=int, default=None, help="Total tokens consumed (orchestrator-supplied).")
    parser.add_argument("--wall-clock", type=float, default=None, help="Wall-clock seconds (orchestrator-supplied).")
    parser.add_argument("--allow-invalid", action="store_true", help="Emit a record even if validation fails.")
    parser.add_argument("--phase-2-5", action="store_true", help="Treat <path> as a run directory; emit structured Phase 2.5 verdict (sharpest points + pairwise-same + clique flag) to stdout. No JSONL append in this mode.")
    args = parser.parse_args()

    path = Path(args.path)
    if not path.exists():
        print(f"❌ Path not found: {path}", file=sys.stderr)
        sys.exit(2)

    if args.phase_2_5:
        if not path.is_dir():
            print(f"❌ --phase-2-5 expects a run directory, got: {path}", file=sys.stderr)
            sys.exit(2)
        verdict = extract_phase_2_5(path)
        print(json.dumps(verdict, ensure_ascii=False, indent=2))
        return

    # The documented exit-code contract says 2 for "file not found / unreadable"
    # — map read/decode failures to exit 2 instead of leaking a traceback.
    # UTF-8 is pinned to match the md.encode("utf-8") hashing in extract_record.
    try:
        md = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        print(f"❌ Could not read {path}: {exc}", file=sys.stderr)
        sys.exit(2)

    record = extract_record(
        md,
        source_path=str(path),
        tokens=args.tokens,
        wall_clock=args.wall_clock,
    )

    if not record.validation["valid"] and not args.allow_invalid:
        print(
            f"❌ Validation failed ({record.validation['structural_violations']} violations). "
            f"Run validate.py for details, or pass --allow-invalid to log anyway.",
            file=sys.stderr,
        )
        sys.exit(1)

    json_str = json.dumps(asdict(record), ensure_ascii=False, indent=2)
    print(json_str)

    if args.append:
        append_jsonl(record, Path(args.log))
        print(f"📊 Appended one record to {args.log}", file=sys.stderr)
621
+
622
+
623
# Script entry point — importing this module performs no work.
if __name__ == "__main__":
    main()