@cleocode/skills 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. package/dispatch-config.json +404 -0
  2. package/index.d.ts +178 -0
  3. package/index.js +405 -0
  4. package/package.json +14 -0
  5. package/profiles/core.json +7 -0
  6. package/profiles/full.json +10 -0
  7. package/profiles/minimal.json +7 -0
  8. package/profiles/recommended.json +7 -0
  9. package/provider-skills-map.json +97 -0
  10. package/skills/_shared/cleo-style-guide.md +84 -0
  11. package/skills/_shared/manifest-operations.md +810 -0
  12. package/skills/_shared/placeholders.json +433 -0
  13. package/skills/_shared/skill-chaining-patterns.md +237 -0
  14. package/skills/_shared/subagent-protocol-base.md +223 -0
  15. package/skills/_shared/task-system-integration.md +232 -0
  16. package/skills/_shared/testing-framework-config.md +110 -0
  17. package/skills/ct-cleo/SKILL.md +490 -0
  18. package/skills/ct-cleo/references/anti-patterns.md +19 -0
  19. package/skills/ct-cleo/references/loom-lifecycle.md +136 -0
  20. package/skills/ct-cleo/references/orchestrator-constraints.md +55 -0
  21. package/skills/ct-cleo/references/session-protocol.md +162 -0
  22. package/skills/ct-codebase-mapper/SKILL.md +82 -0
  23. package/skills/ct-contribution/SKILL.md +521 -0
  24. package/skills/ct-contribution/templates/contribution-init.json +21 -0
  25. package/skills/ct-dev-workflow/SKILL.md +423 -0
  26. package/skills/ct-docs-lookup/SKILL.md +66 -0
  27. package/skills/ct-docs-review/SKILL.md +175 -0
  28. package/skills/ct-docs-write/SKILL.md +108 -0
  29. package/skills/ct-documentor/SKILL.md +231 -0
  30. package/skills/ct-epic-architect/SKILL.md +305 -0
  31. package/skills/ct-epic-architect/references/bug-epic-example.md +172 -0
  32. package/skills/ct-epic-architect/references/commands.md +201 -0
  33. package/skills/ct-epic-architect/references/feature-epic-example.md +210 -0
  34. package/skills/ct-epic-architect/references/migration-epic-example.md +244 -0
  35. package/skills/ct-epic-architect/references/output-format.md +92 -0
  36. package/skills/ct-epic-architect/references/patterns.md +284 -0
  37. package/skills/ct-epic-architect/references/refactor-epic-example.md +412 -0
  38. package/skills/ct-epic-architect/references/research-epic-example.md +226 -0
  39. package/skills/ct-epic-architect/references/shell-escaping.md +86 -0
  40. package/skills/ct-epic-architect/references/skill-aware-execution.md +195 -0
  41. package/skills/ct-grade/SKILL.md +230 -0
  42. package/skills/ct-grade/agents/analysis-reporter.md +203 -0
  43. package/skills/ct-grade/agents/blind-comparator.md +157 -0
  44. package/skills/ct-grade/agents/scenario-runner.md +134 -0
  45. package/skills/ct-grade/eval-viewer/__pycache__/generate_grade_review.cpython-314.pyc +0 -0
  46. package/skills/ct-grade/eval-viewer/generate_grade_review.py +1138 -0
  47. package/skills/ct-grade/eval-viewer/generate_grade_viewer.py +544 -0
  48. package/skills/ct-grade/eval-viewer/generate_review.py +283 -0
  49. package/skills/ct-grade/eval-viewer/grade-review.html +1574 -0
  50. package/skills/ct-grade/eval-viewer/viewer.html +219 -0
  51. package/skills/ct-grade/evals/evals.json +94 -0
  52. package/skills/ct-grade/references/ab-test-methodology.md +150 -0
  53. package/skills/ct-grade/references/domains.md +137 -0
  54. package/skills/ct-grade/references/grade-spec.md +236 -0
  55. package/skills/ct-grade/references/scenario-playbook.md +234 -0
  56. package/skills/ct-grade/references/token-tracking.md +120 -0
  57. package/skills/ct-grade/scripts/__pycache__/audit_analyzer.cpython-314.pyc +0 -0
  58. package/skills/ct-grade/scripts/__pycache__/run_ab_test.cpython-314.pyc +0 -0
  59. package/skills/ct-grade/scripts/__pycache__/run_all.cpython-314.pyc +0 -0
  60. package/skills/ct-grade/scripts/__pycache__/token_tracker.cpython-314.pyc +0 -0
  61. package/skills/ct-grade/scripts/audit_analyzer.py +279 -0
  62. package/skills/ct-grade/scripts/generate_report.py +283 -0
  63. package/skills/ct-grade/scripts/run_ab_test.py +504 -0
  64. package/skills/ct-grade/scripts/run_all.py +287 -0
  65. package/skills/ct-grade/scripts/setup_run.py +183 -0
  66. package/skills/ct-grade/scripts/token_tracker.py +630 -0
  67. package/skills/ct-grade-v2-1/SKILL.md +237 -0
  68. package/skills/ct-grade-v2-1/agents/analysis-reporter.md +203 -0
  69. package/skills/ct-grade-v2-1/agents/blind-comparator.md +157 -0
  70. package/skills/ct-grade-v2-1/agents/scenario-runner.md +179 -0
  71. package/skills/ct-grade-v2-1/evals/evals.json +74 -0
  72. package/skills/ct-grade-v2-1/grade-viewer/__pycache__/build_op_stats.cpython-314.pyc +0 -0
  73. package/skills/ct-grade-v2-1/grade-viewer/__pycache__/generate_grade_review.cpython-314.pyc +0 -0
  74. package/skills/ct-grade-v2-1/grade-viewer/build_op_stats.py +174 -0
  75. package/skills/ct-grade-v2-1/grade-viewer/eval-analysis.json +41 -0
  76. package/skills/ct-grade-v2-1/grade-viewer/eval-report.md +34 -0
  77. package/skills/ct-grade-v2-1/grade-viewer/generate_grade_review.py +1023 -0
  78. package/skills/ct-grade-v2-1/grade-viewer/generate_grade_viewer.py +548 -0
  79. package/skills/ct-grade-v2-1/grade-viewer/grade-review-eval.html +613 -0
  80. package/skills/ct-grade-v2-1/grade-viewer/grade-review.html +1532 -0
  81. package/skills/ct-grade-v2-1/grade-viewer/viewer.html +620 -0
  82. package/skills/ct-grade-v2-1/manifest-entry.json +31 -0
  83. package/skills/ct-grade-v2-1/references/ab-testing.md +233 -0
  84. package/skills/ct-grade-v2-1/references/domains-ssot.md +156 -0
  85. package/skills/ct-grade-v2-1/references/grade-spec-v2.md +167 -0
  86. package/skills/ct-grade-v2-1/references/playbook-v2.md +393 -0
  87. package/skills/ct-grade-v2-1/references/token-tracking.md +202 -0
  88. package/skills/ct-grade-v2-1/scripts/generate_report.py +419 -0
  89. package/skills/ct-grade-v2-1/scripts/run_ab_test.py +493 -0
  90. package/skills/ct-grade-v2-1/scripts/run_scenario.py +396 -0
  91. package/skills/ct-grade-v2-1/scripts/setup_run.py +207 -0
  92. package/skills/ct-grade-v2-1/scripts/token_tracker.py +175 -0
  93. package/skills/ct-memory/SKILL.md +84 -0
  94. package/skills/ct-orchestrator/INSTALL.md +61 -0
  95. package/skills/ct-orchestrator/README.md +69 -0
  96. package/skills/ct-orchestrator/SKILL.md +380 -0
  97. package/skills/ct-orchestrator/manifest-entry.json +19 -0
  98. package/skills/ct-orchestrator/orchestrator-prompt.txt +17 -0
  99. package/skills/ct-orchestrator/references/SUBAGENT-PROTOCOL-BLOCK.md +66 -0
  100. package/skills/ct-orchestrator/references/autonomous-operation.md +167 -0
  101. package/skills/ct-orchestrator/references/lifecycle-gates.md +98 -0
  102. package/skills/ct-orchestrator/references/orchestrator-compliance.md +271 -0
  103. package/skills/ct-orchestrator/references/orchestrator-handoffs.md +85 -0
  104. package/skills/ct-orchestrator/references/orchestrator-patterns.md +164 -0
  105. package/skills/ct-orchestrator/references/orchestrator-recovery.md +113 -0
  106. package/skills/ct-orchestrator/references/orchestrator-spawning.md +271 -0
  107. package/skills/ct-orchestrator/references/orchestrator-tokens.md +180 -0
  108. package/skills/ct-research-agent/SKILL.md +226 -0
  109. package/skills/ct-skill-creator/.cleo/.context-state.json +13 -0
  110. package/skills/ct-skill-creator/.cleo/logs/cleo.2026-03-07.1.log +24 -0
  111. package/skills/ct-skill-creator/.cleo/tasks.db +0 -0
  112. package/skills/ct-skill-creator/SKILL.md +356 -0
  113. package/skills/ct-skill-creator/agents/analyzer.md +276 -0
  114. package/skills/ct-skill-creator/agents/comparator.md +204 -0
  115. package/skills/ct-skill-creator/agents/grader.md +225 -0
  116. package/skills/ct-skill-creator/assets/eval_review.html +146 -0
  117. package/skills/ct-skill-creator/eval-viewer/__pycache__/generate_review.cpython-314.pyc +0 -0
  118. package/skills/ct-skill-creator/eval-viewer/generate_review.py +471 -0
  119. package/skills/ct-skill-creator/eval-viewer/viewer.html +1325 -0
  120. package/skills/ct-skill-creator/manifest-entry.json +17 -0
  121. package/skills/ct-skill-creator/references/dynamic-context.md +228 -0
  122. package/skills/ct-skill-creator/references/frontmatter.md +83 -0
  123. package/skills/ct-skill-creator/references/invocation-control.md +165 -0
  124. package/skills/ct-skill-creator/references/output-patterns.md +86 -0
  125. package/skills/ct-skill-creator/references/provider-deployment.md +175 -0
  126. package/skills/ct-skill-creator/references/schemas.md +430 -0
  127. package/skills/ct-skill-creator/references/workflows.md +28 -0
  128. package/skills/ct-skill-creator/scripts/__init__.py +1 -0
  129. package/skills/ct-skill-creator/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  130. package/skills/ct-skill-creator/scripts/__pycache__/aggregate_benchmark.cpython-314.pyc +0 -0
  131. package/skills/ct-skill-creator/scripts/__pycache__/generate_report.cpython-314.pyc +0 -0
  132. package/skills/ct-skill-creator/scripts/__pycache__/improve_description.cpython-314.pyc +0 -0
  133. package/skills/ct-skill-creator/scripts/__pycache__/init_skill.cpython-314.pyc +0 -0
  134. package/skills/ct-skill-creator/scripts/__pycache__/quick_validate.cpython-314.pyc +0 -0
  135. package/skills/ct-skill-creator/scripts/__pycache__/run_eval.cpython-314.pyc +0 -0
  136. package/skills/ct-skill-creator/scripts/__pycache__/run_loop.cpython-314.pyc +0 -0
  137. package/skills/ct-skill-creator/scripts/__pycache__/utils.cpython-314.pyc +0 -0
  138. package/skills/ct-skill-creator/scripts/aggregate_benchmark.py +401 -0
  139. package/skills/ct-skill-creator/scripts/generate_report.py +326 -0
  140. package/skills/ct-skill-creator/scripts/improve_description.py +247 -0
  141. package/skills/ct-skill-creator/scripts/init_skill.py +306 -0
  142. package/skills/ct-skill-creator/scripts/package_skill.py +110 -0
  143. package/skills/ct-skill-creator/scripts/quick_validate.py +97 -0
  144. package/skills/ct-skill-creator/scripts/run_eval.py +310 -0
  145. package/skills/ct-skill-creator/scripts/run_loop.py +328 -0
  146. package/skills/ct-skill-creator/scripts/utils.py +47 -0
  147. package/skills/ct-skill-validator/SKILL.md +178 -0
  148. package/skills/ct-skill-validator/agents/ecosystem-checker.md +151 -0
  149. package/skills/ct-skill-validator/assets/valid-skill-example.md +13 -0
  150. package/skills/ct-skill-validator/evals/eval_set.json +14 -0
  151. package/skills/ct-skill-validator/evals/evals.json +52 -0
  152. package/skills/ct-skill-validator/manifest-entry.json +20 -0
  153. package/skills/ct-skill-validator/references/cleo-ecosystem-rules.md +163 -0
  154. package/skills/ct-skill-validator/references/validation-rules.md +168 -0
  155. package/skills/ct-skill-validator/scripts/__init__.py +0 -0
  156. package/skills/ct-skill-validator/scripts/__pycache__/audit_body.cpython-314.pyc +0 -0
  157. package/skills/ct-skill-validator/scripts/__pycache__/check_ecosystem.cpython-314.pyc +0 -0
  158. package/skills/ct-skill-validator/scripts/__pycache__/generate_validation_report.cpython-314.pyc +0 -0
  159. package/skills/ct-skill-validator/scripts/__pycache__/validate.cpython-314.pyc +0 -0
  160. package/skills/ct-skill-validator/scripts/audit_body.py +242 -0
  161. package/skills/ct-skill-validator/scripts/check_ecosystem.py +169 -0
  162. package/skills/ct-skill-validator/scripts/check_manifest.py +172 -0
  163. package/skills/ct-skill-validator/scripts/generate_validation_report.py +442 -0
  164. package/skills/ct-skill-validator/scripts/validate.py +422 -0
  165. package/skills/ct-spec-writer/SKILL.md +189 -0
  166. package/skills/ct-stickynote/README.md +14 -0
  167. package/skills/ct-stickynote/SKILL.md +46 -0
  168. package/skills/ct-task-executor/SKILL.md +296 -0
  169. package/skills/ct-validator/SKILL.md +216 -0
  170. package/skills/manifest.json +469 -0
  171. package/skills.json +281 -0
@@ -0,0 +1,1023 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Generate and serve the CLEO Grade Review viewer (v2.1 — SQLite-backed).
4
+
5
+ Reads grade results from GRADES.jsonl, session/token data from tasks.db,
6
+ grade run manifests from .cleo/metrics/grade-runs/, and eval results from
7
+ evals.json + grading.json files. Embeds all data into grade-review.html
8
+ as JSON in the {{EMBEDDED_DATA}} placeholder and serves via stdlib HTTP.
9
+
10
+ Sources (auto-discovered under workspace):
11
+ - .cleo/metrics/GRADES.jsonl (historical grade results)
12
+ - .cleo/tasks.db (sessions, audit_log, token_usage)
13
+ - .cleo/metrics/grade-runs/ (run manifests, summaries, operations)
14
+ - evals/evals.json + grading.json (eval report results)
15
+
16
+ Usage:
17
+ python generate_grade_review.py <workspace-path> [options]
18
+
19
+ Options:
20
+ --port PORT HTTP port (default: 3119)
21
+ --static PATH Write standalone HTML to file instead of serving
22
+ --skill-dir PATH Override skill directory (default: auto-detect)
23
+ --no-browser Don't auto-open browser
24
+ """
25
+
26
+ import argparse
27
+ import json
28
+ import os
29
+ import signal
30
+ import subprocess
31
+ import sys
32
+ import time
33
+ import webbrowser
34
+ from datetime import datetime, timezone
35
+ from http.server import BaseHTTPRequestHandler, HTTPServer
36
+ from pathlib import Path
37
+ from urllib.parse import parse_qs, urlparse
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Workspace discovery
42
+ # ---------------------------------------------------------------------------
43
+
44
def find_workspace(start='.'):
    """Walk upward from *start* looking for a directory containing .cleo/tasks.db.

    Returns the first ancestor (including *start* itself) that holds a task
    DB; when none is found, falls back to the resolved *start* path. The
    filesystem root is never checked (the walk stops at it).
    """
    origin = Path(start).resolve()
    for candidate in (origin, *origin.parents):
        if candidate == candidate.parent:
            break  # reached the filesystem root; stop without checking it
        if (candidate / '.cleo' / 'tasks.db').exists():
            return candidate
    return origin
52
+
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Data loaders
56
+ # ---------------------------------------------------------------------------
57
+
58
def load_grades_jsonl(path):
    """Load GRADES.jsonl. Returns list of grade result dicts.

    Each non-empty line is parsed as one JSON object. Malformed lines are
    skipped individually so a single corrupt record cannot hide the rest
    of the history.

    Args:
        path: Location of the JSONL file (str or Path).

    Returns:
        list[dict]: Parsed grade results; [] when the file does not exist.
    """
    p = Path(path)
    if not p.exists():
        return []
    results = []
    # JSONL is UTF-8 by convention; errors='replace' tolerates stray bytes.
    for line in p.read_text(encoding='utf-8', errors='replace').splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            results.append(json.loads(line))
        except json.JSONDecodeError:
            # Skip only the malformed record, not the whole file.
            continue
    return results
73
+
74
+
75
def scoreToLetter(score, max_score=100):
    """Convert a numeric score into a letter grade.

    The score is scaled to a percentage of *max_score* (a zero or falsy
    max_score yields 0%), then mapped on a standard 90/80/70/60 scale.
    A None score passes through as None.
    """
    if score is None:
        return None
    pct = (score / max_score) * 100 if max_score else 0
    for threshold, letter in ((90, 'A'), (80, 'B'), (70, 'C'), (60, 'D')):
        if pct >= threshold:
            return letter
    return 'F'
83
+
84
+
85
def load_sessions(workspace):
    """Load all sessions from SQLite with audit_log stats and token_usage totals.

    Reads <workspace>/.cleo/tasks.db, attaching per-session audit_log call
    counts, per-session token totals, and (when GRADES.jsonl exists) each
    session's best grade result.

    Args:
        workspace: Workspace root directory (str or Path).

    Returns:
        list[dict]: One dict per session with camelCase keys as expected by
        the HTML viewer, newest first. Returns [] when the DB is missing or
        any error occurs (the whole body is wrapped in a broad try/except).
    """
    import sqlite3
    db = Path(workspace) / '.cleo' / 'tasks.db'
    if not db.exists():
        return []
    try:
        conn = sqlite3.connect(str(db), timeout=5)
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()
        # One row per session; LEFT JOIN keeps sessions that have no audit
        # entries. mcp_calls counts query-gateway MCP rows, cli_calls counts
        # CLI-sourced rows.
        cur.execute("""
            SELECT
                s.id, s.name, s.status, s.scope_json,
                s.started_at, s.ended_at, s.resume_count,
                s.previous_session_id, s.next_session_id, s.agent_identifier,
                s.grade_mode, s.stats_json,
                COUNT(a.id) as audit_entries,
                SUM(CASE WHEN a.gateway = 'query' AND a.source = 'mcp' THEN 1 ELSE 0 END) as mcp_calls,
                SUM(CASE WHEN a.source = 'cli' THEN 1 ELSE 0 END) as cli_calls
            FROM sessions s
            LEFT JOIN audit_log a ON a.session_id = s.id
            GROUP BY s.id
            ORDER BY s.started_at DESC
        """)
        sessions = [dict(row) for row in cur.fetchall()]

        # Parse stats_json, scope_json, and add chain/agent fields
        for s in sessions:
            stats = {}
            try:
                # stats_json may be NULL/empty; malformed JSON falls back to {}.
                if s.get('stats_json'): stats = json.loads(s['stats_json'])
            except Exception: pass
            # Parse scope_json
            scope_raw = s.get('scope_json', '{}')
            try:
                scope_obj = json.loads(scope_raw) if scope_raw else {}
            except Exception:
                scope_obj = {}
            s['scope_type'] = scope_obj.get('type', '')
            s['scope_root_task_id'] = scope_obj.get('rootTaskId', '')
            s.pop('scope_json', None)
            s['totalActiveMinutes'] = stats.get('totalActiveMinutes', 0)
            s['tasksCompleted'] = stats.get('tasksCompleted', 0)
            s['tasksCreated'] = stats.get('tasksCreated', 0)
            # Rename snake_case DB columns to the camelCase chain/agent fields.
            s['previousSessionId'] = s.pop('previous_session_id', None)
            s['nextSessionId'] = s.pop('next_session_id', None)
            s['agentIdentifier'] = s.pop('agent_identifier', None)
            s['gradeMode'] = bool(s.pop('grade_mode', False))
            s.pop('stats_json', None)

        # Get token totals per session
        try:
            cur.execute("""
                SELECT session_id, SUM(total_tokens) as total_tokens, COUNT(*) as token_records
                FROM token_usage
                GROUP BY session_id
            """)
            token_map = {row['session_id']: dict(row) for row in cur.fetchall()}
            for s in sessions:
                tok = token_map.get(s['id'], {})
                s['total_tokens'] = tok.get('total_tokens', 0)
                s['token_records'] = tok.get('token_records', 0)
        except Exception:
            # token_usage table may be absent (older DBs); zero everything.
            for s in sessions:
                s['total_tokens'] = 0
                s['token_records'] = 0

        conn.close()

        # Map to camelCase keys expected by the HTML viewer
        mapped = []
        for s in sessions:
            started = s.get('started_at', '')
            ended = s.get('ended_at', '')
            duration_ms = None
            if started and ended:
                try:
                    # Normalize a trailing 'Z' to '+00:00' — presumably the DB
                    # stores ISO-8601 UTC timestamps; fromisoformat() rejects
                    # 'Z' on older Pythons.
                    t0 = datetime.fromisoformat(started.replace('Z', '+00:00'))
                    t1 = datetime.fromisoformat(ended.replace('Z', '+00:00'))
                    duration_ms = int((t1 - t0).total_seconds() * 1000)
                except Exception:
                    pass
            mapped.append({
                'sessionId': s.get('id', ''),
                'name': s.get('name', ''),
                'status': s.get('status', ''),
                'scope': s.get('scope_type', ''),
                'scopeRootTaskId': s.get('scope_root_task_id', ''),
                'startedAt': started,
                'endedAt': ended,
                'durationMs': duration_ms,
                'resumeCount': s.get('resume_count', 0),
                'tasksCompleted': s.get('tasksCompleted', 0),
                'tasksCreated': s.get('tasksCreated', 0),
                'auditEntries': s.get('audit_entries', 0),
                'mcpCalls': s.get('mcp_calls', 0),
                'cliCalls': s.get('cli_calls', 0),
                'totalTokens': s.get('total_tokens', 0),
                'tokenRecords': s.get('token_records', 0),
                'totalActiveMinutes': s.get('totalActiveMinutes', 0),
                'previousSessionId': s.get('previousSessionId'),
                'nextSessionId': s.get('nextSessionId'),
                'agentIdentifier': s.get('agentIdentifier'),
                'gradeMode': s.get('gradeMode', False),
            })

        # Enrich sessions with grade data from GRADES.jsonl
        grades_path = Path(workspace) / '.cleo' / 'metrics' / 'GRADES.jsonl'
        if grades_path.exists():
            grade_map = {}  # sessionId -> best grade result
            for line in grades_path.read_text(errors='replace').splitlines():
                line = line.strip()
                if not line: continue
                try:
                    g = json.loads(line)
                    sid = g.get('sessionId', '')
                    if sid:
                        # Keep only the highest totalScore per session;
                        # 'or 0' guards against null scores in the JSONL.
                        existing = grade_map.get(sid)
                        if existing is None or (g.get('totalScore', 0) or 0) > (existing.get('totalScore', 0) or 0):
                            grade_map[sid] = g
                except Exception:
                    pass
            for s in mapped:
                sid = s.get('sessionId') or s.get('id', '')
                if sid in grade_map:
                    g = grade_map[sid]
                    score = g.get('totalScore')
                    max_score = g.get('maxScore', 100)
                    s['gradeScore'] = score
                    s['gradeLetter'] = scoreToLetter(score, max_score) if score is not None else None
                    s['gradeDetails'] = g.get('dimensions', {})
                    s['gradeFlags'] = g.get('flags', [])
                    s['gradeTimestamp'] = g.get('timestamp', '')

        return mapped
    except Exception:
        return []
222
+
223
+
224
def load_token_analysis(workspace):
    """Summarize the token_usage table: transport and domain breakdowns.

    Returns a dict with 'by_transport' (totals/counts/averages keyed by
    transport), 'by_domain' (totals/counts keyed by domain), the most
    common 'confidence' label, and 'total_records'. A missing DB or any
    query failure yields a zeroed-out structure instead of raising.
    """
    import sqlite3
    db_path = Path(workspace) / '.cleo' / 'tasks.db'
    if not db_path.exists():
        return {'by_transport': {}, 'by_domain': {}, 'confidence': 'coarse', 'total_records': 0}
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()

        # Per-transport totals. NOTE(review): method/confidence are selected
        # un-aggregated under GROUP BY, so SQLite surfaces an arbitrary
        # row's value per group for them.
        cur.execute("""
            SELECT transport, method, confidence,
                   SUM(total_tokens) as total,
                   COUNT(*) as count,
                   AVG(total_tokens) as avg
            FROM token_usage GROUP BY transport
        """)
        transport_breakdown = {row['transport']: dict(row) for row in cur.fetchall()}

        # Per-domain totals.
        cur.execute("""
            SELECT domain, SUM(total_tokens) as total, COUNT(*) as count
            FROM token_usage GROUP BY domain
        """)
        domain_breakdown = {row['domain']: dict(row) for row in cur.fetchall()}

        # Dominant confidence label = the one appearing on the most records.
        cur.execute("""
            SELECT confidence, COUNT(*) as cnt FROM token_usage
            GROUP BY confidence ORDER BY cnt DESC LIMIT 1
        """)
        top = cur.fetchone()
        dominant_confidence = top['confidence'] if top else 'coarse'

        cur.execute("SELECT COUNT(*) as total FROM token_usage")
        record_count = cur.fetchone()['total']

        conn.close()
        return {
            'by_transport': transport_breakdown,
            'by_domain': domain_breakdown,
            'confidence': dominant_confidence,
            'total_records': record_count,
        }
    except Exception:
        return {'by_transport': {}, 'by_domain': {}, 'confidence': 'coarse', 'total_records': 0}
276
+
277
+
278
def load_grade_runs(workspace):
    """Collect grade-run manifests from .cleo/metrics/grade-runs/.

    Each run directory yields {'runId', 'manifest', 'summary'}; directories
    without a readable run-manifest.json are skipped, and a missing or
    corrupt summary.json simply leaves summary=None. Results are ordered
    by directory name, lexicographically last (newest) first.
    """
    base = Path(workspace) / '.cleo' / 'metrics' / 'grade-runs'
    collected = []
    if not base.exists():
        return collected
    for entry in sorted(base.iterdir(), reverse=True):
        if not entry.is_dir():
            continue
        manifest_file = entry / 'run-manifest.json'
        if not manifest_file.exists():
            continue
        try:
            manifest_data = json.loads(manifest_file.read_text())
        except Exception:
            continue  # unreadable manifest: skip the whole run
        summary_data = None
        summary_file = entry / 'summary.json'
        if summary_file.exists():
            try:
                summary_data = json.loads(summary_file.read_text())
            except Exception:
                summary_data = None  # summary is optional; tolerate corruption
        collected.append({
            'runId': entry.name,
            'manifest': manifest_data,
            'summary': summary_data,
        })
    return collected
306
+
307
+
308
def compute_per_operation_stats(workspace):
    """Aggregate operations.jsonl entries from every grade run.

    Buckets entries under "<domain>.<operation>" keys, tallying call counts
    and total durations separately for the mcp and cli interfaces, then
    derives avg_mcp_ms / avg_cli_ms. Malformed lines are ignored.
    """
    base = Path(workspace) / '.cleo' / 'metrics' / 'grade-runs'
    aggregated = {}
    if not base.exists():
        return aggregated
    for ops_path in base.rglob('operations.jsonl'):
        for raw in ops_path.read_text(errors='replace').splitlines():
            if not raw.strip():
                continue
            try:
                record = json.loads(raw)
                key = '{}.{}'.format(
                    record.get('domain', 'unknown'),
                    record.get('operation', 'unknown'),
                )
                bucket = aggregated.setdefault(key, {
                    'mcp_calls': 0, 'cli_calls': 0,
                    'total_mcp_ms': 0, 'total_cli_ms': 0,
                })
                elapsed = record.get('duration_ms', 0) or 0
                # Anything that is not explicitly 'cli' counts as mcp.
                side = 'cli' if record.get('interface', 'mcp') == 'cli' else 'mcp'
                bucket[side + '_calls'] += 1
                bucket['total_' + side + '_ms'] += elapsed
            except Exception:
                pass
    # Derive per-interface averages once all files are folded in.
    for bucket in aggregated.values():
        bucket['avg_mcp_ms'] = round(bucket['total_mcp_ms'] / bucket['mcp_calls'], 1) if bucket['mcp_calls'] > 0 else 0
        bucket['avg_cli_ms'] = round(bucket['total_cli_ms'] / bucket['cli_calls'], 1) if bucket['cli_calls'] > 0 else 0
    return aggregated
344
+
345
+
346
def load_eval_report(workspace, skill_dir=None):
    """Load grading results and attach eval names from evals.json.

    Four phases:
      1. Read eval definitions from <skill_dir>/evals/evals.json to build an
         id -> display-name map.
      2. Collect existing grading.json / <id>-grading.json files under the
         workspace and its eval-results directory (deduplicated by resolved
         path).
      3. For evals with no results yet, synthesize a grading.json from real
         grades in GRADES.jsonl using per-eval pass checkers, writing the
         file to .cleo/metrics/eval-results/ as a side effect.
      4. Append placeholder entries (_not_run=True) for evals still missing.

    Returns a list of grading-result dicts, each carrying a '_name' key.
    """
    evals_def = []
    if skill_dir:
        evals_path = Path(skill_dir) / 'evals' / 'evals.json'
        if evals_path.exists():
            try:
                evals_def = json.loads(evals_path.read_text())
            except Exception:
                pass

    # id -> short display name (description preferred, prompt as fallback),
    # truncated to 80 chars.
    id_to_name = {}
    for e in evals_def:
        if isinstance(e, dict):
            eid = e.get('id', '')
            id_to_name[eid] = (e.get('description', e.get('prompt', '')))[:80]

    # Find grading.json files under workspace (including eval-results dir)
    results = []
    seen_paths = set()
    for search_root in [Path(workspace), Path(workspace) / '.cleo' / 'metrics' / 'eval-results']:
        if not search_root.exists():
            continue
        for grading_file in search_root.rglob('grading.json'):
            # Dedupe by resolved path — the second search root is nested
            # inside the first, so the same file can be found twice.
            real_path = str(grading_file.resolve())
            if real_path in seen_paths:
                continue
            seen_paths.add(real_path)
            try:
                data = json.loads(grading_file.read_text())
                eval_id = data.get('evalId', '')
                data['_name'] = id_to_name.get(eval_id, '')
                results.append(data)
            except Exception:
                pass
    # Also scan eval-results for <id>-grading.json files
    eval_results_dir = Path(workspace) / '.cleo' / 'metrics' / 'eval-results'
    if eval_results_dir.exists():
        for gf in eval_results_dir.glob('*-grading.json'):
            real_path = str(gf.resolve())
            if real_path in seen_paths:
                continue
            seen_paths.add(real_path)
            try:
                data = json.loads(gf.read_text())
                eval_id = data.get('evalId', '')
                data['_name'] = id_to_name.get(eval_id, '')
                results.append(data)
            except Exception:
                pass

    # Synthesize grading.json from real grades for eval coverage
    grades_path = Path(workspace) / '.cleo' / 'metrics' / 'GRADES.jsonl'
    real_grades = []
    if grades_path.exists():
        for line in grades_path.read_text(errors='replace').splitlines():
            line = line.strip()
            if not line: continue
            try: real_grades.append(json.loads(line))
            except Exception: pass

    # Map eval expectations to grade dimensions
    # Each checker decides whether one grade record "passes" that eval;
    # the isinstance guard tolerates records whose 'dimensions' is not a dict.
    eval_grade_map = {
        'eval-001': lambda g: g.get('totalScore', 0) > 0,
        'eval-002': lambda g: (g.get('dimensions', {}).get('sessionDiscipline', {}).get('score', 0) if isinstance(g.get('dimensions'), dict) else 0) >= 18,
        'eval-003': lambda g: (g.get('dimensions', {}).get('taskEfficiency', {}).get('score', 0) if isinstance(g.get('dimensions'), dict) else 0) >= 15,
        'eval-004': lambda g: (g.get('dimensions', {}).get('taskHygiene', {}).get('score', 0) if isinstance(g.get('dimensions'), dict) else 0) >= 18,
        'eval-005': lambda g: (g.get('dimensions', {}).get('protocolAdherence', {}).get('score', 0) if isinstance(g.get('dimensions'), dict) else 0) >= 15,
        'eval-006': lambda g: (g.get('dimensions', {}).get('mcpGateway', {}).get('score', 0) if isinstance(g.get('dimensions'), dict) else 0) >= 15,
        'eval-007': lambda g: g.get('totalScore', 0) >= 60,
    }

    # Write grading.json for each eval based on real grade data (only if not already found)
    run_ids = {r.get('evalId') for r in results}
    if real_grades:
        evals_output_dir = Path(workspace) / '.cleo' / 'metrics' / 'eval-results'
        evals_output_dir.mkdir(parents=True, exist_ok=True)

        for e in evals_def:
            eval_id = e.get('id', '')
            if eval_id in run_ids:
                continue  # already have results
            checker = eval_grade_map.get(eval_id)
            if checker:
                # Pass rate is computed over ALL real grades, but the
                # embedded per-session results are capped at 20 records.
                passing = [g for g in real_grades if checker(g)]
                total = len(real_grades)
                pass_count = len(passing)
                grading_data = {
                    'evalId': eval_id,
                    'name': id_to_name.get(eval_id, ''),
                    'totalRuns': total,
                    'passed': pass_count,
                    'failed': total - pass_count,
                    'passRate': round(pass_count / total, 3) if total else 0,
                    'expectations': e.get('expectations', []),
                    'results': [
                        {
                            'sessionId': g.get('sessionId', ''),
                            'passed': checker(g),
                            'score': g.get('totalScore'),
                            'evidence': g.get('dimensions', {}),
                        }
                        for g in real_grades[:20]  # limit to 20 for size
                    ],
                    'generatedAt': datetime.now(timezone.utc).isoformat(),
                }
                grading_file = evals_output_dir / '{}-grading.json'.format(eval_id)
                try:
                    grading_file.write_text(json.dumps(grading_data, indent=2))
                    grading_data['_name'] = id_to_name.get(eval_id, '')
                    results.append(grading_data)
                    run_ids.add(eval_id)
                except Exception:
                    pass

    # Add NOT RUN entries for evals with no grading.json
    for e in evals_def:
        if isinstance(e, dict) and e.get('id') not in run_ids:
            results.append({
                'evalId': e.get('id'),
                '_name': id_to_name.get(e.get('id', ''), ''),
                '_not_run': True,
            })
    return results
470
+
471
+
472
def load_live_session(workspace):
    """Return the currently-active session with its most recent activity.

    Looks up the newest session whose status is 'active', then gathers its
    last 50 audit_log entries and summed token usage. Returns
    {'session_id': None, 'entries': []} when the DB is missing or no
    session is active; DB errors are surfaced via an 'error' key instead
    of being raised.
    """
    import sqlite3
    db_path = Path(workspace) / '.cleo' / 'tasks.db'
    if not db_path.exists():
        return {'session_id': None, 'entries': []}
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()

        cur.execute(
            "SELECT id, name, status, started_at FROM sessions "
            "WHERE status='active' ORDER BY started_at DESC LIMIT 1"
        )
        active = cur.fetchone()
        if active is None:
            conn.close()
            return {'session_id': None, 'entries': []}

        # Most recent 50 audit entries for the active session.
        cur.execute(
            """SELECT timestamp, domain, operation, source, gateway, duration_ms, success
               FROM audit_log
               WHERE session_id = ?
               ORDER BY timestamp DESC LIMIT 50""",
            (active['id'],),
        )
        recent = []
        for r in cur.fetchall():
            item = dict(r)
            item['success'] = bool(item['success'])  # SQLite stores 0/1
            recent.append(item)

        # Token totals for this session (table may not exist in older DBs).
        token_sum = 0
        try:
            cur.execute(
                "SELECT SUM(total_tokens) as total FROM token_usage WHERE session_id = ?",
                (active['id'],),
            )
            total_row = cur.fetchone()
            if total_row and total_row['total']:
                token_sum = total_row['total']
        except Exception:
            pass

        payload = {
            'session_id': active['id'],
            'session_name': active['name'],
            'started_at': active['started_at'],
            'total_tokens': token_sum,
            'entries': recent,
        }
        conn.close()
        return payload
    except Exception as exc:
        return {'session_id': None, 'entries': [], 'error': str(exc)}
540
+
541
+
542
def load_session_detail(workspace, session_id):
    """Load audit entries, token totals, and the full session row for one session.

    Args:
        workspace: Project root; reads <workspace>/.cleo/tasks.db.
        session_id: Session primary key; falsy values short-circuit to empty data.

    Returns:
        dict: {'entries': [...], 'tokens': {...}, 'session': {...}}.
        'entries' holds up to 500 audit rows, newest first; 'tokens' holds
        aggregate token_usage sums ({} when the table is unavailable);
        'session' is the decoded sessions row ({} when not found). Any
        failure yields the same empty shape, so callers never see a raise.
    """
    import sqlite3

    empty = {'entries': [], 'tokens': {}, 'session': {}}
    db = Path(workspace) / '.cleo' / 'tasks.db'
    if not db.exists() or not session_id:
        return empty

    conn = None
    try:
        conn = sqlite3.connect(str(db), timeout=5)
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()

        # Full session row, with its *_json columns decoded.
        cur.execute("""
            SELECT id, name, status, scope_json, notes_json, tasks_completed_json, tasks_created_json,
                   handoff_json, debrief_json, stats_json, started_at, ended_at,
                   previous_session_id, next_session_id, agent_identifier,
                   handoff_consumed_at, resume_count, grade_mode
            FROM sessions WHERE id = ?
        """, (session_id,))
        srow = cur.fetchone()
        session_info = {}
        if srow:
            srow = dict(srow)

            def parse_j(v, default=None):
                # Stored JSON columns may be NULL or malformed; fall back quietly.
                if not v:
                    return default
                try:
                    return json.loads(v)
                except Exception:
                    return default

            session_info = {
                'id': srow['id'],
                'name': srow['name'],
                'status': srow['status'],
                'scope': parse_j(srow['scope_json'], {}),
                'notes': parse_j(srow['notes_json'], []),
                'tasksCompleted': parse_j(srow['tasks_completed_json'], []),
                'tasksCreated': parse_j(srow['tasks_created_json'], []),
                'handoff': parse_j(srow['handoff_json']),
                'debrief': parse_j(srow['debrief_json']),
                'stats': parse_j(srow['stats_json'], {}),
                'startedAt': srow['started_at'],
                'endedAt': srow['ended_at'],
                'previousSessionId': srow['previous_session_id'],
                'nextSessionId': srow['next_session_id'],
                'agentIdentifier': srow['agent_identifier'],
                'handoffConsumedAt': srow['handoff_consumed_at'],
                'resumeCount': srow['resume_count'] or 0,
                'gradeMode': bool(srow['grade_mode']),
            }

        # Recent audit trail (capped at 500 rows to bound the JSON payload).
        cur.execute(
            """SELECT timestamp, domain, operation, source, gateway, duration_ms, success
               FROM audit_log
               WHERE session_id = ?
               ORDER BY timestamp DESC LIMIT 500""",
            (session_id,),
        )
        entries = [
            {
                'timestamp': r['timestamp'],
                'domain': r['domain'],
                'operation': r['operation'],
                'source': r['source'],
                'gateway': r['gateway'],
                'duration_ms': r['duration_ms'],
                'success': bool(r['success']),
            }
            for r in cur.fetchall()
        ]

        # Token aggregates; the token_usage table may not exist yet,
        # so this sub-query stays best-effort.
        tokens = {}
        try:
            cur.execute(
                """SELECT SUM(total_tokens) as total_tokens,
                          SUM(input_tokens) as input_tokens,
                          SUM(output_tokens) as output_tokens,
                          COUNT(*) as records
                   FROM token_usage WHERE session_id = ?""",
                (session_id,),
            )
            row = cur.fetchone()
            if row:
                tokens = dict(row)
        except Exception:
            pass

        return {'entries': entries, 'tokens': tokens, 'session': session_info}
    except Exception:
        return empty
    finally:
        # BUG FIX: the original only closed the connection on the success
        # path, leaking it whenever any query raised. Close on every path.
        if conn is not None:
            conn.close()
630
+
631
+
632
def enrich_grades_with_tokens(grades, workspace):
    """Attach a '_tokenMeta' dict to each grade whose sessionId has token_usage rows.

    Args:
        grades: list of grade dicts (mutated in place and also returned).
        workspace: project root containing .cleo/tasks.db.

    Returns:
        The same grades list. Grades without a matching session, and the
        whole list when the DB or token_usage table is unavailable, are
        returned unchanged (best-effort enrichment, never raises).
    """
    import sqlite3

    db = Path(workspace) / '.cleo' / 'tasks.db'
    if not db.exists():
        return grades
    conn = None
    try:
        conn = sqlite3.connect(str(db), timeout=5)
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()
        cur.execute("""
            SELECT session_id, SUM(total_tokens) as total_tokens,
                   SUM(input_tokens) as input_tokens, SUM(output_tokens) as output_tokens,
                   COUNT(*) as records,
                   MAX(confidence) as confidence, MAX(method) as method
            FROM token_usage GROUP BY session_id
        """)
        token_map = {row['session_id']: dict(row) for row in cur.fetchall()}
        for g in grades:
            sid = g.get('sessionId')
            if sid and sid in token_map:
                t = token_map[sid]
                g['_tokenMeta'] = {
                    'total_tokens': t['total_tokens'],
                    'input_tokens': t['input_tokens'],
                    'output_tokens': t['output_tokens'],
                    # NULL columns fall back to the coarsest labels.
                    'confidence': t['confidence'] or 'coarse',
                    'method': t['method'] or 'heuristic',
                    'records': t['records'],
                }
    except Exception:
        pass
    finally:
        # BUG FIX: the original only closed the connection after a successful
        # query, leaking it when execute() raised. Close on every path.
        if conn is not None:
            conn.close()
    return grades
666
+
667
+
668
+ # ---------------------------------------------------------------------------
669
+ # Canonical operation list (for operation matrix)
670
+ # ---------------------------------------------------------------------------
671
+
672
+ def _canonical_ops():
673
+ """Return list of (op, domain, tier, gateway) for all canonical ops."""
674
+ ops = []
675
+ def add(domain, tier, gateway, names):
676
+ for n in names:
677
+ ops.append({
678
+ 'operation': domain + '.' + n,
679
+ 'domain': domain,
680
+ 'tier': tier,
681
+ 'gateway': gateway,
682
+ })
683
+
684
+ add('tasks', 0, 'query', ['find', 'show', 'list', 'tree', 'plan', 'exists'])
685
+ add('tasks', 0, 'mutate', ['add', 'update', 'complete', 'cancel', 'archive', 'restore', 'relates', 'depends', 'history'])
686
+ add('session', 0, 'query', ['status', 'list', 'briefing.show', 'handoff.show', 'context.drift'])
687
+ add('session', 0, 'mutate', ['start', 'end', 'decision.log', 'record.decision', 'context.inject'])
688
+ add('admin', 0, 'query', ['dash', 'health', 'help', 'stats', 'doctor', 'grade', 'grade.list', 'adr.find'])
689
+ add('memory', 1, 'query', ['find', 'timeline', 'fetch', 'pattern.find', 'learning.find'])
690
+ add('memory', 1, 'mutate', ['observe'])
691
+ add('tools', 1, 'query', ['skill.list', 'skill.show', 'provider.list', 'provider.show'])
692
+ add('check', 1, 'query', ['health', 'schema', 'compliance'])
693
+ add('pipeline', 1, 'query', ['stage.status', 'manifest.list'])
694
+ add('pipeline', 1, 'mutate', ['stage.record', 'stage.gate.pass', 'stage.validate', 'manifest.add', 'manifest.remove'])
695
+ add('orchestrate', 2, 'query', ['analyze', 'ready', 'next'])
696
+ add('orchestrate', 2, 'mutate', ['spawn', 'start'])
697
+ add('nexus', 2, 'query', ['status', 'project.list', 'project.show'])
698
+ add('nexus', 2, 'mutate', ['project.add'])
699
+ add('sticky', 2, 'query', ['list', 'show'])
700
+ add('sticky', 2, 'mutate', ['add', 'convert', 'archive', 'purge'])
701
+ return ops
702
+
703
+
704
def build_operation_matrix(op_stats):
    """Merge the canonical operation list with measured grade-run stats.

    Args:
        op_stats: mapping of operation name -> stats dict (mcp_calls,
            cli_calls, avg_mcp_ms, avg_cli_ms) from grade runs.

    Returns:
        A list sorted by operation name (the HTML viewer expects an array),
        one row per canonical op plus any extra ops seen only in op_stats.
    """
    def blank_row(operation, domain, tier, gateway):
        # Untested default: zero calls, unknown latencies.
        return {
            'operation': operation,
            'domain': domain,
            'tier': tier,
            'gateway': gateway,
            'mcp_calls': 0,
            'cli_calls': 0,
            'avg_mcp_ms': None,
            'avg_cli_ms': None,
            'tested': False,
        }

    rows = {}
    for op in _canonical_ops():
        rows[op['operation']] = blank_row(
            op['operation'], op['domain'], op['tier'], op['gateway'])

    # Overlay measured stats; ops not in the canonical list get a
    # synthesized tier-0 query row so nothing measured is dropped.
    for op_key, stats in op_stats.items():
        row = rows.get(op_key)
        if row is None:
            row = blank_row(op_key, op_key.split('.', 1)[0], 0, 'query')
            rows[op_key] = row
        mcp = stats.get('mcp_calls', 0) or 0
        cli = stats.get('cli_calls', 0) or 0
        row['mcp_calls'] = mcp
        row['cli_calls'] = cli
        row['avg_mcp_ms'] = stats.get('avg_mcp_ms')
        row['avg_cli_ms'] = stats.get('avg_cli_ms')
        row['tested'] = bool(mcp or cli)

    return sorted(rows.values(), key=lambda r: r['operation'])
747
+
748
+
749
+ # ---------------------------------------------------------------------------
750
+ # Embedded data builder
751
+ # ---------------------------------------------------------------------------
752
+
753
def build_embedded_data(workspace, skill_dir=None):
    """Assemble the complete data dict embedded into the viewer page (9 keys).

    Gathers grades, sessions, grade runs, token analysis, the operation
    matrix, eval reports and the live session from *workspace*, then layers
    on a grade summary and generation metadata.
    """
    root = Path(workspace).resolve()

    # 1. Grades from the JSONL metrics file.
    grades = load_grades_jsonl(str(root / '.cleo' / 'metrics' / 'GRADES.jsonl'))

    # 2. Sessions from SQLite.
    sessions = load_sessions(root)

    # 3. Grade runs: newest run's summary becomes the A/B panel; every
    #    manifest feeds the history list.
    runs = load_grade_runs(root)
    latest_summary = {}
    if runs and runs[0].get('summary'):
        latest_summary = runs[0]['summary']
    run_history = [run['manifest'] for run in runs]

    # 4. Token analysis from SQLite.
    token_analysis = load_token_analysis(root)

    # 5. Operation matrix: canonical ops overlaid with grade-run stats.
    operation_matrix = build_operation_matrix(compute_per_operation_stats(root))

    # 6. Eval report.
    evals = load_eval_report(root, skill_dir)

    # 7. Live session snapshot.
    live = load_live_session(root)

    # 8. Attach per-session token metadata to each grade.
    grades = enrich_grades_with_tokens(grades, root)

    # 9. Aggregate score statistics across grades/sessions.
    def summarize(grade_rows, session_rows):
        if not grade_rows:
            return {'total': 0, 'graded': 0, 'avgScore': None, 'distribution': {}}
        scores = [g.get('totalScore') for g in grade_rows if g.get('totalScore') is not None]
        distribution = {}
        for score in scores:
            letter = scoreToLetter(score)
            distribution[letter] = distribution.get(letter, 0) + 1
        graded = sum(1 for s in session_rows if s.get('gradeScore') is not None)
        return {
            'total': len(grade_rows),
            'graded': graded,
            'avgScore': round(sum(scores) / len(scores), 1) if scores else None,
            'maxScore': max(scores) if scores else None,
            'minScore': min(scores) if scores else None,
            'distribution': distribution,
        }

    return {
        'grades': grades,
        'sessions': sessions,
        'ab_results': latest_summary,
        'ab_history': run_history,
        'token_analysis': token_analysis,
        'operation_matrix': operation_matrix,
        'eval_report': {'evals': evals},
        'grade_summary': summarize(grades, sessions),
        'live_session': live,
        'metadata': {
            'generated_at': datetime.now(timezone.utc).isoformat(),
            'workspace': str(root),
            'skill_version': '2.1.0',
            'grade_count': len(grades),
            'session_count': len(sessions),
        },
    }
824
+
825
+
826
+ # ---------------------------------------------------------------------------
827
+ # HTML generator
828
+ # ---------------------------------------------------------------------------
829
+
830
def generate_html(data, template):
    """Return the grade-review template with its data placeholder filled in.

    *data* is serialized to JSON (default=str covers Paths/datetimes) and
    substituted for the literal '{{EMBEDDED_DATA}}' marker in *template*.
    """
    serialized = json.dumps(data, ensure_ascii=False, default=str)
    marker = '{{EMBEDDED_DATA}}'
    return template.replace(marker, serialized)
834
+
835
+
836
+ # ---------------------------------------------------------------------------
837
+ # HTTP server
838
+ # ---------------------------------------------------------------------------
839
+
840
def _kill_port(port):
    """Best-effort: SIGTERM any process currently listening on *port*.

    Uses lsof to find the PIDs; silently does nothing when lsof is missing
    (non-POSIX platforms) or times out.
    """
    try:
        probe = subprocess.run(
            ['lsof', '-ti', ':{}'.format(port)],
            capture_output=True, text=True, timeout=5,
        )
        listeners = probe.stdout.strip()
        for pid_text in listeners.split('\n'):
            pid_text = pid_text.strip()
            if not pid_text:
                continue
            try:
                os.kill(int(pid_text), signal.SIGTERM)
            except (ProcessLookupError, ValueError):
                pass
        if listeners:
            # Give the previous server a moment to release the socket.
            time.sleep(0.5)
    except (subprocess.TimeoutExpired, FileNotFoundError):
        pass
856
+
857
+
858
+ class GradeReviewHandler(BaseHTTPRequestHandler):
859
+ """Serves the grade review HTML and JSON API endpoints."""
860
+
861
+ def do_GET(self):
862
+ parsed = urlparse(self.path)
863
+ path = parsed.path
864
+
865
+ if path in ('/', '', '/index.html'):
866
+ self._serve_main()
867
+ elif path == '/live-data':
868
+ self._serve_live_data()
869
+ elif path.startswith('/sessions-data'):
870
+ self._serve_session_data(parsed)
871
+ else:
872
+ self.send_error(404)
873
+
874
+ def _serve_main(self):
875
+ try:
876
+ data = build_embedded_data(self.server.workspace, self.server.skill_dir)
877
+ html = generate_html(data, self.server.template)
878
+ content = html.encode('utf-8')
879
+ self.send_response(200)
880
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
881
+ self.send_header('Content-Length', str(len(content)))
882
+ self.end_headers()
883
+ self.wfile.write(content)
884
+ except Exception as e:
885
+ msg = 'Error generating review: {}'.format(e).encode('utf-8')
886
+ self.send_response(500)
887
+ self.send_header('Content-Type', 'text/plain')
888
+ self.send_header('Content-Length', str(len(msg)))
889
+ self.end_headers()
890
+ self.wfile.write(msg)
891
+
892
+ def _serve_live_data(self):
893
+ try:
894
+ live = load_live_session(self.server.workspace)
895
+ body = json.dumps({'live_session': live}, default=str).encode('utf-8')
896
+ self.send_response(200)
897
+ self.send_header('Content-Type', 'application/json')
898
+ self.send_header('Content-Length', str(len(body)))
899
+ self.send_header('Cache-Control', 'no-cache')
900
+ self.end_headers()
901
+ self.wfile.write(body)
902
+ except Exception as e:
903
+ err = json.dumps({'error': str(e), 'live_session': {'session_id': None, 'entries': []}}).encode('utf-8')
904
+ self.send_response(500)
905
+ self.send_header('Content-Type', 'application/json')
906
+ self.send_header('Content-Length', str(len(err)))
907
+ self.end_headers()
908
+ self.wfile.write(err)
909
+
910
+ def _serve_session_data(self, parsed):
911
+ try:
912
+ params = parse_qs(parsed.query)
913
+ session_id = params.get('sessionId', [None])[0]
914
+ data = load_session_detail(self.server.workspace, session_id)
915
+ body = json.dumps(data, default=str).encode('utf-8')
916
+ self.send_response(200)
917
+ self.send_header('Content-Type', 'application/json')
918
+ self.send_header('Content-Length', str(len(body)))
919
+ self.send_header('Cache-Control', 'no-cache')
920
+ self.end_headers()
921
+ self.wfile.write(body)
922
+ except Exception as e:
923
+ err = json.dumps({'error': str(e), 'entries': [], 'tokens': {}}).encode('utf-8')
924
+ self.send_response(500)
925
+ self.send_header('Content-Type', 'application/json')
926
+ self.send_header('Content-Length', str(len(err)))
927
+ self.end_headers()
928
+ self.wfile.write(err)
929
+
930
+ def log_message(self, fmt, *args):
931
+ pass # suppress access logs
932
+
933
+
934
+ # ---------------------------------------------------------------------------
935
+ # Main
936
+ # ---------------------------------------------------------------------------
937
+
938
def main():
    """CLI entry point for the grade review viewer.

    Builds the embedded dataset from the workspace argument, then either
    writes a standalone HTML file (--static) or serves it on a local HTTP
    server, optionally opening the browser.
    """
    parser = argparse.ArgumentParser(description='CLEO Grade Review viewer (v2.1)')
    parser.add_argument('workspace', type=Path, help='Project or results directory to scan')
    parser.add_argument('--port', '-p', type=int, default=3119)
    parser.add_argument('--static', '-s', type=Path, default=None,
                        help='Write standalone HTML to this path instead of serving')
    parser.add_argument('--skill-dir', default=None,
                        help='Override skill directory (default: auto-detect from __file__)')
    parser.add_argument('--no-browser', action='store_true', help='Do not auto-open browser')
    args = parser.parse_args()

    ws = args.workspace.resolve()
    if not ws.exists():
        print('ERROR: workspace does not exist: {}'.format(ws), file=sys.stderr)
        sys.exit(1)

    # The generator lives in grade-viewer/; the skill root is one level up.
    skill_dir = args.skill_dir or str(Path(__file__).parent.parent)

    # The HTML template is loaded exactly once and reused per request.
    tpl_path = Path(__file__).parent / 'grade-review.html'
    if not tpl_path.exists():
        print('ERROR: grade-review.html not found at {}'.format(tpl_path), file=sys.stderr)
        sys.exit(1)
    template = tpl_path.read_text(encoding='utf-8')

    data = build_embedded_data(ws, skill_dir)

    n_grades = len(data.get('grades', []))
    n_sessions = len(data.get('sessions', []))
    n_evals = len(data.get('eval_report', {}).get('evals', []))
    n_ops = len(data.get('operation_matrix', []))

    print('\n ct-grade v2.1 Review Viewer', file=sys.stderr)
    print(' {}'.format('\u2500' * 40), file=sys.stderr)
    print(' Workspace : {}'.format(ws), file=sys.stderr)
    print(' Grades : {}'.format(n_grades), file=sys.stderr)
    print(' Sessions : {}'.format(n_sessions), file=sys.stderr)
    print(' Eval reports : {}'.format(n_evals), file=sys.stderr)
    print(' Op matrix : {} operations'.format(n_ops), file=sys.stderr)

    if not n_grades and not n_sessions:
        print('\n WARNING: No data found. Run a grading scenario first.', file=sys.stderr)

    # Static mode: write the self-contained HTML and exit.
    if args.static:
        html = generate_html(data, template)
        args.static.parent.mkdir(parents=True, exist_ok=True)
        args.static.write_text(html, encoding='utf-8')
        print('\n Static viewer written to: {}'.format(args.static), file=sys.stderr)
        sys.exit(0)

    port = args.port
    _kill_port(port)

    try:
        server = HTTPServer(('127.0.0.1', port), GradeReviewHandler)
    except OSError:
        # Requested port still busy: let the OS choose a free one.
        server = HTTPServer(('127.0.0.1', 0), GradeReviewHandler)
        port = server.server_address[1]

    # The request handler reads these attributes off the server object.
    server.workspace = ws
    server.skill_dir = skill_dir
    server.template = template

    url = 'http://localhost:{}'.format(port)
    print(' URL : {}'.format(url), file=sys.stderr)
    print('\n Refreshing the browser re-scans the workspace for new results.', file=sys.stderr)
    print(' /live-data and /sessions-data?sessionId=X for JSON API.', file=sys.stderr)
    print(' Press Ctrl+C to stop.\n', file=sys.stderr)

    def handle_sigint(sig, frame):
        print('\nStopped.', file=sys.stderr)
        server.server_close()
        sys.exit(0)

    signal.signal(signal.SIGINT, handle_sigint)

    if not args.no_browser:
        webbrowser.open(url)

    server.serve_forever()
1020
+
1021
+
1022
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == '__main__':
    main()