@cleocode/skills 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. package/dispatch-config.json +404 -0
  2. package/index.d.ts +178 -0
  3. package/index.js +405 -0
  4. package/package.json +14 -0
  5. package/profiles/core.json +7 -0
  6. package/profiles/full.json +10 -0
  7. package/profiles/minimal.json +7 -0
  8. package/profiles/recommended.json +7 -0
  9. package/provider-skills-map.json +97 -0
  10. package/skills/_shared/cleo-style-guide.md +84 -0
  11. package/skills/_shared/manifest-operations.md +810 -0
  12. package/skills/_shared/placeholders.json +433 -0
  13. package/skills/_shared/skill-chaining-patterns.md +237 -0
  14. package/skills/_shared/subagent-protocol-base.md +223 -0
  15. package/skills/_shared/task-system-integration.md +232 -0
  16. package/skills/_shared/testing-framework-config.md +110 -0
  17. package/skills/ct-cleo/SKILL.md +490 -0
  18. package/skills/ct-cleo/references/anti-patterns.md +19 -0
  19. package/skills/ct-cleo/references/loom-lifecycle.md +136 -0
  20. package/skills/ct-cleo/references/orchestrator-constraints.md +55 -0
  21. package/skills/ct-cleo/references/session-protocol.md +162 -0
  22. package/skills/ct-codebase-mapper/SKILL.md +82 -0
  23. package/skills/ct-contribution/SKILL.md +521 -0
  24. package/skills/ct-contribution/templates/contribution-init.json +21 -0
  25. package/skills/ct-dev-workflow/SKILL.md +423 -0
  26. package/skills/ct-docs-lookup/SKILL.md +66 -0
  27. package/skills/ct-docs-review/SKILL.md +175 -0
  28. package/skills/ct-docs-write/SKILL.md +108 -0
  29. package/skills/ct-documentor/SKILL.md +231 -0
  30. package/skills/ct-epic-architect/SKILL.md +305 -0
  31. package/skills/ct-epic-architect/references/bug-epic-example.md +172 -0
  32. package/skills/ct-epic-architect/references/commands.md +201 -0
  33. package/skills/ct-epic-architect/references/feature-epic-example.md +210 -0
  34. package/skills/ct-epic-architect/references/migration-epic-example.md +244 -0
  35. package/skills/ct-epic-architect/references/output-format.md +92 -0
  36. package/skills/ct-epic-architect/references/patterns.md +284 -0
  37. package/skills/ct-epic-architect/references/refactor-epic-example.md +412 -0
  38. package/skills/ct-epic-architect/references/research-epic-example.md +226 -0
  39. package/skills/ct-epic-architect/references/shell-escaping.md +86 -0
  40. package/skills/ct-epic-architect/references/skill-aware-execution.md +195 -0
  41. package/skills/ct-grade/SKILL.md +230 -0
  42. package/skills/ct-grade/agents/analysis-reporter.md +203 -0
  43. package/skills/ct-grade/agents/blind-comparator.md +157 -0
  44. package/skills/ct-grade/agents/scenario-runner.md +134 -0
  45. package/skills/ct-grade/eval-viewer/__pycache__/generate_grade_review.cpython-314.pyc +0 -0
  46. package/skills/ct-grade/eval-viewer/generate_grade_review.py +1138 -0
  47. package/skills/ct-grade/eval-viewer/generate_grade_viewer.py +544 -0
  48. package/skills/ct-grade/eval-viewer/generate_review.py +283 -0
  49. package/skills/ct-grade/eval-viewer/grade-review.html +1574 -0
  50. package/skills/ct-grade/eval-viewer/viewer.html +219 -0
  51. package/skills/ct-grade/evals/evals.json +94 -0
  52. package/skills/ct-grade/references/ab-test-methodology.md +150 -0
  53. package/skills/ct-grade/references/domains.md +137 -0
  54. package/skills/ct-grade/references/grade-spec.md +236 -0
  55. package/skills/ct-grade/references/scenario-playbook.md +234 -0
  56. package/skills/ct-grade/references/token-tracking.md +120 -0
  57. package/skills/ct-grade/scripts/__pycache__/audit_analyzer.cpython-314.pyc +0 -0
  58. package/skills/ct-grade/scripts/__pycache__/run_ab_test.cpython-314.pyc +0 -0
  59. package/skills/ct-grade/scripts/__pycache__/run_all.cpython-314.pyc +0 -0
  60. package/skills/ct-grade/scripts/__pycache__/token_tracker.cpython-314.pyc +0 -0
  61. package/skills/ct-grade/scripts/audit_analyzer.py +279 -0
  62. package/skills/ct-grade/scripts/generate_report.py +283 -0
  63. package/skills/ct-grade/scripts/run_ab_test.py +504 -0
  64. package/skills/ct-grade/scripts/run_all.py +287 -0
  65. package/skills/ct-grade/scripts/setup_run.py +183 -0
  66. package/skills/ct-grade/scripts/token_tracker.py +630 -0
  67. package/skills/ct-grade-v2-1/SKILL.md +237 -0
  68. package/skills/ct-grade-v2-1/agents/analysis-reporter.md +203 -0
  69. package/skills/ct-grade-v2-1/agents/blind-comparator.md +157 -0
  70. package/skills/ct-grade-v2-1/agents/scenario-runner.md +179 -0
  71. package/skills/ct-grade-v2-1/evals/evals.json +74 -0
  72. package/skills/ct-grade-v2-1/grade-viewer/__pycache__/build_op_stats.cpython-314.pyc +0 -0
  73. package/skills/ct-grade-v2-1/grade-viewer/__pycache__/generate_grade_review.cpython-314.pyc +0 -0
  74. package/skills/ct-grade-v2-1/grade-viewer/build_op_stats.py +174 -0
  75. package/skills/ct-grade-v2-1/grade-viewer/eval-analysis.json +41 -0
  76. package/skills/ct-grade-v2-1/grade-viewer/eval-report.md +34 -0
  77. package/skills/ct-grade-v2-1/grade-viewer/generate_grade_review.py +1023 -0
  78. package/skills/ct-grade-v2-1/grade-viewer/generate_grade_viewer.py +548 -0
  79. package/skills/ct-grade-v2-1/grade-viewer/grade-review-eval.html +613 -0
  80. package/skills/ct-grade-v2-1/grade-viewer/grade-review.html +1532 -0
  81. package/skills/ct-grade-v2-1/grade-viewer/viewer.html +620 -0
  82. package/skills/ct-grade-v2-1/manifest-entry.json +31 -0
  83. package/skills/ct-grade-v2-1/references/ab-testing.md +233 -0
  84. package/skills/ct-grade-v2-1/references/domains-ssot.md +156 -0
  85. package/skills/ct-grade-v2-1/references/grade-spec-v2.md +167 -0
  86. package/skills/ct-grade-v2-1/references/playbook-v2.md +393 -0
  87. package/skills/ct-grade-v2-1/references/token-tracking.md +202 -0
  88. package/skills/ct-grade-v2-1/scripts/generate_report.py +419 -0
  89. package/skills/ct-grade-v2-1/scripts/run_ab_test.py +493 -0
  90. package/skills/ct-grade-v2-1/scripts/run_scenario.py +396 -0
  91. package/skills/ct-grade-v2-1/scripts/setup_run.py +207 -0
  92. package/skills/ct-grade-v2-1/scripts/token_tracker.py +175 -0
  93. package/skills/ct-memory/SKILL.md +84 -0
  94. package/skills/ct-orchestrator/INSTALL.md +61 -0
  95. package/skills/ct-orchestrator/README.md +69 -0
  96. package/skills/ct-orchestrator/SKILL.md +380 -0
  97. package/skills/ct-orchestrator/manifest-entry.json +19 -0
  98. package/skills/ct-orchestrator/orchestrator-prompt.txt +17 -0
  99. package/skills/ct-orchestrator/references/SUBAGENT-PROTOCOL-BLOCK.md +66 -0
  100. package/skills/ct-orchestrator/references/autonomous-operation.md +167 -0
  101. package/skills/ct-orchestrator/references/lifecycle-gates.md +98 -0
  102. package/skills/ct-orchestrator/references/orchestrator-compliance.md +271 -0
  103. package/skills/ct-orchestrator/references/orchestrator-handoffs.md +85 -0
  104. package/skills/ct-orchestrator/references/orchestrator-patterns.md +164 -0
  105. package/skills/ct-orchestrator/references/orchestrator-recovery.md +113 -0
  106. package/skills/ct-orchestrator/references/orchestrator-spawning.md +271 -0
  107. package/skills/ct-orchestrator/references/orchestrator-tokens.md +180 -0
  108. package/skills/ct-research-agent/SKILL.md +226 -0
  109. package/skills/ct-skill-creator/.cleo/.context-state.json +13 -0
  110. package/skills/ct-skill-creator/.cleo/logs/cleo.2026-03-07.1.log +24 -0
  111. package/skills/ct-skill-creator/.cleo/tasks.db +0 -0
  112. package/skills/ct-skill-creator/SKILL.md +356 -0
  113. package/skills/ct-skill-creator/agents/analyzer.md +276 -0
  114. package/skills/ct-skill-creator/agents/comparator.md +204 -0
  115. package/skills/ct-skill-creator/agents/grader.md +225 -0
  116. package/skills/ct-skill-creator/assets/eval_review.html +146 -0
  117. package/skills/ct-skill-creator/eval-viewer/__pycache__/generate_review.cpython-314.pyc +0 -0
  118. package/skills/ct-skill-creator/eval-viewer/generate_review.py +471 -0
  119. package/skills/ct-skill-creator/eval-viewer/viewer.html +1325 -0
  120. package/skills/ct-skill-creator/manifest-entry.json +17 -0
  121. package/skills/ct-skill-creator/references/dynamic-context.md +228 -0
  122. package/skills/ct-skill-creator/references/frontmatter.md +83 -0
  123. package/skills/ct-skill-creator/references/invocation-control.md +165 -0
  124. package/skills/ct-skill-creator/references/output-patterns.md +86 -0
  125. package/skills/ct-skill-creator/references/provider-deployment.md +175 -0
  126. package/skills/ct-skill-creator/references/schemas.md +430 -0
  127. package/skills/ct-skill-creator/references/workflows.md +28 -0
  128. package/skills/ct-skill-creator/scripts/__init__.py +1 -0
  129. package/skills/ct-skill-creator/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  130. package/skills/ct-skill-creator/scripts/__pycache__/aggregate_benchmark.cpython-314.pyc +0 -0
  131. package/skills/ct-skill-creator/scripts/__pycache__/generate_report.cpython-314.pyc +0 -0
  132. package/skills/ct-skill-creator/scripts/__pycache__/improve_description.cpython-314.pyc +0 -0
  133. package/skills/ct-skill-creator/scripts/__pycache__/init_skill.cpython-314.pyc +0 -0
  134. package/skills/ct-skill-creator/scripts/__pycache__/quick_validate.cpython-314.pyc +0 -0
  135. package/skills/ct-skill-creator/scripts/__pycache__/run_eval.cpython-314.pyc +0 -0
  136. package/skills/ct-skill-creator/scripts/__pycache__/run_loop.cpython-314.pyc +0 -0
  137. package/skills/ct-skill-creator/scripts/__pycache__/utils.cpython-314.pyc +0 -0
  138. package/skills/ct-skill-creator/scripts/aggregate_benchmark.py +401 -0
  139. package/skills/ct-skill-creator/scripts/generate_report.py +326 -0
  140. package/skills/ct-skill-creator/scripts/improve_description.py +247 -0
  141. package/skills/ct-skill-creator/scripts/init_skill.py +306 -0
  142. package/skills/ct-skill-creator/scripts/package_skill.py +110 -0
  143. package/skills/ct-skill-creator/scripts/quick_validate.py +97 -0
  144. package/skills/ct-skill-creator/scripts/run_eval.py +310 -0
  145. package/skills/ct-skill-creator/scripts/run_loop.py +328 -0
  146. package/skills/ct-skill-creator/scripts/utils.py +47 -0
  147. package/skills/ct-skill-validator/SKILL.md +178 -0
  148. package/skills/ct-skill-validator/agents/ecosystem-checker.md +151 -0
  149. package/skills/ct-skill-validator/assets/valid-skill-example.md +13 -0
  150. package/skills/ct-skill-validator/evals/eval_set.json +14 -0
  151. package/skills/ct-skill-validator/evals/evals.json +52 -0
  152. package/skills/ct-skill-validator/manifest-entry.json +20 -0
  153. package/skills/ct-skill-validator/references/cleo-ecosystem-rules.md +163 -0
  154. package/skills/ct-skill-validator/references/validation-rules.md +168 -0
  155. package/skills/ct-skill-validator/scripts/__init__.py +0 -0
  156. package/skills/ct-skill-validator/scripts/__pycache__/audit_body.cpython-314.pyc +0 -0
  157. package/skills/ct-skill-validator/scripts/__pycache__/check_ecosystem.cpython-314.pyc +0 -0
  158. package/skills/ct-skill-validator/scripts/__pycache__/generate_validation_report.cpython-314.pyc +0 -0
  159. package/skills/ct-skill-validator/scripts/__pycache__/validate.cpython-314.pyc +0 -0
  160. package/skills/ct-skill-validator/scripts/audit_body.py +242 -0
  161. package/skills/ct-skill-validator/scripts/check_ecosystem.py +169 -0
  162. package/skills/ct-skill-validator/scripts/check_manifest.py +172 -0
  163. package/skills/ct-skill-validator/scripts/generate_validation_report.py +442 -0
  164. package/skills/ct-skill-validator/scripts/validate.py +422 -0
  165. package/skills/ct-spec-writer/SKILL.md +189 -0
  166. package/skills/ct-stickynote/README.md +14 -0
  167. package/skills/ct-stickynote/SKILL.md +46 -0
  168. package/skills/ct-task-executor/SKILL.md +296 -0
  169. package/skills/ct-validator/SKILL.md +216 -0
  170. package/skills/manifest.json +469 -0
  171. package/skills.json +281 -0
@@ -0,0 +1,1138 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Generate and serve the CLEO Grade Review viewer (v1.1 — API-aware SQLite-backed).
4
+
5
+ Reads grade results from GRADES.jsonl, session/token data from tasks.db,
6
+ grade run manifests from .cleo/metrics/grade-runs/, and eval results from
7
+ evals.json + grading.json files. Embeds all data into grade-review.html
8
+ as JSON in the {{EMBEDDED_DATA}} placeholder and serves via stdlib HTTP.
9
+
10
+ Sources (auto-discovered under workspace):
11
+ - .cleo/metrics/GRADES.jsonl (historical grade results)
12
+ - .cleo/tasks.db (sessions, audit_log, token_usage)
13
+ - .cleo/metrics/grade-runs/ (run manifests, summaries, operations)
14
+ - evals/evals.json + grading.json (eval report results)
15
+
16
+ Usage:
17
+ python generate_grade_review.py <workspace-path> [options]
18
+
19
+ Options:
20
+ --port PORT HTTP port (default: 3118)
21
+ --static PATH Write standalone HTML to file instead of serving
22
+ --skill-dir PATH Override skill directory (default: auto-detect)
23
+ --no-browser Don't auto-open browser
24
+ """
25
+
26
+ import argparse
27
+ import json
28
+ import os
29
+ import signal
30
+ import subprocess
31
+ import sys
32
+ import time
33
+ import webbrowser
34
+ from datetime import datetime, timezone
35
+ from http.server import BaseHTTPRequestHandler, HTTPServer
36
+ from pathlib import Path
37
+ from urllib.parse import parse_qs, urlparse
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Workspace discovery
42
+ # ---------------------------------------------------------------------------
43
+
44
def find_workspace(start='.'):
    """Locate the workspace root by walking parent directories.

    Starting from *start*, ascend toward the filesystem root and return
    the first directory containing ``.cleo/tasks.db``. If no such marker
    is found, fall back to the resolved *start* path itself.
    """
    origin = Path(start).resolve()
    candidate = origin
    while candidate != candidate.parent:
        if (candidate / '.cleo' / 'tasks.db').exists():
            return candidate
        candidate = candidate.parent
    return origin
52
+
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Data loaders
56
+ # ---------------------------------------------------------------------------
57
+
58
def load_grades_jsonl(path):
    """Read a JSONL file of grade results into a list of dicts.

    Blank lines and lines that fail to parse as JSON are skipped
    silently; a missing file yields an empty list.
    """
    source = Path(path)
    if not source.exists():
        return []
    parsed = []
    for raw in source.read_text(errors='replace').splitlines():
        record = raw.strip()
        if record:
            try:
                parsed.append(json.loads(record))
            except Exception:
                # Tolerate corrupt lines rather than failing the whole load.
                pass
    return parsed
73
+
74
+
75
def scoreToLetter(score, max_score=100):
    """Map a numeric score to a letter grade A-F.

    The score is converted to a percentage of *max_score* (treated as 0%
    when *max_score* is falsy). ``None`` passes through unchanged.
    """
    if score is None:
        return None
    pct = (score / max_score) * 100 if max_score else 0
    for cutoff, letter in ((90, 'A'), (80, 'B'), (70, 'C'), (60, 'D')):
        if pct >= cutoff:
            return letter
    return 'F'
83
+
84
+
85
def load_sessions(workspace):
    """Load all sessions from SQLite with audit_log stats and token_usage totals.

    Reads ``<workspace>/.cleo/tasks.db``, joins each session with its
    audit_log counts and token_usage sums, remaps the rows to the
    camelCase keys the HTML viewer expects, and enriches them with the
    best (highest totalScore) grade per session from
    ``<workspace>/.cleo/metrics/GRADES.jsonl`` when that file exists.

    Returns a list of camelCase session dicts, newest first (ordered by
    started_at DESC). Returns [] when the database is missing or any
    unexpected error occurs (best-effort loader: never raises).
    """
    import sqlite3
    db = Path(workspace) / '.cleo' / 'tasks.db'
    if not db.exists():
        return []
    try:
        conn = sqlite3.connect(str(db), timeout=5)
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()
        # One row per session; LEFT JOIN keeps sessions with no audit entries
        # (their COUNT/SUM aggregates come back as 0/NULL).
        cur.execute("""
            SELECT
                s.id, s.name, s.status, s.scope_json,
                s.started_at, s.ended_at, s.resume_count,
                s.previous_session_id, s.next_session_id, s.agent_identifier,
                s.grade_mode, s.stats_json,
                COUNT(a.id) as audit_entries,
                SUM(CASE WHEN a.gateway = 'query' AND a.source = 'mcp' THEN 1 ELSE 0 END) as mcp_calls,
                SUM(CASE WHEN a.source = 'cli' THEN 1 ELSE 0 END) as cli_calls
            FROM sessions s
            LEFT JOIN audit_log a ON a.session_id = s.id
            GROUP BY s.id
            ORDER BY s.started_at DESC
        """)
        sessions = [dict(row) for row in cur.fetchall()]

        # Parse stats_json, scope_json, and add chain/agent fields
        for s in sessions:
            stats = {}
            # Malformed stats_json is tolerated; stats stays {} and the
            # derived fields below default to 0.
            try:
                if s.get('stats_json'): stats = json.loads(s['stats_json'])
            except Exception: pass
            # Parse scope_json
            scope_raw = s.get('scope_json', '{}')
            try:
                scope_obj = json.loads(scope_raw) if scope_raw else {}
            except Exception:
                scope_obj = {}
            s['scope_type'] = scope_obj.get('type', '')
            s['scope_root_task_id'] = scope_obj.get('rootTaskId', '')
            s.pop('scope_json', None)
            s['totalActiveMinutes'] = stats.get('totalActiveMinutes', 0)
            s['tasksCompleted'] = stats.get('tasksCompleted', 0)
            s['tasksCreated'] = stats.get('tasksCreated', 0)
            # Rename snake_case DB columns to the camelCase chain/agent keys
            # (pop removes the original key so only one spelling survives).
            s['previousSessionId'] = s.pop('previous_session_id', None)
            s['nextSessionId'] = s.pop('next_session_id', None)
            s['agentIdentifier'] = s.pop('agent_identifier', None)
            s['gradeMode'] = bool(s.pop('grade_mode', False))
            s.pop('stats_json', None)

        # Get token totals per session
        # Best-effort: the token_usage table may be absent in older DBs, in
        # which case every session gets zeroed token fields.
        try:
            cur.execute("""
                SELECT session_id, SUM(total_tokens) as total_tokens, COUNT(*) as token_records
                FROM token_usage
                GROUP BY session_id
            """)
            token_map = {row['session_id']: dict(row) for row in cur.fetchall()}
            for s in sessions:
                tok = token_map.get(s['id'], {})
                s['total_tokens'] = tok.get('total_tokens', 0)
                s['token_records'] = tok.get('token_records', 0)
        except Exception:
            for s in sessions:
                s['total_tokens'] = 0
                s['token_records'] = 0

        conn.close()

        # Map to camelCase keys expected by the HTML viewer
        mapped = []
        for s in sessions:
            started = s.get('started_at', '')
            ended = s.get('ended_at', '')
            duration_ms = None
            if started and ended:
                # Timestamps look ISO-8601 with a trailing 'Z'; fromisoformat
                # needs the '+00:00' offset spelling, hence the replace().
                try:
                    t0 = datetime.fromisoformat(started.replace('Z', '+00:00'))
                    t1 = datetime.fromisoformat(ended.replace('Z', '+00:00'))
                    duration_ms = int((t1 - t0).total_seconds() * 1000)
                except Exception:
                    pass
            mapped.append({
                'sessionId': s.get('id', ''),
                'name': s.get('name', ''),
                'status': s.get('status', ''),
                'scope': s.get('scope_type', ''),
                'scopeRootTaskId': s.get('scope_root_task_id', ''),
                'startedAt': started,
                'endedAt': ended,
                'durationMs': duration_ms,
                'resumeCount': s.get('resume_count', 0),
                'tasksCompleted': s.get('tasksCompleted', 0),
                'tasksCreated': s.get('tasksCreated', 0),
                'auditEntries': s.get('audit_entries', 0),
                'mcpCalls': s.get('mcp_calls', 0),
                'cliCalls': s.get('cli_calls', 0),
                'totalTokens': s.get('total_tokens', 0),
                'tokenRecords': s.get('token_records', 0),
                'totalActiveMinutes': s.get('totalActiveMinutes', 0),
                'previousSessionId': s.get('previousSessionId'),
                'nextSessionId': s.get('nextSessionId'),
                'agentIdentifier': s.get('agentIdentifier'),
                'gradeMode': s.get('gradeMode', False),
            })

        # Enrich sessions with grade data from GRADES.jsonl
        grades_path = Path(workspace) / '.cleo' / 'metrics' / 'GRADES.jsonl'
        if grades_path.exists():
            grade_map = {}  # sessionId -> best grade result
            for line in grades_path.read_text(errors='replace').splitlines():
                line = line.strip()
                if not line: continue
                try:
                    g = json.loads(line)
                    sid = g.get('sessionId', '')
                    if sid:
                        # Keep only the highest-scoring grade per session
                        # ('or 0' guards against a null totalScore).
                        existing = grade_map.get(sid)
                        if existing is None or (g.get('totalScore', 0) or 0) > (existing.get('totalScore', 0) or 0):
                            grade_map[sid] = g
                except Exception:
                    pass
            for s in mapped:
                sid = s.get('sessionId') or s.get('id', '')
                if sid in grade_map:
                    g = grade_map[sid]
                    score = g.get('totalScore')
                    max_score = g.get('maxScore', 100)
                    s['gradeScore'] = score
                    s['gradeLetter'] = scoreToLetter(score, max_score) if score is not None else None
                    s['gradeDetails'] = g.get('dimensions', {})
                    s['gradeFlags'] = g.get('flags', [])
                    s['gradeTimestamp'] = g.get('timestamp', '')

        return mapped
    except Exception:
        # Viewer-facing loader: any failure degrades to an empty list.
        # NOTE(review): the connection is not closed on this path — harmless
        # for a short-lived CLI process, but worth confirming.
        return []
222
+
223
+
224
def load_token_analysis(workspace):
    """Query token_usage table for transport and domain breakdowns.

    Returns a dict with:
      - by_transport: per-transport totals/counts/averages, with the legacy
        'api' transport folded into 'http' (raw names preserved in
        'raw_transports')
      - by_domain: per-domain token totals and record counts
      - confidence: the most common confidence value across all records
      - total_records: total row count of token_usage

    Missing database or any query failure yields the empty/'coarse'
    default structure (best-effort loader: never raises).
    """
    import sqlite3
    db = Path(workspace) / '.cleo' / 'tasks.db'
    if not db.exists():
        return {'by_transport': {}, 'by_domain': {}, 'confidence': 'coarse', 'total_records': 0}
    try:
        conn = sqlite3.connect(str(db), timeout=5)
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()

        # By transport (normalize legacy api -> http per CLEO-WEB-API v2.1)
        # NOTE(review): method/confidence are non-aggregated columns in a
        # GROUP BY query — SQLite returns an arbitrary row's value for them.
        cur.execute("""
            SELECT transport, method, confidence,
                   SUM(total_tokens) as total,
                   COUNT(*) as count,
                   AVG(total_tokens) as avg
            FROM token_usage GROUP BY transport
        """)
        by_transport = {}
        transport_aliases = {'api': 'http'}
        for row in cur.fetchall():
            raw_transport = row['transport'] or 'unknown'
            transport = transport_aliases.get(raw_transport, raw_transport)
            # Merge rows that normalize to the same transport (e.g. 'api'
            # and 'http' both land on the 'http' bucket).
            current = by_transport.get(transport, {
                'transport': transport,
                'raw_transports': [],
                'method': row['method'],
                'confidence': row['confidence'],
                'total': 0,
                'count': 0,
                'avg': 0,
            })
            current['raw_transports'] = sorted(set(current.get('raw_transports', []) + [raw_transport]))
            current['total'] += row['total'] or 0
            current['count'] += row['count'] or 0
            # Recompute the average over the merged bucket rather than
            # trusting any single row's AVG().
            if current['count'] > 0:
                current['avg'] = round(current['total'] / current['count'], 2)
            # Prefer the genuine 'http' row's method/confidence over the
            # legacy alias's values when both are present.
            if raw_transport == 'http' or not current.get('method'):
                current['method'] = row['method']
                current['confidence'] = row['confidence']
            by_transport[transport] = current

        # By domain
        cur.execute("""
            SELECT domain, SUM(total_tokens) as total, COUNT(*) as count
            FROM token_usage GROUP BY domain
        """)
        by_domain = {}
        for row in cur.fetchall():
            by_domain[row['domain']] = dict(row)

        # Overall confidence (most common)
        cur.execute("""
            SELECT confidence, COUNT(*) as cnt FROM token_usage
            GROUP BY confidence ORDER BY cnt DESC LIMIT 1
        """)
        row = cur.fetchone()
        confidence = row['confidence'] if row else 'coarse'

        cur.execute("SELECT COUNT(*) as total FROM token_usage")
        total_records = cur.fetchone()['total']

        conn.close()
        return {
            'by_transport': by_transport,
            'by_domain': by_domain,
            'confidence': confidence,
            'total_records': total_records,
        }
    except Exception:
        # Degrade to empty analysis on any DB/schema failure.
        return {'by_transport': {}, 'by_domain': {}, 'confidence': 'coarse', 'total_records': 0}
296
+
297
+
298
def load_grade_runs(workspace):
    """Collect grade-run manifests from .cleo/metrics/grade-runs/.

    Each result bundles the run directory name, its parsed
    run-manifest.json, and summary.json (None when absent or
    unparseable). Directories are visited in reverse name order, so
    newest-named runs come first. Runs with a missing or unreadable
    manifest are skipped entirely.
    """
    base = Path(workspace) / '.cleo' / 'metrics' / 'grade-runs'
    collected = []
    if not base.exists():
        return collected
    for entry in sorted(base.iterdir(), reverse=True):
        if not entry.is_dir():
            continue
        manifest_file = entry / 'run-manifest.json'
        if not manifest_file.exists():
            continue
        try:
            manifest_data = json.loads(manifest_file.read_text())
        except Exception:
            continue  # unreadable manifest: skip this run entirely
        summary_data = None
        summary_file = entry / 'summary.json'
        if summary_file.exists():
            try:
                summary_data = json.loads(summary_file.read_text())
            except Exception:
                summary_data = None  # summary is optional; keep the run
        collected.append({
            'runId': entry.name,
            'manifest': manifest_data,
            'summary': summary_data,
        })
    return collected
326
+
327
+
328
def compute_per_operation_stats(workspace):
    """Aggregate operations.jsonl files from all grade runs.

    Scans every ``operations.jsonl`` under
    ``<workspace>/.cleo/metrics/grade-runs/`` and returns a dict keyed by
    ``'<domain>.<operation>'`` with call counts and total/average
    durations split by interface ('cli' vs everything else, which counts
    as 'mcp'). Malformed JSON lines and unreadable files are skipped, so
    one bad log cannot abort the whole aggregation — matching the
    tolerant behavior of the other loaders in this module.
    """
    runs_dir = Path(workspace) / '.cleo' / 'metrics' / 'grade-runs'
    stats = {}
    if not runs_dir.exists():
        return stats
    for ops_file in runs_dir.rglob('operations.jsonl'):
        try:
            # errors='replace' for consistency with load_grades_jsonl et al.;
            # previously a mangled file raised UnicodeDecodeError and killed
            # the whole scan.
            text = ops_file.read_text(errors='replace')
        except Exception:
            continue  # unreadable file (permissions, race): skip it
        for line in text.splitlines():
            if not line.strip():
                continue
            try:
                entry = json.loads(line)
                key = '{}.{}'.format(
                    entry.get('domain', 'unknown'),
                    entry.get('operation', 'unknown'),
                )
                iface = entry.get('interface', 'mcp')
                duration = entry.get('duration_ms', 0) or 0
                if key not in stats:
                    stats[key] = {
                        'mcp_calls': 0, 'cli_calls': 0,
                        'total_mcp_ms': 0, 'total_cli_ms': 0,
                    }
                if iface == 'cli':
                    stats[key]['cli_calls'] += 1
                    stats[key]['total_cli_ms'] += duration
                else:
                    stats[key]['mcp_calls'] += 1
                    stats[key]['total_mcp_ms'] += duration
            except Exception:
                pass
    # Compute averages (0 when an interface was never used)
    for v in stats.values():
        v['avg_mcp_ms'] = round(v['total_mcp_ms'] / v['mcp_calls'], 1) if v['mcp_calls'] > 0 else 0
        v['avg_cli_ms'] = round(v['total_cli_ms'] / v['cli_calls'], 1) if v['cli_calls'] > 0 else 0
    return stats
364
+
365
+
366
def load_eval_report(workspace, skill_dir=None):
    """Load grading results and attach eval names from evals.json.

    Discovery order:
      1. evals.json definitions from <skill_dir>/evals/ (names/descriptions)
      2. grading.json files under the workspace and eval-results dir
      3. <id>-grading.json files in .cleo/metrics/eval-results/
      4. For evals with no results yet, synthesize a grading.json from real
         grades in GRADES.jsonl via per-eval pass/fail checkers, WRITING the
         synthesized file into .cleo/metrics/eval-results/ (side effect)
      5. Placeholder entries ({'_not_run': True}) for evals still uncovered

    Returns a list of grading-result dicts, each carrying a '_name' key.
    """
    evals_def = []
    if skill_dir:
        evals_path = Path(skill_dir) / 'evals' / 'evals.json'
        if evals_path.exists():
            # evals.json may be either {'evals': [...]} or a bare list.
            try:
                raw_evals = json.loads(evals_path.read_text())
                if isinstance(raw_evals, dict):
                    evals_def = raw_evals.get('evals', [])
                elif isinstance(raw_evals, list):
                    evals_def = raw_evals
            except Exception:
                pass

    # Display name per eval id: description, else prompt, truncated to 80.
    id_to_name = {}
    for e in evals_def:
        if isinstance(e, dict):
            eid = e.get('id', '')
            id_to_name[eid] = (e.get('description', e.get('prompt', '')))[:80]

    # Find grading.json files under workspace (including eval-results dir)
    results = []
    seen_paths = set()  # resolved paths, to dedupe overlapping search roots
    for search_root in [Path(workspace), Path(workspace) / '.cleo' / 'metrics' / 'eval-results']:
        if not search_root.exists():
            continue
        for grading_file in search_root.rglob('grading.json'):
            real_path = str(grading_file.resolve())
            if real_path in seen_paths:
                continue
            seen_paths.add(real_path)
            try:
                data = json.loads(grading_file.read_text())
                eval_id = data.get('evalId', '')
                data['_name'] = id_to_name.get(eval_id, '')
                results.append(data)
            except Exception:
                pass
    # Also scan eval-results for <id>-grading.json files
    eval_results_dir = Path(workspace) / '.cleo' / 'metrics' / 'eval-results'
    if eval_results_dir.exists():
        for gf in eval_results_dir.glob('*-grading.json'):
            real_path = str(gf.resolve())
            if real_path in seen_paths:
                continue
            seen_paths.add(real_path)
            try:
                data = json.loads(gf.read_text())
                eval_id = data.get('evalId', '')
                data['_name'] = id_to_name.get(eval_id, '')
                results.append(data)
            except Exception:
                pass

    # Synthesize grading.json from real grades for eval coverage
    grades_path = Path(workspace) / '.cleo' / 'metrics' / 'GRADES.jsonl'
    real_grades = []
    if grades_path.exists():
        for line in grades_path.read_text(errors='replace').splitlines():
            line = line.strip()
            if not line: continue
            try: real_grades.append(json.loads(line))
            except Exception: pass

    # Map eval expectations to grade dimensions (supports ct-grade legacy ids + keys)
    def dim_score(grade, *keys):
        # Return the first available dimensions[key]['score'] among *keys*,
        # falling back to 0 when none is present.
        dims = grade.get('dimensions', {}) if isinstance(grade.get('dimensions'), dict) else {}
        for key in keys:
            value = dims.get(key, {})
            if isinstance(value, dict):
                score = value.get('score')
                if score is not None:
                    return score
        return 0

    # Each eval id (int, str digit, or 'eval-NNN' spelling) maps to a
    # predicate over a grade record deciding pass/fail for that eval.
    eval_grade_map = {
        1: lambda g: g.get('totalScore', 0) > 0,
        '1': lambda g: g.get('totalScore', 0) > 0,
        'eval-001': lambda g: g.get('totalScore', 0) > 0,
        2: lambda g: dim_score(g, 'sessionDiscipline') >= 18,
        '2': lambda g: dim_score(g, 'sessionDiscipline') >= 18,
        'eval-002': lambda g: dim_score(g, 'sessionDiscipline') >= 18,
        3: lambda g: dim_score(g, 'discoveryEfficiency', 'taskEfficiency') >= 15,
        '3': lambda g: dim_score(g, 'discoveryEfficiency', 'taskEfficiency') >= 15,
        'eval-003': lambda g: dim_score(g, 'discoveryEfficiency', 'taskEfficiency') >= 15,
        4: lambda g: dim_score(g, 'taskHygiene') >= 18,
        '4': lambda g: dim_score(g, 'taskHygiene') >= 18,
        'eval-004': lambda g: dim_score(g, 'taskHygiene') >= 18,
        5: lambda g: dim_score(g, 'errorProtocol', 'protocolAdherence') >= 15,
        '5': lambda g: dim_score(g, 'errorProtocol', 'protocolAdherence') >= 15,
        'eval-005': lambda g: dim_score(g, 'errorProtocol', 'protocolAdherence') >= 15,
        6: lambda g: dim_score(g, 'disclosureUse', 'mcpGateway') >= 15,
        '6': lambda g: dim_score(g, 'disclosureUse', 'mcpGateway') >= 15,
        'eval-006': lambda g: dim_score(g, 'disclosureUse', 'mcpGateway') >= 15,
        7: lambda g: g.get('totalScore', 0) >= 60,
        '7': lambda g: g.get('totalScore', 0) >= 60,
        'eval-007': lambda g: g.get('totalScore', 0) >= 60,
    }

    # Write grading.json for each eval based on real grade data (only if not already found)
    run_ids = {r.get('evalId') for r in results}
    if real_grades:
        evals_output_dir = Path(workspace) / '.cleo' / 'metrics' / 'eval-results'
        evals_output_dir.mkdir(parents=True, exist_ok=True)

        for e in evals_def:
            eval_id = e.get('id', '')
            if eval_id in run_ids:
                continue  # already have results
            checker = eval_grade_map.get(eval_id)
            if checker:
                passing = [g for g in real_grades if checker(g)]
                total = len(real_grades)
                pass_count = len(passing)
                grading_data = {
                    'evalId': eval_id,
                    'name': id_to_name.get(eval_id, ''),
                    'totalRuns': total,
                    'passed': pass_count,
                    'failed': total - pass_count,
                    'passRate': round(pass_count / total, 3) if total else 0,
                    'expectations': e.get('expectations', []),
                    'results': [
                        {
                            'sessionId': g.get('sessionId', ''),
                            'passed': checker(g),
                            'score': g.get('totalScore'),
                            'evidence': g.get('dimensions', {}),
                        }
                        for g in real_grades[:20]  # limit to 20 for size
                    ],
                    'generatedAt': datetime.now(timezone.utc).isoformat(),
                }
                grading_file = evals_output_dir / '{}-grading.json'.format(eval_id)
                # Persist the synthesized report; only count it as "run"
                # if the write succeeds.
                try:
                    grading_file.write_text(json.dumps(grading_data, indent=2))
                    grading_data['_name'] = id_to_name.get(eval_id, '')
                    results.append(grading_data)
                    run_ids.add(eval_id)
                except Exception:
                    pass

    # Add NOT RUN entries for evals with no grading.json
    for e in evals_def:
        if isinstance(e, dict) and e.get('id') not in run_ids:
            results.append({
                'evalId': e.get('id'),
                '_name': id_to_name.get(e.get('id', ''), ''),
                '_not_run': True,
            })
    return results
518
+
519
+
520
def load_live_session(workspace):
    """Return the currently active session with its recent activity.

    Looks up the newest session whose status is 'active', fetches its
    last 50 audit_log entries (newest first) and the summed token_usage
    total. Returns {'session_id': None, 'entries': []} when the database
    or an active session is absent; on unexpected errors the same shape
    is returned with an added 'error' string.
    """
    import sqlite3
    db_path = Path(workspace) / '.cleo' / 'tasks.db'
    if not db_path.exists():
        return {'session_id': None, 'entries': []}
    try:
        connection = sqlite3.connect(str(db_path), timeout=5)
        connection.row_factory = sqlite3.Row
        cursor = connection.cursor()

        cursor.execute(
            "SELECT id, name, status, started_at FROM sessions "
            "WHERE status='active' ORDER BY started_at DESC LIMIT 1"
        )
        active = cursor.fetchone()
        if not active:
            connection.close()
            return {'session_id': None, 'entries': []}

        live_id = active['id']
        live_name = active['name']
        live_started = active['started_at']

        # Most recent 50 audit entries for this session.
        cursor.execute(
            """SELECT timestamp, domain, operation, source, gateway, duration_ms, success
               FROM audit_log
               WHERE session_id = ?
               ORDER BY timestamp DESC LIMIT 50""",
            (live_id,),
        )
        recent = []
        for r in cursor.fetchall():
            recent.append({
                'timestamp': r['timestamp'],
                'domain': r['domain'],
                'operation': r['operation'],
                'source': r['source'],
                'gateway': r['gateway'],
                'duration_ms': r['duration_ms'],
                'success': bool(r['success']),
            })

        # Token total is best-effort (table may be missing in older DBs).
        tokens = 0
        try:
            cursor.execute(
                "SELECT SUM(total_tokens) as total FROM token_usage WHERE session_id = ?",
                (live_id,),
            )
            total_row = cursor.fetchone()
            if total_row and total_row['total']:
                tokens = total_row['total']
        except Exception:
            pass

        connection.close()
        return {
            'session_id': live_id,
            'session_name': live_name,
            'started_at': live_started,
            'total_tokens': tokens,
            'entries': recent,
        }
    except Exception as e:
        return {'session_id': None, 'entries': [], 'error': str(e)}
588
+
589
+
590
def load_session_detail(workspace, session_id):
    """Load audit entries + token data + full session row for a specific session.

    Returns {'entries': [...], 'tokens': {...}, 'session': {...}} with empty
    containers when the DB or session is missing, or when any query fails
    (best-effort: never raises).
    """
    import sqlite3
    db = Path(workspace) / '.cleo' / 'tasks.db'
    if not db.exists() or not session_id:
        return {'entries': [], 'tokens': {}, 'session': {}}
    conn = None
    try:
        conn = sqlite3.connect(str(db), timeout=5)
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()

        # Get full session row
        cur.execute("""
            SELECT id, name, status, scope_json, notes_json, tasks_completed_json, tasks_created_json,
                   handoff_json, debrief_json, stats_json, started_at, ended_at,
                   previous_session_id, next_session_id, agent_identifier,
                   handoff_consumed_at, resume_count, grade_mode
            FROM sessions WHERE id = ?
        """, (session_id,))
        srow = cur.fetchone()
        session_info = {}
        if srow:
            srow = dict(srow)

            def parse_j(v, default=None):
                # Tolerant *_json column decoder: NULL/'' or malformed JSON -> default.
                if not v:
                    return default
                try:
                    return json.loads(v)
                except Exception:
                    return default

            session_info = {
                'id': srow['id'],
                'name': srow['name'],
                'status': srow['status'],
                'scope': parse_j(srow['scope_json'], {}),
                'notes': parse_j(srow['notes_json'], []),
                'tasksCompleted': parse_j(srow['tasks_completed_json'], []),
                'tasksCreated': parse_j(srow['tasks_created_json'], []),
                'handoff': parse_j(srow['handoff_json']),
                'debrief': parse_j(srow['debrief_json']),
                'stats': parse_j(srow['stats_json'], {}),
                'startedAt': srow['started_at'],
                'endedAt': srow['ended_at'],
                'previousSessionId': srow['previous_session_id'],
                'nextSessionId': srow['next_session_id'],
                'agentIdentifier': srow['agent_identifier'],
                'handoffConsumedAt': srow['handoff_consumed_at'],
                'resumeCount': srow['resume_count'] or 0,
                'gradeMode': bool(srow['grade_mode']),
            }

        cur.execute(
            """SELECT timestamp, domain, operation, source, gateway, duration_ms, success
            FROM audit_log
            WHERE session_id = ?
            ORDER BY timestamp DESC LIMIT 500""",
            (session_id,),
        )
        entries = [
            {
                'timestamp': r['timestamp'],
                'domain': r['domain'],
                'operation': r['operation'],
                'source': r['source'],
                'gateway': r['gateway'],
                'duration_ms': r['duration_ms'],
                'success': bool(r['success']),
            }
            for r in cur.fetchall()
        ]

        # Token aggregates (token_usage table may be absent in older workspaces).
        tokens = {}
        try:
            cur.execute(
                """SELECT SUM(total_tokens) as total_tokens,
                       SUM(input_tokens) as input_tokens,
                       SUM(output_tokens) as output_tokens,
                       COUNT(*) as records
                FROM token_usage WHERE session_id = ?""",
                (session_id,),
            )
            row = cur.fetchone()
            if row:
                tokens = dict(row)
        except Exception:
            pass

        return {'entries': entries, 'tokens': tokens, 'session': session_info}
    except Exception:
        return {'entries': [], 'tokens': {}, 'session': {}}
    finally:
        # Fix: close the connection on every path (the original leaked it
        # whenever an exception hit the outer except before conn.close()).
        if conn is not None:
            conn.close()
678
+
679
+
680
def enrich_grades_with_tokens(grades, workspace):
    """Attach _tokenMeta from the token_usage table to each grade whose sessionId has records.

    Mutates and returns *grades*. Best-effort: when the DB is missing or any
    query fails, grades are returned unchanged.
    """
    import sqlite3
    db = Path(workspace) / '.cleo' / 'tasks.db'
    if not db.exists():
        return grades
    conn = None
    try:
        conn = sqlite3.connect(str(db), timeout=5)
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()
        # One aggregate row per session; MAX() picks an arbitrary non-NULL
        # confidence/method when multiple records disagree.
        cur.execute("""
            SELECT session_id, SUM(total_tokens) as total_tokens,
                   SUM(input_tokens) as input_tokens, SUM(output_tokens) as output_tokens,
                   COUNT(*) as records,
                   MAX(confidence) as confidence, MAX(method) as method
            FROM token_usage GROUP BY session_id
        """)
        token_map = {row['session_id']: dict(row) for row in cur.fetchall()}
        for g in grades:
            sid = g.get('sessionId')
            if sid and sid in token_map:
                t = token_map[sid]
                g['_tokenMeta'] = {
                    'total_tokens': t['total_tokens'],
                    'input_tokens': t['input_tokens'],
                    'output_tokens': t['output_tokens'],
                    'confidence': t['confidence'] or 'coarse',
                    'method': t['method'] or 'heuristic',
                    'records': t['records'],
                }
    except Exception:
        pass
    finally:
        # Fix: the original leaked the connection when cur.execute raised
        # (the except path never reached conn.close()).
        if conn is not None:
            conn.close()
    return grades
714
+
715
+
716
+ # ---------------------------------------------------------------------------
717
+ # Canonical operation list (for operation matrix)
718
+ # ---------------------------------------------------------------------------
719
+
720
+ def _canonical_ops():
721
+ """Return list of (op, domain, tier, gateway) for all canonical ops."""
722
+ ops = []
723
+ def add(domain, tier, gateway, names):
724
+ for n in names:
725
+ ops.append({
726
+ 'operation': domain + '.' + n,
727
+ 'domain': domain,
728
+ 'tier': tier,
729
+ 'gateway': gateway,
730
+ })
731
+
732
+ add('tasks', 0, 'query', ['find', 'show', 'list', 'tree', 'plan', 'exists'])
733
+ add('tasks', 0, 'mutate', ['add', 'update', 'complete', 'cancel', 'archive', 'restore', 'relates', 'depends', 'history'])
734
+ add('session', 0, 'query', ['status', 'list', 'briefing.show', 'handoff.show', 'context.drift'])
735
+ add('session', 0, 'mutate', ['start', 'end', 'decision.log', 'record.decision', 'context.inject'])
736
+ add('admin', 0, 'query', ['dash', 'health', 'help', 'stats', 'doctor', 'grade', 'grade.list', 'adr.find', 'token'])
737
+ add('check', 0, 'query', ['grade', 'grade.list'])
738
+ add('memory', 1, 'query', ['find', 'timeline', 'fetch', 'pattern.find', 'learning.find'])
739
+ add('memory', 1, 'mutate', ['observe'])
740
+ add('tools', 1, 'query', ['skill.list', 'skill.show', 'provider.list', 'provider.show'])
741
+ add('check', 1, 'query', ['health', 'schema', 'compliance'])
742
+ add('pipeline', 1, 'query', ['stage.status', 'manifest.list'])
743
+ add('pipeline', 1, 'mutate', ['stage.record', 'stage.gate.pass', 'stage.validate', 'manifest.add', 'manifest.remove'])
744
+ add('orchestrate', 2, 'query', ['analyze', 'ready', 'next'])
745
+ add('orchestrate', 2, 'mutate', ['spawn', 'start'])
746
+ add('nexus', 2, 'query', ['status', 'project.list', 'project.show'])
747
+ add('nexus', 2, 'mutate', ['project.add'])
748
+ add('sticky', 2, 'query', ['list', 'show'])
749
+ add('sticky', 2, 'mutate', ['add', 'convert', 'archive', 'purge'])
750
+ return ops
751
+
752
+
753
def build_operation_matrix(op_stats):
    """Overlay computed per-operation stats onto the canonical operation catalog.

    Returns a sorted array of rows (the HTML viewer expects an array, not a map).
    Operations seen in stats but absent from the canon get a default tier-0 row.
    """
    def fresh_row(op_name, domain, tier, gateway):
        # Zeroed stats row for one operation.
        return {
            'operation': op_name,
            'domain': domain,
            'tier': tier,
            'gateway': gateway,
            'mcp_calls': 0,
            'cli_calls': 0,
            'avg_mcp_ms': None,
            'avg_cli_ms': None,
            'tested': False,
        }

    rows = {
        canon['operation']: fresh_row(
            canon['operation'], canon['domain'], canon['tier'], canon['gateway'],
        )
        for canon in _canonical_ops()
    }

    for op_name, stats in op_stats.items():
        row = rows.get(op_name)
        if row is None:
            # Non-canonical operation observed in grade runs: default metadata.
            row = rows[op_name] = fresh_row(
                op_name, op_name.split('.', 1)[0], 0, 'query',
            )
        mcp = stats.get('mcp_calls', 0) or 0
        cli = stats.get('cli_calls', 0) or 0
        row['mcp_calls'] = mcp
        row['cli_calls'] = cli
        row['avg_mcp_ms'] = stats.get('avg_mcp_ms')
        row['avg_cli_ms'] = stats.get('avg_cli_ms')
        row['tested'] = bool(mcp or cli)

    return sorted(rows.values(), key=lambda r: r['operation'])
796
+
797
+
798
def build_api_surface():
    """Return current canonical + compatibility grade analytics API guidance."""
    canonical = {
        'query': [
            'check.grade',
            'check.grade.list',
            'admin.token?action=summary',
            'admin.token?action=list',
            'admin.token?action=show',
        ],
        'mutate': [
            'admin.token?action=record',
            'admin.token?action=delete',
            'admin.token?action=clear',
        ],
    }
    # Legacy dotted aliases kept during the compatibility window.
    compatibility = {
        'query': [
            'admin.grade',
            'admin.grade.list',
            'admin.token.summary',
            'admin.token.list',
            'admin.token.show',
        ],
        'mutate': [
            'admin.token.record',
            'admin.token.delete',
            'admin.token.clear',
        ],
    }
    handler_only = [
        'admin.grade.run.list',
        'admin.grade.run.show',
    ]
    planned = [
        'admin.grade.run.slot.show',
        'admin.grade.run.timing.list',
        'admin.grade.run.timing.show',
        'admin.grade.run.comparison.list',
        'admin.grade.run.comparison.show',
        'admin.grade.run.analysis.list',
        'admin.grade.run.analysis.show',
        'admin.grade.run.summary.show',
        'admin.grade.eval.list',
        'admin.grade.eval.show',
    ]
    web_api = {
        'queryEndpoint': '/api/query',
        'mutateEndpoint': '/api/mutate',
        'lafsHeaders': [
            'X-Cleo-Request-Id',
            'X-Cleo-Exit-Code',
            'X-Cleo-Transport',
            'X-Cleo-Operation',
            'X-Cleo-Domain',
        ],
        'transportAliasNote': 'Treat persisted transport=api as equivalent to http during the compatibility window.',
    }
    return {
        'canonical': canonical,
        'compatibility': compatibility,
        'handlerOnly': handler_only,
        'planned': planned,
        'webApi': web_api,
    }
858
+
859
+
860
+ # ---------------------------------------------------------------------------
861
+ # Embedded data builder
862
+ # ---------------------------------------------------------------------------
863
+
864
def build_embedded_data(workspace, skill_dir=None):
    """Build the full embedded data dict for the viewer.

    Aggregates grades, sessions, grade runs, token analytics, the operation
    matrix, eval reports, the live session, and API-surface guidance, plus a
    computed grade summary and generation metadata.
    """
    ws = Path(workspace).resolve()

    # Grades from the JSONL log, later enriched with token metadata.
    grades = load_grades_jsonl(str(ws / '.cleo' / 'metrics' / 'GRADES.jsonl'))

    # Sessions (from SQLite)
    sessions = load_sessions(ws)

    # Grade runs (manifests + summaries); the newest run's summary drives A/B.
    grade_runs = load_grade_runs(ws)
    ab_results = grade_runs[0]['summary'] if grade_runs and grade_runs[0].get('summary') else {}
    ab_history = [run['manifest'] for run in grade_runs]

    # Token analysis (from SQLite)
    token_analysis = load_token_analysis(ws)

    # Operation matrix (canonical ops overlaid with grade-run stats)
    operation_matrix = build_operation_matrix(compute_per_operation_stats(ws))

    # Eval report + live session snapshot
    eval_report = load_eval_report(ws, skill_dir)
    live_session = load_live_session(ws)

    # Enrich grades with token metadata from token_usage
    grades = enrich_grades_with_tokens(grades, ws)

    # API surface guidance
    api_surface = build_api_surface()

    def summarize_grades(grade_rows, session_rows):
        # Score statistics + letter-grade distribution across all grades.
        if not grade_rows:
            return {'total': 0, 'graded': 0, 'avgScore': None, 'distribution': {}}
        scores = [g.get('totalScore') for g in grade_rows if g.get('totalScore') is not None]
        distribution = {}
        for score in scores:
            letter = scoreToLetter(score)
            distribution[letter] = distribution.get(letter, 0) + 1
        graded = sum(1 for s in session_rows if s.get('gradeScore') is not None)
        return {
            'total': len(grade_rows),
            'graded': graded,
            'avgScore': round(sum(scores) / len(scores), 1) if scores else None,
            'maxScore': max(scores) if scores else None,
            'minScore': min(scores) if scores else None,
            'distribution': distribution,
        }

    return {
        'grades': grades,
        'sessions': sessions,
        'ab_results': ab_results,
        'ab_history': ab_history,
        'token_analysis': token_analysis,
        'operation_matrix': operation_matrix,
        'eval_report': {'evals': eval_report},
        'grade_summary': summarize_grades(grades, sessions),
        'live_session': live_session,
        'api_surface': api_surface,
        'metadata': {
            'generated_at': datetime.now(timezone.utc).isoformat(),
            'workspace': str(ws),
            'skill_version': '1.1.0',
            'grade_count': len(grades),
            'session_count': len(sessions),
        },
    }
939
+
940
+
941
+ # ---------------------------------------------------------------------------
942
+ # HTML generator
943
+ # ---------------------------------------------------------------------------
944
+
945
def generate_html(data, template):
    """Return the template text with '{{EMBEDDED_DATA}}' replaced by *data* serialized as JSON.

    Non-serializable values are stringified via default=str so generation never fails.
    """
    return template.replace(
        '{{EMBEDDED_DATA}}',
        json.dumps(data, ensure_ascii=False, default=str),
    )
949
+
950
+
951
+ # ---------------------------------------------------------------------------
952
+ # HTTP server
953
+ # ---------------------------------------------------------------------------
954
+
955
def _kill_port(port):
    """Best-effort: SIGTERM whatever is listening on *port* so we can rebind it.

    Silently does nothing when lsof is unavailable or times out.
    """
    try:
        probe = subprocess.run(
            ['lsof', '-ti', ':{}'.format(port)],
            capture_output=True, text=True, timeout=5,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return
    listed = probe.stdout.strip()
    for token in listed.split('\n'):
        token = token.strip()
        if not token:
            continue
        try:
            os.kill(int(token), signal.SIGTERM)
        except (ProcessLookupError, ValueError):
            continue
    if listed:
        # Give the old process a moment to actually release the socket.
        time.sleep(0.5)
971
+
972
+
973
class GradeReviewHandler(BaseHTTPRequestHandler):
    """Serves the grade review HTML and JSON API endpoints.

    Routes: '/' (and '/index.html') -> full viewer page rebuilt per request;
    '/live-data' -> active-session JSON; '/sessions-data?sessionId=X' -> detail JSON.
    Workspace, skill dir, and template are read from attributes on the server.
    """

    def _respond(self, status, content_type, payload, no_cache=False):
        # Emit one complete response: status, headers, body bytes.
        self.send_response(status)
        self.send_header('Content-Type', content_type)
        self.send_header('Content-Length', str(len(payload)))
        if no_cache:
            self.send_header('Cache-Control', 'no-cache')
        self.end_headers()
        self.wfile.write(payload)

    def do_GET(self):
        parsed = urlparse(self.path)
        route = parsed.path
        if route in ('/', '', '/index.html'):
            self._serve_main()
        elif route == '/live-data':
            self._serve_live_data()
        elif route.startswith('/sessions-data'):
            self._serve_session_data(parsed)
        else:
            self.send_error(404)

    def _serve_main(self):
        try:
            # Rebuild from disk on every hit so a browser refresh picks up new results.
            data = build_embedded_data(self.server.workspace, self.server.skill_dir)
            page = generate_html(data, self.server.template).encode('utf-8')
            self._respond(200, 'text/html; charset=utf-8', page)
        except Exception as e:
            message = 'Error generating review: {}'.format(e).encode('utf-8')
            self._respond(500, 'text/plain', message)

    def _serve_live_data(self):
        try:
            live = load_live_session(self.server.workspace)
            body = json.dumps({'live_session': live}, default=str).encode('utf-8')
            self._respond(200, 'application/json', body, no_cache=True)
        except Exception as e:
            fallback = json.dumps(
                {'error': str(e), 'live_session': {'session_id': None, 'entries': []}}
            ).encode('utf-8')
            self._respond(500, 'application/json', fallback)

    def _serve_session_data(self, parsed):
        try:
            session_id = parse_qs(parsed.query).get('sessionId', [None])[0]
            detail = load_session_detail(self.server.workspace, session_id)
            body = json.dumps(detail, default=str).encode('utf-8')
            self._respond(200, 'application/json', body, no_cache=True)
        except Exception as e:
            fallback = json.dumps(
                {'error': str(e), 'entries': [], 'tokens': {}}
            ).encode('utf-8')
            self._respond(500, 'application/json', fallback)

    def log_message(self, fmt, *args):
        # Deliberately suppress per-request access logging.
        pass
1047
+
1048
+
1049
+ # ---------------------------------------------------------------------------
1050
+ # Main
1051
+ # ---------------------------------------------------------------------------
1052
+
1053
def main():
    """CLI entry point for the grade-review viewer.

    Parses arguments, scans the workspace into the embedded data payload, then
    either writes a standalone HTML file (--static) or serves the viewer on a
    localhost HTTP server, optionally opening the browser.
    """
    parser = argparse.ArgumentParser(description='CLEO Grade Review viewer (v1.1 API-aware)')
    parser.add_argument('workspace', type=Path, help='Project or results directory to scan')
    parser.add_argument('--port', '-p', type=int, default=3118)
    parser.add_argument('--static', '-s', type=Path, default=None,
                        help='Write standalone HTML to this path instead of serving')
    parser.add_argument('--skill-dir', default=None,
                        help='Override skill directory (default: auto-detect from __file__)')
    parser.add_argument('--no-browser', action='store_true', help='Do not auto-open browser')
    args = parser.parse_args()

    workspace = args.workspace.resolve()
    if not workspace.exists():
        print('ERROR: workspace does not exist: {}'.format(workspace), file=sys.stderr)
        sys.exit(1)

    # Auto-detect skill_dir: generator is in eval-viewer/, skill root is one level up
    skill_dir = args.skill_dir or str(Path(__file__).parent.parent)

    # Load template once
    template_path = Path(__file__).parent / 'grade-review.html'
    if not template_path.exists():
        print('ERROR: grade-review.html not found at {}'.format(template_path), file=sys.stderr)
        sys.exit(1)
    template = template_path.read_text(encoding='utf-8')

    data = build_embedded_data(workspace, skill_dir)

    # Counts for the startup banner below.
    grade_count = len(data.get('grades', []))
    session_count = len(data.get('sessions', []))
    eval_count = len(data.get('eval_report', {}).get('evals', []))
    op_count = len(data.get('operation_matrix', []))

    # Banner goes to stderr so stdout stays clean (e.g. for piping).
    print('\n ct-grade Review Viewer', file=sys.stderr)
    print(' {}'.format('\u2500' * 40), file=sys.stderr)
    print(' Workspace : {}'.format(workspace), file=sys.stderr)
    print(' Grades : {}'.format(grade_count), file=sys.stderr)
    print(' Sessions : {}'.format(session_count), file=sys.stderr)
    print(' Eval reports : {}'.format(eval_count), file=sys.stderr)
    print(' Op matrix : {} operations'.format(op_count), file=sys.stderr)

    if not grade_count and not session_count:
        print('\n WARNING: No data found. Run a grading scenario first.', file=sys.stderr)

    # Static mode: write a self-contained HTML file and exit without serving.
    if args.static:
        html = generate_html(data, template)
        args.static.parent.mkdir(parents=True, exist_ok=True)
        args.static.write_text(html, encoding='utf-8')
        print('\n Static viewer written to: {}'.format(args.static), file=sys.stderr)
        sys.exit(0)

    # Serve mode: free the requested port first (best-effort), then bind;
    # fall back to an OS-assigned ephemeral port if it is still taken.
    port = args.port
    _kill_port(port)

    try:
        server = HTTPServer(('127.0.0.1', port), GradeReviewHandler)
    except OSError:
        server = HTTPServer(('127.0.0.1', 0), GradeReviewHandler)
        port = server.server_address[1]

    # Attach workspace and template to server for handler access
    server.workspace = workspace
    server.skill_dir = skill_dir
    server.template = template

    url = 'http://localhost:{}'.format(port)
    print(' URL : {}'.format(url), file=sys.stderr)
    print('\n Refreshing the browser re-scans the workspace for new results.', file=sys.stderr)
    print(' /live-data and /sessions-data?sessionId=X for JSON API.', file=sys.stderr)
    print(' Press Ctrl+C to stop.\n', file=sys.stderr)

    def handle_sigint(sig, frame):
        # Clean shutdown on Ctrl+C: close the socket, then exit.
        print('\nStopped.', file=sys.stderr)
        server.server_close()
        sys.exit(0)

    signal.signal(signal.SIGINT, handle_sigint)

    if not args.no_browser:
        webbrowser.open(url)

    server.serve_forever()
1135
+
1136
+
1137
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()