@cleocode/cleo 2026.3.20 → 2026.3.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +46156 -42192
- package/dist/cli/index.js.map +4 -4
- package/dist/mcp/index.js +37601 -36260
- package/dist/mcp/index.js.map +4 -4
- package/drizzle-brain.config.ts +7 -0
- package/drizzle-nexus.config.ts +7 -0
- package/drizzle-tasks.config.ts +7 -0
- package/migrations/drizzle-brain/20260301230215_workable_spitfire/migration.sql +68 -0
- package/migrations/drizzle-brain/20260301230215_workable_spitfire/snapshot.json +651 -0
- package/migrations/drizzle-brain/20260302050325_unknown_justin_hammer/migration.sql +23 -0
- package/migrations/drizzle-brain/20260302050325_unknown_justin_hammer/snapshot.json +884 -0
- package/migrations/drizzle-brain/20260302061755_unusual_jamie_braddock/migration.sql +2 -0
- package/migrations/drizzle-brain/20260302061755_unusual_jamie_braddock/snapshot.json +908 -0
- package/migrations/drizzle-brain/20260302193548_luxuriant_glorian/migration.sql +20 -0
- package/migrations/drizzle-brain/20260302193548_luxuriant_glorian/snapshot.json +1078 -0
- package/migrations/drizzle-brain/20260304045002_white_thunderbolt_ross/migration.sql +16 -0
- package/migrations/drizzle-brain/20260304045002_white_thunderbolt_ross/snapshot.json +1233 -0
- package/migrations/drizzle-nexus/20260305070805_quick_ted_forrester/migration.sql +46 -0
- package/migrations/drizzle-nexus/20260305070805_quick_ted_forrester/snapshot.json +461 -0
- package/migrations/drizzle-tasks/20260308024513_oval_king_bedlam/migration.sql +32 -0
- package/migrations/drizzle-tasks/20260308024513_oval_king_bedlam/snapshot.json +3727 -0
- package/package.json +22 -5
- package/packages/ct-skills/skills/ct-cleo/SKILL.md +344 -81
- package/packages/ct-skills/skills/ct-grade/SKILL.md +20 -4
- package/packages/ct-skills/skills/ct-grade/agents/analysis-reporter.md +203 -0
- package/packages/ct-skills/skills/ct-grade/agents/blind-comparator.md +157 -0
- package/packages/ct-skills/skills/ct-grade/agents/scenario-runner.md +134 -0
- package/packages/ct-skills/skills/ct-grade/eval-viewer/generate_grade_review.py +1138 -0
- package/packages/ct-skills/skills/ct-grade/eval-viewer/generate_grade_viewer.py +544 -0
- package/packages/ct-skills/skills/ct-grade/eval-viewer/generate_review.py +283 -0
- package/packages/ct-skills/skills/ct-grade/eval-viewer/grade-review.html +1574 -0
- package/packages/ct-skills/skills/ct-grade/eval-viewer/viewer.html +219 -0
- package/packages/ct-skills/skills/ct-grade/evals/evals.json +94 -0
- package/packages/ct-skills/skills/ct-grade/references/ab-test-methodology.md +150 -0
- package/packages/ct-skills/skills/ct-grade/references/domains.md +137 -0
- package/packages/ct-skills/skills/ct-grade/references/grade-spec.md +236 -0
- package/packages/ct-skills/skills/ct-grade/references/scenario-playbook.md +234 -0
- package/packages/ct-skills/skills/ct-grade/references/token-tracking.md +120 -0
- package/packages/ct-skills/skills/ct-grade/scripts/audit_analyzer.py +279 -0
- package/packages/ct-skills/skills/ct-grade/scripts/generate_report.py +283 -0
- package/packages/ct-skills/skills/ct-grade/scripts/run_ab_test.py +504 -0
- package/packages/ct-skills/skills/ct-grade/scripts/run_all.py +287 -0
- package/packages/ct-skills/skills/ct-grade/scripts/setup_run.py +183 -0
- package/packages/ct-skills/skills/ct-grade/scripts/token_tracker.py +630 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/SKILL.md +237 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/agents/analysis-reporter.md +203 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/agents/blind-comparator.md +157 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/agents/scenario-runner.md +179 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/evals/evals.json +74 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/grade-viewer/build_op_stats.py +174 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/grade-viewer/eval-analysis.json +41 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/grade-viewer/eval-report.md +34 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/grade-viewer/generate_grade_review.py +1023 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/grade-viewer/generate_grade_viewer.py +548 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/grade-viewer/grade-review-eval.html +613 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/grade-viewer/grade-review.html +1532 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/grade-viewer/viewer.html +620 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/manifest-entry.json +31 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/references/ab-testing.md +233 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/references/domains-ssot.md +156 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/references/grade-spec-v2.md +167 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/references/playbook-v2.md +393 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/references/token-tracking.md +202 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/scripts/generate_report.py +419 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/scripts/run_ab_test.py +493 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/scripts/run_scenario.py +396 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/scripts/setup_run.py +207 -0
- package/packages/ct-skills/skills/ct-grade-v2-1/scripts/token_tracker.py +175 -0
- package/packages/ct-skills/skills/ct-orchestrator/SKILL.md +1 -29
- package/packages/ct-skills/skills/ct-orchestrator/manifest-entry.json +19 -0
- package/packages/ct-skills/skills/ct-skill-creator/.cleo/.context-state.json +13 -0
- package/packages/ct-skills/skills/ct-skill-creator/.cleo/tasks.db +0 -0
- package/packages/ct-skills/skills/ct-skill-creator/SKILL.md +0 -12
- package/packages/ct-skills/skills/ct-skill-creator/agents/analyzer.md +276 -0
- package/packages/ct-skills/skills/ct-skill-creator/agents/comparator.md +204 -0
- package/packages/ct-skills/skills/ct-skill-creator/agents/grader.md +225 -0
- package/packages/ct-skills/skills/ct-skill-creator/assets/eval_review.html +146 -0
- package/packages/ct-skills/skills/ct-skill-creator/eval-viewer/generate_review.py +471 -0
- package/packages/ct-skills/skills/ct-skill-creator/eval-viewer/viewer.html +1325 -0
- package/packages/ct-skills/skills/ct-skill-creator/manifest-entry.json +17 -0
- package/packages/ct-skills/skills/ct-skill-creator/references/dynamic-context.md +228 -0
- package/packages/ct-skills/skills/ct-skill-creator/references/frontmatter.md +83 -0
- package/packages/ct-skills/skills/ct-skill-creator/references/invocation-control.md +165 -0
- package/packages/ct-skills/skills/ct-skill-creator/references/provider-deployment.md +175 -0
- package/packages/ct-skills/skills/ct-skill-creator/references/schemas.md +430 -0
- package/packages/ct-skills/skills/ct-skill-creator/scripts/__init__.py +1 -0
- package/packages/ct-skills/skills/ct-skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/packages/ct-skills/skills/ct-skill-creator/scripts/generate_report.py +326 -0
- package/packages/ct-skills/skills/ct-skill-creator/scripts/improve_description.py +247 -0
- package/packages/ct-skills/skills/ct-skill-creator/scripts/run_eval.py +310 -0
- package/packages/ct-skills/skills/ct-skill-creator/scripts/run_loop.py +328 -0
- package/packages/ct-skills/skills/ct-skill-creator/scripts/utils.py +47 -0
- package/packages/ct-skills/skills/ct-skill-validator/SKILL.md +178 -0
- package/packages/ct-skills/skills/ct-skill-validator/agents/ecosystem-checker.md +151 -0
- package/packages/ct-skills/skills/ct-skill-validator/assets/valid-skill-example.md +13 -0
- package/packages/ct-skills/skills/ct-skill-validator/evals/eval_set.json +14 -0
- package/packages/ct-skills/skills/ct-skill-validator/evals/evals.json +52 -0
- package/packages/ct-skills/skills/ct-skill-validator/manifest-entry.json +20 -0
- package/packages/ct-skills/skills/ct-skill-validator/references/cleo-ecosystem-rules.md +163 -0
- package/packages/ct-skills/skills/ct-skill-validator/references/validation-rules.md +168 -0
- package/packages/ct-skills/skills/ct-skill-validator/scripts/__init__.py +0 -0
- package/packages/ct-skills/skills/ct-skill-validator/scripts/audit_body.py +242 -0
- package/packages/ct-skills/skills/ct-skill-validator/scripts/check_ecosystem.py +169 -0
- package/packages/ct-skills/skills/ct-skill-validator/scripts/check_manifest.py +172 -0
- package/packages/ct-skills/skills/ct-skill-validator/scripts/generate_validation_report.py +442 -0
- package/packages/ct-skills/skills/ct-skill-validator/scripts/validate.py +422 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260224040019_baseline/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260224040019_baseline/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260224040238_add-audit-log/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260224040238_add-audit-log/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260224144602_closed_grim_reaper/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260224144602_closed_grim_reaper/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260225024442_sync-lifecycle-enums-and-arch-decisions/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260225024442_sync-lifecycle-enums-and-arch-decisions/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227014821_adr-system-and-status-registry/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227014821_adr-system-and-status-registry/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227021231_add-cancelled-pipeline-status/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227021231_add-cancelled-pipeline-status/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227022417_adr-cognitive-search-fields/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227022417_adr-cognitive-search-fields/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227172236_freezing_grey_gargoyle/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227172236_freezing_grey_gargoyle/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227183444_fix-orphaned-parent-ids/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227183444_fix-orphaned-parent-ids/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227183521_parent-id-on-delete-set-null/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227183521_parent-id-on-delete-set-null/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227200430_numerous_mysterio/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227200430_numerous_mysterio/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227235745_add-audit-log-dispatch-columns/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260227235745_add-audit-log-dispatch-columns/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260301053344_careless_changeling/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260301053344_careless_changeling/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260301175940_futuristic_eternity/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260301175940_futuristic_eternity/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260301180528_update-task-relations-check-constraint/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260301180528_update-task-relations-check-constraint/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260302163443_free_silk_fever/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260302163443_free_silk_fever/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260302163457_robust_johnny_storm/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260302163457_robust_johnny_storm/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260302163511_late_sphinx/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260302163511_late_sphinx/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260305011924_cheerful_mongu/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260305011924_cheerful_mongu/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260305203927_demonic_storm/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260305203927_demonic_storm/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260306001243_spooky_rage/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260306001243_spooky_rage/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260306193138_young_morbius/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260306193138_young_morbius/snapshot.json +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260306194959_sticky_captain_flint/migration.sql +0 -0
- /package/{drizzle → migrations/drizzle-tasks}/20260306194959_sticky_captain_flint/snapshot.json +0 -0
|
@@ -0,0 +1,544 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
generate_grade_viewer.py — Generate and serve a visual HTML report for ct-grade A/B runs.
|
|
4
|
+
|
|
5
|
+
Reads all artifacts from a run directory (grade.json, comparison.json, timing.json,
|
|
6
|
+
token-summary.json, analysis.json, report.md) and produces a self-contained HTML
|
|
7
|
+
page with visual score bars, A/B comparison tables, token charts, and recommendations.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
# Serve live at localhost:3119 (refreshes on browser reload):
|
|
11
|
+
python eval-viewer/generate_grade_viewer.py --run-dir ./ab_results/run-001
|
|
12
|
+
|
|
13
|
+
# Write static HTML file:
|
|
14
|
+
python eval-viewer/generate_grade_viewer.py --run-dir ./ab_results/run-001 --static grade-results.html
|
|
15
|
+
|
|
16
|
+
# Different port:
|
|
17
|
+
python eval-viewer/generate_grade_viewer.py --run-dir ./ab_results/run-001 --port 3120
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import json
|
|
22
|
+
import os
|
|
23
|
+
import signal
|
|
24
|
+
import subprocess
|
|
25
|
+
import sys
|
|
26
|
+
import time
|
|
27
|
+
import webbrowser
|
|
28
|
+
from datetime import datetime, timezone
|
|
29
|
+
from functools import partial
|
|
30
|
+
from http.server import HTTPServer, BaseHTTPRequestHandler
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Display labels for the graded dimensions, keyed by the field names looked
# up inside a grade's "dimensions" mapping. Insertion order is the order in
# which the score bars are rendered.
DIMENSION_LABELS = {
    "sessionDiscipline": "S1 Session Discipline",
    "discoveryEfficiency": "S2 Discovery Efficiency",
    "taskHygiene": "S3 Task Hygiene",
    "errorProtocol": "S4 Error Protocol",
    "disclosureUse": "S5 Progressive Disclosure",
}

# Badge colour for each letter grade.
GRADE_COLORS = {
    "A": "#22c55e",
    "B": "#10b981",
    "C": "#eab308",
    "D": "#f97316",
    "F": "#ef4444",
}

# Bar colours, picked by dimension position (wrapping around if needed).
DIM_COLORS = [
    "#6366f1",
    "#8b5cf6",
    "#ec4899",
    "#f59e0b",
    "#10b981",
]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def letter_grade(score):
    """Translate a numeric score into a letter grade.

    The cut-offs are inclusive lower bounds: 90 gives "A", 75 gives "B",
    60 gives "C" and 45 gives "D"; anything lower is "F". A missing score
    (``None``) yields "?".
    """
    if score is None:
        return "?"
    # Ordered from the highest floor down so the first match wins.
    for floor, letter in ((90, "A"), (75, "B"), (60, "C"), (45, "D")):
        if score >= floor:
            return letter
    return "F"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def load_json_file(path):
    """Read *path* and return its parsed JSON content, or ``None`` on failure.

    Args:
        path: Location of the JSON file (anything ``Path()`` accepts).

    Returns:
        The decoded JSON value, or ``None`` when the file cannot be read
        (missing, a directory, permission denied) or does not contain valid
        UTF-8 encoded JSON.
    """
    try:
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default encoding, which would corrupt or reject non-ASCII text on
        # systems whose locale is not UTF-8.
        text = Path(path).read_text(encoding="utf-8")
        return json.loads(text)
    except (OSError, ValueError):
        # OSError covers I/O failures; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def find_grade_files(run_dir):
    """Return every ``grade.json`` located anywhere below *run_dir*, sorted by path."""
    root = Path(run_dir)
    matches = list(root.rglob("grade.json"))
    matches.sort()
    return matches
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def find_comparison_files(run_dir):
    """Return every ``comparison.json`` located anywhere below *run_dir*, sorted by path."""
    root = Path(run_dir)
    matches = list(root.rglob("comparison.json"))
    matches.sort()
    return matches
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _read_operations(ops_path):
    """Parse a JSON-lines file into a list, skipping blank and malformed lines."""
    if not ops_path.exists():
        return []
    operations = []
    # Decode as UTF-8 regardless of locale; undecodable bytes are replaced so
    # a single bad byte cannot abort the whole report.
    for raw in ops_path.read_text(encoding="utf-8", errors="replace").splitlines():
        raw = raw.strip()
        if not raw:
            continue
        try:
            operations.append(json.loads(raw))
        except ValueError:
            # Best effort: one corrupt line must not discard the rest of the log.
            continue
    return operations


def collect_run_data(run_dir):
    """Collect all artifacts from the run directory.

    Top-level files read from *run_dir*: ``run-manifest.json`` and
    ``token-summary.json`` (replaced by ``{}`` when they cannot be loaded),
    ``analysis.json`` (``None`` when it cannot be loaded) and ``report.md``
    (``None`` when absent).

    Sub-directories are then walked three levels deep, treated as
    slot / run / arm. Each arm contributes its ``grade.json``, ``timing.json``
    and the parsed lines of ``operations.jsonl``; each run additionally
    carries its ``comparison.json``.

    Args:
        run_dir: Root directory of the run (anything ``Path()`` accepts).

    Returns:
        A dict with the keys ``manifest``, ``token_summary``, ``analysis``,
        ``report_md`` and ``slots``. ``slots`` maps slot name to
        ``{"runs": {run name: {"arms": {arm name: {...}}, "comparison": ...}}}``.

    Raises:
        OSError: If *run_dir* (or a directory below it) cannot be listed.
    """
    root = Path(run_dir)
    data = {
        "manifest": load_json_file(root / "run-manifest.json") or {},
        "token_summary": load_json_file(root / "token-summary.json") or {},
        "analysis": load_json_file(root / "analysis.json"),
        "report_md": None,
        "slots": {},
    }
    report_path = root / "report.md"
    if report_path.exists():
        # Explicit UTF-8 so the report renders the same on every platform.
        data["report_md"] = report_path.read_text(encoding="utf-8", errors="replace")

    # Names of the top-level artifact files; an entry carrying one of these
    # names is never treated as a slot.
    artifact_names = ("run-manifest.json", "token-summary.json", "analysis.json", "report.md")

    # Walk slot/run/arm structure
    for slot_dir in sorted(root.iterdir()):
        if not slot_dir.is_dir() or slot_dir.name.startswith("."):
            continue
        if slot_dir.name in artifact_names:
            continue
        runs = {}
        data["slots"][slot_dir.name] = {"runs": runs}

        for run_subdir in sorted(slot_dir.iterdir()):
            if not run_subdir.is_dir():
                continue
            arms = {}
            for arm_dir in sorted(run_subdir.iterdir()):
                if not arm_dir.is_dir():
                    continue
                arms[arm_dir.name] = {
                    "grade": load_json_file(arm_dir / "grade.json"),
                    "timing": load_json_file(arm_dir / "timing.json"),
                    "operations": _read_operations(arm_dir / "operations.jsonl"),
                }
            runs[run_subdir.name] = {
                "arms": arms,
                "comparison": load_json_file(run_subdir / "comparison.json"),
            }

    return data
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def pct(n, total=20):
    """Express *n* as a whole-number percentage of *total*, clamped to 0..100.

    Returns 0 when *n* is ``None`` or *total* is zero.
    """
    if n is None or total == 0:
        return 0
    percent = round(n / total * 100)
    if percent < 0:
        return 0
    if percent > 100:
        return 100
    return percent
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def dim_bar(label, score, max_score=20, color="#6366f1"):
    """Render one dimension row: label, "score/max" text and a fill bar.

    The bar width is ``pct(score, max_score)`` percent. A missing score is
    shown as an em dash.
    """
    width = pct(score, max_score)
    if score is None:
        shown = "—"
    else:
        shown = f"{score}/{max_score}"
    # The leading empty element reproduces the newline the fragment starts with.
    return "\n".join([
        "",
        '<div class="dim-row">',
        f'<div class="dim-label"><span>{esc(label)}</span><span class="dim-score">{esc(shown)}</span></div>',
        f'<div class="dim-track"><div class="dim-fill" style="width:{width}%;background:{color}"></div></div>',
        "</div>",
    ])
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def grade_badge(letter):
    """Return a coloured ``<span>`` badge for *letter*.

    Letters without an entry in ``GRADE_COLORS`` fall back to grey.
    """
    tint = GRADE_COLORS.get(letter, "#6b7280")
    # The two-digit suffixes are alpha values appended to the hex colour.
    css = f"background:{tint}22;color:{tint};border:1px solid {tint}44"
    return f'<span class="grade-badge" style="{css}">{esc(letter)}</span>'
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def esc(s):
    """Return *s* as text that is safe to embed in HTML content or attributes.

    Args:
        s: Any value. ``None`` becomes the empty string; everything else is
            converted with ``str()`` before escaping.

    Returns:
        The string with the ampersand, angle brackets and both quote
        characters replaced by HTML character references.
    """
    if s is None:
        return ""
    # Imported locally so this function does not depend on a module-level
    # import being present.
    from html import escape as _html_escape

    return _html_escape(str(s), quote=True)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def render_grade_card(arm_label, grade_data, timing_data, color):
    """Build the HTML card summarising one arm's grade.

    Args:
        arm_label: Heading text for the card.
        grade_data: Mapping from which ``totalScore``, ``dimensions``,
            ``flags`` and ``entryCount`` are read. A falsy value produces a
            short "no data" placeholder instead of a card.
        timing_data: Mapping from which ``total_tokens`` is read, or a falsy
            value when no timing information is available.
        color: CSS colour used for the card's top border and heading.

    Returns:
        An HTML fragment as a string.
    """
    if not grade_data:
        return f'<div class="no-data">No grade data for {esc(arm_label)}</div>'

    total = grade_data.get("totalScore", 0)
    letter = letter_grade(total)
    dims = grade_data.get("dimensions", {})
    flags = grade_data.get("flags", [])
    entry_count = grade_data.get("entryCount", 0)

    tokens = None
    if timing_data:
        tokens = timing_data.get("total_tokens")
    # A zero or missing token count is displayed as an em dash.
    token_str = f"{tokens:,}" if tokens else "—"

    if flags:
        flag_items = "".join(f"<li>{esc(f)}</li>" for f in flags)
        flags_html = (
            '<div class="flags-section">'
            f'<div class="flags-label">Flags ({len(flags)})</div>'
            f'<ul class="flags-list">{flag_items}</ul></div>'
        )
    else:
        flags_html = '<div class="flags-section no-flags">No flags — clean session</div>'

    # One score bar per known dimension, each out of 20, coloured by position.
    dim_bars = "".join(
        dim_bar(
            dim_label,
            dims.get(dim_key, {}).get("score"),
            20,
            DIM_COLORS[i % len(DIM_COLORS)],
        )
        for i, (dim_key, dim_label) in enumerate(DIMENSION_LABELS.items())
    )

    # Evidence lists, only for dimensions that actually carry evidence.
    evidence_parts = []
    for dim_key, dim_label in DIMENSION_LABELS.items():
        evs = dims.get(dim_key, {}).get("evidence", [])
        if not evs:
            continue
        items = "".join(f"<li>{esc(e)}</li>" for e in evs)
        evidence_parts.append(
            f'<div class="ev-dim">{esc(dim_label)}</div><ul class="ev-list">{items}</ul>'
        )
    evidence_html = "".join(evidence_parts)
    evidence_block = ""
    if evidence_html:
        evidence_block = (
            '<div class="evidence-section"><div class="ev-title">Evidence</div>'
            f"{evidence_html}</div>"
        )

    # The leading empty element reproduces the newline the fragment starts with.
    return "\n".join([
        "",
        f'<div class="grade-card" style="border-top:3px solid {color}">',
        '<div class="grade-card-header">',
        f'<div class="arm-label" style="color:{color}">{esc(arm_label)}</div>',
        '<div class="grade-score-block">',
        grade_badge(letter),
        f'<span class="score-text">{esc(str(total))}<span class="score-max">/100</span></span>',
        "</div>",
        '<div class="grade-meta">',
        f"<span>{esc(str(entry_count))} audit entries</span>",
        f"<span>Tokens: {esc(token_str)}</span>",
        "</div>",
        "</div>",
        f'<div class="dim-bars">{dim_bars}</div>',
        flags_html,
        evidence_block,
        "</div>",
    ])
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def render_comparison_card(comp_data):
    """Build the "Blind Comparison" card for one run.

    Args:
        comp_data: Mapping from which ``winner``, ``reasoning``, ``rubric``
            and ``expectation_results`` are read. A falsy value yields an
            empty string.

    Returns:
        An HTML fragment with a winner badge, the reasoning text, a rubric
        score table and an expectations table (one row per arm "A" and "B").
    """
    if not comp_data:
        return ""

    winner = comp_data.get("winner", "?")
    reasoning = comp_data.get("reasoning", "")
    rubric = comp_data.get("rubric", {})

    if winner == "A":
        winner_color = "#22c55e"
    elif winner == "B":
        winner_color = "#f97316"
    else:
        winner_color = "#6b7280"
    # Anything other than an explicit "A" or "B" is presented as a tie.
    winner_label = f"Arm {winner}" if winner in ("A", "B") else "Tie"

    score_rows = []
    for arm_key in ("A", "B"):
        scores = rubric.get(arm_key, {})
        overall = scores.get("overall_score", "—")
        content = scores.get("content_score", "—")
        struct = scores.get("structure_score", "—")
        score_rows.append(
            f"<tr><td>Arm {esc(arm_key)}</td><td>{esc(str(overall))}/10</td>"
            f"<td>{esc(str(content))}</td><td>{esc(str(struct))}</td></tr>"
        )
    rows = "".join(score_rows)

    expectation_rows = []
    exp_res = comp_data.get("expectation_results", {})
    for arm_key in ("A", "B"):
        outcome = exp_res.get(arm_key, {})
        pr = outcome.get("pass_rate", None)
        if pr is not None:
            # pass_rate is multiplied by 100 for display as a percentage
            pr_str = f"{round(pr*100)}%"
        else:
            pr_str = "—"
        passed = outcome.get("passed", "—")
        total = outcome.get("total", "—")
        expectation_rows.append(
            f"<tr><td>Arm {esc(arm_key)}</td><td>{esc(str(passed))}/{esc(str(total))}</td>"
            f"<td>{esc(pr_str)}</td></tr>"
        )
    exp_rows = "".join(expectation_rows)

    badge_css = (
        f"background:{winner_color}22;color:{winner_color};"
        f"border:1px solid {winner_color}44"
    )
    # The leading empty element reproduces the newline the fragment starts with.
    return "\n".join([
        "",
        '<div class="comp-card">',
        '<div class="comp-header">',
        '<span class="comp-title">Blind Comparison</span>',
        f'<span class="winner-badge" style="{badge_css}">Winner: {esc(winner_label)}</span>',
        "</div>",
        f'<div class="reasoning-text">{esc(reasoning)}</div>',
        '<div class="comp-tables">',
        '<table class="comp-table">',
        "<thead><tr><th>Arm</th><th>Overall</th><th>Content</th><th>Structure</th></tr></thead>",
        f"<tbody>{rows}</tbody>",
        "</table>",
        '<table class="comp-table">',
        "<thead><tr><th>Arm</th><th>Expectations</th><th>Pass Rate</th></tr></thead>",
        f"<tbody>{exp_rows}</tbody>",
        "</table>",
        "</div>",
        "</div>",
    ])
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def render_token_summary(token_summary):
    """Build the "Token Economy" section.

    Args:
        token_summary: Mapping from which ``by_arm``, ``delta_A_vs_B`` and
            ``warnings`` are read. A falsy value yields an empty string.

    Returns:
        An HTML fragment with one table row per arm (sorted by arm name),
        followed by an optional delta line and optional warning lines.
    """
    if not token_summary:
        return ""

    by_arm = token_summary.get("by_arm", {})
    delta = token_summary.get("delta_A_vs_B", {})
    warnings = token_summary.get("warnings", [])

    row_parts = []
    for arm, stats in sorted(by_arm.items()):
        iface = stats.get("interface", "?")
        totals = stats.get("total_tokens", {})
        mean = totals.get("mean")
        sd = totals.get("stddev")
        n = totals.get("count", 0)
        # Zero or missing statistics are shown as a dash / left blank.
        mean_str = f"{mean:,.0f}" if mean else "—"
        sd_str = f"±{sd:,.0f}" if sd else ""
        row_parts.append(
            f"<tr><td>{esc(arm)}</td><td>{esc(iface)}</td>"
            f"<td>{esc(mean_str)} {esc(sd_str)}</td><td>{esc(str(n))}</td></tr>"
        )
    rows = "".join(row_parts)

    delta_html = ""
    if delta and delta.get("mean_tokens"):
        pct_str = delta.get("percent", "")
        note = delta.get("note", "")
        delta_html = (
            f'<div class="delta-row">Delta (A−B): <strong>{esc(pct_str)}</strong>'
            f" — {esc(note)}</div>"
        )

    warn_html = ""
    if warnings:
        warn_html = "".join(f'<div class="warn-row">{esc(w)}</div>' for w in warnings)

    # The leading empty element reproduces the newline the fragment starts with.
    return "\n".join([
        "",
        '<div class="token-section">',
        '<div class="section-title">Token Economy</div>',
        '<table class="comp-table">',
        "<thead><tr><th>Arm</th><th>Interface</th><th>Mean Tokens</th><th>Runs</th></tr></thead>",
        f"<tbody>{rows}</tbody>",
        "</table>",
        delta_html,
        warn_html,
        "</div>",
    ])
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def render_slot(slot_name, slot_data):
    """Build the HTML block for one slot: every run, with its arm cards.

    Args:
        slot_name: Title shown above the slot.
        slot_data: Mapping with a ``runs`` entry of the form
            ``{run name: {"arms": {arm name: {...}}, "comparison": ...}}``.

    Returns:
        An HTML fragment. Runs and arms are emitted in sorted name order;
        each run shows its grade cards followed by the comparison card.
    """
    run_blocks = []
    for run_num, run_data in sorted(slot_data.get("runs", {}).items()):
        cards = []
        for arm_name, arm_data in sorted(run_data.get("arms", {}).items()):
            arm_color = "#6366f1" if arm_name == "arm-A" else "#f59e0b"
            timing = arm_data.get("timing")
            # Timing data may be missing, or carry a null/empty "interface".
            # Fall back to the arm name in every such case so that .upper()
            # always receives a string instead of raising on None.
            iface = (timing or {}).get("interface") or arm_name
            label = f"{arm_name} ({str(iface).upper()})"
            cards.append(render_grade_card(label, arm_data.get("grade"), timing, arm_color))
        arms_html = "".join(cards)

        comp_html = render_comparison_card(run_data.get("comparison"))
        # The leading empty element reproduces the newline each block starts with.
        run_blocks.append("\n".join([
            "",
            '<div class="run-block">',
            f'<div class="run-label">{esc(run_num)}</div>',
            f'<div class="arms-row">{arms_html}</div>',
            comp_html,
            "</div>",
        ]))
    runs_html = "".join(run_blocks)

    return "\n".join([
        "",
        '<div class="slot-block">',
        f'<div class="slot-title">{esc(slot_name)}</div>',
        runs_html,
        "</div>",
    ])
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def generate_html(run_dir: Path) -> str:
|
|
317
|
+
data = collect_run_data(run_dir)
|
|
318
|
+
manifest = data["manifest"]
|
|
319
|
+
token_summary = data["token_summary"]
|
|
320
|
+
analysis = data["analysis"]
|
|
321
|
+
report_md = data["report_md"]
|
|
322
|
+
|
|
323
|
+
mode = manifest.get("mode", "—")
|
|
324
|
+
created_at = manifest.get("created_at", "—")
|
|
325
|
+
arms_info = manifest.get("arms", {})
|
|
326
|
+
arm_a_label = arms_info.get("A", {}).get("label", "Arm A")
|
|
327
|
+
arm_b_label = arms_info.get("B", {}).get("label", "Arm B")
|
|
328
|
+
|
|
329
|
+
slots_html = ""
|
|
330
|
+
for slot_name, slot_data in sorted(data["slots"].items()):
|
|
331
|
+
slots_html += render_slot(slot_name, slot_data)
|
|
332
|
+
|
|
333
|
+
token_html = render_token_summary(token_summary)
|
|
334
|
+
|
|
335
|
+
analysis_html = ""
|
|
336
|
+
if analysis:
|
|
337
|
+
recs = analysis.get("improvement_suggestions", [])
|
|
338
|
+
rec_items = "".join(
|
|
339
|
+
f'<div class="rec-item" style="border-left:3px solid {"#ef4444" if r.get("priority")=="high" else "#eab308" if r.get("priority")=="medium" else "#6366f1"}">'
|
|
340
|
+
f'<div class="rec-priority">{esc(r.get("priority","").upper())}</div>'
|
|
341
|
+
f'<div class="rec-dim">{esc(r.get("dimension",""))}</div>'
|
|
342
|
+
f'<div class="rec-text">{esc(r.get("suggestion",""))}</div>'
|
|
343
|
+
f'<div class="rec-impact">{esc(r.get("expected_impact",""))}</div>'
|
|
344
|
+
f'</div>'
|
|
345
|
+
for r in recs
|
|
346
|
+
)
|
|
347
|
+
analysis_html = f'<div class="section-title">Recommendations</div>{rec_items}' if rec_items else ""
|
|
348
|
+
|
|
349
|
+
report_html = ""
|
|
350
|
+
if report_md:
|
|
351
|
+
pre_lines = report_md.replace("&", "&").replace("<", "<").replace(">", ">")
|
|
352
|
+
report_html = f'<pre class="report-pre">{pre_lines}</pre>'
|
|
353
|
+
|
|
354
|
+
ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
|
|
355
|
+
return f"""<!DOCTYPE html>
|
|
356
|
+
<html lang="en">
|
|
357
|
+
<head>
|
|
358
|
+
<meta charset="UTF-8">
|
|
359
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
360
|
+
<title>ct-grade Results Viewer</title>
|
|
361
|
+
<style>
|
|
362
|
+
:root {{
|
|
363
|
+
--bg:#0f1117;--surface:#1a1d27;--surface2:#21263a;--border:#2a2f45;
|
|
364
|
+
--text:#e8eaf0;--muted:#6b7280;--accent:#6366f1;--radius:8px;
|
|
365
|
+
--green:#22c55e;--red:#ef4444;--yellow:#eab308;
|
|
366
|
+
}}
|
|
367
|
+
*{{box-sizing:border-box;margin:0;padding:0}}
|
|
368
|
+
body{{font-family:system-ui,sans-serif;background:var(--bg);color:var(--text);min-height:100vh;font-size:14px}}
|
|
369
|
+
.topbar{{background:var(--surface);border-bottom:1px solid var(--border);padding:12px 24px;display:flex;align-items:center;justify-content:space-between}}
|
|
370
|
+
.topbar h1{{font-size:1rem;font-weight:700;letter-spacing:-.01em}}
|
|
371
|
+
.topbar .meta{{font-size:11px;color:var(--muted)}}
|
|
372
|
+
.badge{{background:#3730a322;color:var(--accent);font-size:11px;padding:2px 8px;border-radius:20px;font-weight:600;margin-left:8px}}
|
|
373
|
+
.tabs{{display:flex;gap:0;border-bottom:1px solid var(--border);background:var(--surface);padding:0 24px}}
|
|
374
|
+
.tab{{padding:10px 16px;cursor:pointer;font-size:13px;font-weight:500;color:var(--muted);border-bottom:2px solid transparent;background:none;border-top:none;border-left:none;border-right:none;color:var(--muted)}}
|
|
375
|
+
.tab:hover{{color:var(--text)}}
|
|
376
|
+
.tab.active{{color:var(--accent);border-bottom-color:var(--accent)}}
|
|
377
|
+
.pane{{display:none;padding:24px;max-width:1200px;margin:0 auto}}
|
|
378
|
+
.pane.active{{display:block}}
|
|
379
|
+
.slot-block{{margin-bottom:32px}}
|
|
380
|
+
.slot-title{{font-size:16px;font-weight:700;margin-bottom:16px;padding-bottom:8px;border-bottom:1px solid var(--border)}}
|
|
381
|
+
.run-block{{margin-bottom:24px}}
|
|
382
|
+
.run-label{{font-size:12px;color:var(--muted);font-family:monospace;margin-bottom:10px}}
|
|
383
|
+
.arms-row{{display:grid;grid-template-columns:1fr 1fr;gap:16px;margin-bottom:12px}}
|
|
384
|
+
.grade-card{{background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:16px}}
|
|
385
|
+
.grade-card-header{{display:flex;align-items:center;gap:12px;margin-bottom:12px;flex-wrap:wrap}}
|
|
386
|
+
.arm-label{{font-size:13px;font-weight:700}}
|
|
387
|
+
.grade-score-block{{display:flex;align-items:center;gap:8px}}
|
|
388
|
+
.grade-badge{{font-size:20px;font-weight:700;padding:2px 10px;border-radius:6px}}
|
|
389
|
+
.score-text{{font-size:22px;font-weight:700}}
|
|
390
|
+
.score-max{{font-size:13px;color:var(--muted);font-weight:400}}
|
|
391
|
+
.grade-meta{{font-size:11px;color:var(--muted);display:flex;gap:12px;margin-left:auto}}
|
|
392
|
+
.dim-row{{margin-bottom:8px}}
|
|
393
|
+
.dim-label{{display:flex;justify-content:space-between;font-size:11px;color:var(--muted);margin-bottom:3px}}
|
|
394
|
+
.dim-score{{font-weight:600;color:var(--text)}}
|
|
395
|
+
.dim-track{{height:6px;background:var(--surface2);border-radius:3px;overflow:hidden}}
|
|
396
|
+
.dim-fill{{height:100%;border-radius:3px;transition:width .4s}}
|
|
397
|
+
.flags-section{{margin-top:12px;padding:8px 10px;border-radius:4px;background:rgba(239,68,68,.08);border:1px solid rgba(239,68,68,.2)}}
|
|
398
|
+
.no-flags{{background:rgba(34,197,94,.08);border:1px solid rgba(34,197,94,.2);color:var(--green);font-size:12px}}
|
|
399
|
+
.flags-label{{font-size:11px;font-weight:700;color:var(--red);margin-bottom:6px}}
|
|
400
|
+
.flags-list{{padding-left:16px;font-size:12px;color:var(--red);line-height:1.8}}
|
|
401
|
+
.evidence-section{{margin-top:10px}}
|
|
402
|
+
.ev-title{{font-size:11px;font-weight:700;color:var(--muted);margin-bottom:6px}}
|
|
403
|
+
.ev-dim{{font-size:11px;font-weight:600;color:var(--accent);margin-top:6px}}
|
|
404
|
+
.ev-list{{padding-left:16px;font-size:11px;color:var(--muted);line-height:1.8}}
|
|
405
|
+
.comp-card{{background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:16px;margin-top:12px}}
|
|
406
|
+
.comp-header{{display:flex;align-items:center;justify-content:space-between;margin-bottom:10px}}
|
|
407
|
+
.comp-title{{font-size:13px;font-weight:700}}
|
|
408
|
+
.winner-badge{{font-size:13px;font-weight:700;padding:4px 12px;border-radius:20px}}
|
|
409
|
+
.reasoning-text{{font-size:12px;color:var(--muted);line-height:1.6;margin-bottom:12px}}
|
|
410
|
+
.comp-tables{{display:grid;grid-template-columns:1fr 1fr;gap:12px}}
|
|
411
|
+
.comp-table{{width:100%;border-collapse:collapse;font-size:12px}}
|
|
412
|
+
.comp-table th,.comp-table td{{padding:6px 10px;border:1px solid var(--border);text-align:left}}
|
|
413
|
+
.comp-table th{{background:var(--surface2);font-weight:600}}
|
|
414
|
+
.token-section{{background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:16px;margin-bottom:24px}}
|
|
415
|
+
.section-title{{font-size:15px;font-weight:700;margin-bottom:14px}}
|
|
416
|
+
.delta-row{{margin-top:10px;font-size:13px;color:var(--muted);padding:8px;background:var(--surface2);border-radius:4px}}
|
|
417
|
+
.warn-row{{margin-top:6px;font-size:12px;color:var(--yellow);padding:6px 8px;background:rgba(234,179,8,.08);border-radius:4px}}
|
|
418
|
+
.rec-item{{background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:12px 16px;margin-bottom:10px}}
|
|
419
|
+
.rec-priority{{font-size:10px;font-weight:700;color:var(--muted);letter-spacing:.08em;margin-bottom:3px}}
|
|
420
|
+
.rec-dim{{font-size:12px;font-weight:600;color:var(--accent);margin-bottom:4px}}
|
|
421
|
+
.rec-text{{font-size:13px;line-height:1.5;margin-bottom:4px}}
|
|
422
|
+
.rec-impact{{font-size:11px;color:var(--muted);font-style:italic}}
|
|
423
|
+
.report-pre{{font-family:monospace;font-size:12px;line-height:1.8;white-space:pre-wrap;word-break:break-all;color:var(--text);background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:16px}}
|
|
424
|
+
.no-data{{color:var(--muted);font-size:12px;padding:12px}}
|
|
425
|
+
@media(max-width:700px){{.arms-row,.comp-tables{{grid-template-columns:1fr}}}}
|
|
426
|
+
</style>
|
|
427
|
+
</head>
|
|
428
|
+
<body>
|
|
429
|
+
<div class="topbar">
|
|
430
|
+
<h1>ct-grade <span class="badge">Results Viewer</span></h1>
|
|
431
|
+
<div class="meta">Mode: {esc(mode)} · Run: {esc(str(run_dir))} · Generated: {esc(ts)}</div>
|
|
432
|
+
</div>
|
|
433
|
+
<div class="tabs">
|
|
434
|
+
<button class="tab active" onclick="showTab(event,'pane-results')">Results</button>
|
|
435
|
+
<button class="tab" onclick="showTab(event,'pane-tokens')">Token Economy</button>
|
|
436
|
+
<button class="tab" onclick="showTab(event,'pane-analysis')">Analysis</button>
|
|
437
|
+
<button class="tab" onclick="showTab(event,'pane-report')">Report</button>
|
|
438
|
+
</div>
|
|
439
|
+
<div class="pane active" id="pane-results">
|
|
440
|
+
{slots_html if slots_html else '<div style="color:var(--muted);padding:40px;text-align:center">No run data found. Run setup_run.py and execute the agents first.</div>'}
|
|
441
|
+
</div>
|
|
442
|
+
<div class="pane" id="pane-tokens">
|
|
443
|
+
{token_html if token_html else '<div style="color:var(--muted);padding:40px;text-align:center">No token data. Run: python scripts/token_tracker.py --run-dir <dir></div>'}
|
|
444
|
+
</div>
|
|
445
|
+
<div class="pane" id="pane-analysis">
|
|
446
|
+
{analysis_html if analysis_html else '<div style="color:var(--muted);padding:40px;text-align:center">No analysis.json found. Spawn the analysis-reporter agent first.</div>'}
|
|
447
|
+
</div>
|
|
448
|
+
<div class="pane" id="pane-report">
|
|
449
|
+
{report_html if report_html else '<div style="color:var(--muted);padding:40px;text-align:center">No report.md found. Run: python scripts/generate_report.py --run-dir <dir></div>'}
|
|
450
|
+
</div>
|
|
451
|
+
<script>
|
|
452
|
+
function showTab(evt, paneId) {{
|
|
453
|
+
document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
|
|
454
|
+
document.querySelectorAll('.pane').forEach(p => p.classList.remove('active'));
|
|
455
|
+
evt.target.classList.add('active');
|
|
456
|
+
document.getElementById(paneId).classList.add('active');
|
|
457
|
+
}}
|
|
458
|
+
</script>
|
|
459
|
+
</body>
|
|
460
|
+
</html>"""
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def _kill_port(port: int) -> None:
    """Best-effort: ask any process listening on TCP ``port`` to terminate.

    Runs ``lsof`` to find the listener PID(s) and sends each one ``SIGTERM``.
    Every failure mode (``lsof`` missing or timing out, unparsable output,
    process already gone, process owned by another user) is ignored, because
    the caller can still fall back to a different port.

    Args:
        port: TCP port number to free.
    """
    try:
        # Restrict the match to *listening* TCP sockets. A bare ":PORT" filter
        # also matches clients connected to that port (e.g. a browser tab that
        # still has the old viewer open), which would then be killed as well.
        result = subprocess.run(
            ["lsof", "-ti", f"tcp:{port}", "-sTCP:LISTEN"],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (subprocess.TimeoutExpired, OSError):
        # lsof is unavailable, not executable, or hung: nothing we can do.
        return

    signalled = False
    for token in result.stdout.split():
        try:
            os.kill(int(token), signal.SIGTERM)
        except (ValueError, OSError):
            # Non-numeric output, the process already exited
            # (ProcessLookupError) or we may not signal it (PermissionError).
            continue
        signalled = True

    if signalled:
        # Give the terminated listener a moment to release the socket.
        time.sleep(0.5)
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
class GradeViewerHandler(BaseHTTPRequestHandler):
    """HTTP request handler that serves the grade viewer page for one run.

    The handler takes the run directory as its first constructor argument, so
    it is meant to be bound with ``functools.partial`` before being handed to
    the HTTP server. Only the index page is served; every other path is a 404.
    """

    # Request paths (query string excluded) that map to the viewer page.
    _INDEX_PATHS = ("/", "/index.html")

    def __init__(self, run_dir, *args, **kwargs):
        # run_dir must be stored before delegating: the base constructor
        # processes the request (and therefore calls do_GET) right away.
        self.run_dir = run_dir
        super().__init__(*args, **kwargs)

    @classmethod
    def _is_index_path(cls, raw_path: str) -> bool:
        """Return True when ``raw_path`` addresses the viewer page.

        Any query string is ignored, so ``/?v=1`` and
        ``/index.html?tab=report`` are treated like ``/``.
        """
        return raw_path.partition("?")[0] in cls._INDEX_PATHS

    def do_GET(self) -> None:
        """Serve the freshly generated viewer page, or 404 for other paths."""
        if not self._is_index_path(self.path):
            self.send_error(404)
            return

        try:
            # Regenerate on every request — picks up new run data without restart
            html = generate_html(self.run_dir)
        except Exception:
            # Request boundary: report the failure to the operator and answer
            # with a 500 instead of dropping the connection without a response.
            import traceback

            traceback.print_exc()
            self.send_error(500, "Failed to render grade viewer")
            return

        content = html.encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(content)))
        self.end_headers()
        self.wfile.write(content)

    def log_message(self, fmt, *args):
        """Discard the per-request access log lines of the base handler."""
        pass
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def main() -> None:
    """Command-line entry point for the ct-grade results viewer.

    Options:
        --run-dir      Required. Path to the A/B run directory to display.
        --port / -p    Port to serve on (default 3119).
        --static / -s  Write the page to this file and exit instead of serving.

    Exits with status 1 when the run directory does not exist and with
    status 0 after writing a static page. Otherwise it serves the viewer on
    127.0.0.1 until interrupted with Ctrl+C.
    """
    parser = argparse.ArgumentParser(description="ct-grade A/B Results Viewer")
    parser.add_argument("--run-dir", required=True, type=Path, help="Path to A/B run directory")
    parser.add_argument("--port", "-p", type=int, default=3119)
    parser.add_argument("--static", "-s", type=Path, default=None, help="Write static HTML, don't serve")
    args = parser.parse_args()

    run_dir = args.run_dir.resolve()
    if not run_dir.is_dir():
        print(f"ERROR: run-dir not found: {run_dir}", file=sys.stderr)
        sys.exit(1)

    if args.static:
        html = generate_html(run_dir)
        args.static.parent.mkdir(parents=True, exist_ok=True)
        # The page contains non-ASCII characters and is served as UTF-8, so
        # write it as UTF-8 explicitly instead of using the locale encoding.
        args.static.write_text(html, encoding="utf-8")
        print(f"\n Grade viewer written: {args.static}\n")
        sys.exit(0)

    port = args.port
    # Free the requested port from a previous viewer instance, if any.
    _kill_port(port)
    handler = partial(GradeViewerHandler, run_dir)
    try:
        server = HTTPServer(("127.0.0.1", port), handler)
    except OSError:
        # Requested port is unavailable: let the OS choose a free one.
        server = HTTPServer(("127.0.0.1", 0), handler)
        port = server.server_address[1]

    url = f"http://localhost:{port}"
    print("\n ct-grade Results Viewer")
    print(" ─────────────────────────────────────")
    print(f" URL: {url}")
    print(f" Run dir: {run_dir}")
    print("\n Refreshes on browser reload (live data).")
    print(" Press Ctrl+C to stop.\n")
    webbrowser.open(url)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nStopped.")
    finally:
        # Release the listening socket even if serving fails unexpectedly.
        server.server_close()


if __name__ == "__main__":
    main()
|