@cleocode/skills 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dispatch-config.json +404 -0
- package/index.d.ts +178 -0
- package/index.js +405 -0
- package/package.json +14 -0
- package/profiles/core.json +7 -0
- package/profiles/full.json +10 -0
- package/profiles/minimal.json +7 -0
- package/profiles/recommended.json +7 -0
- package/provider-skills-map.json +97 -0
- package/skills/_shared/cleo-style-guide.md +84 -0
- package/skills/_shared/manifest-operations.md +810 -0
- package/skills/_shared/placeholders.json +433 -0
- package/skills/_shared/skill-chaining-patterns.md +237 -0
- package/skills/_shared/subagent-protocol-base.md +223 -0
- package/skills/_shared/task-system-integration.md +232 -0
- package/skills/_shared/testing-framework-config.md +110 -0
- package/skills/ct-cleo/SKILL.md +490 -0
- package/skills/ct-cleo/references/anti-patterns.md +19 -0
- package/skills/ct-cleo/references/loom-lifecycle.md +136 -0
- package/skills/ct-cleo/references/orchestrator-constraints.md +55 -0
- package/skills/ct-cleo/references/session-protocol.md +162 -0
- package/skills/ct-codebase-mapper/SKILL.md +82 -0
- package/skills/ct-contribution/SKILL.md +521 -0
- package/skills/ct-contribution/templates/contribution-init.json +21 -0
- package/skills/ct-dev-workflow/SKILL.md +423 -0
- package/skills/ct-docs-lookup/SKILL.md +66 -0
- package/skills/ct-docs-review/SKILL.md +175 -0
- package/skills/ct-docs-write/SKILL.md +108 -0
- package/skills/ct-documentor/SKILL.md +231 -0
- package/skills/ct-epic-architect/SKILL.md +305 -0
- package/skills/ct-epic-architect/references/bug-epic-example.md +172 -0
- package/skills/ct-epic-architect/references/commands.md +201 -0
- package/skills/ct-epic-architect/references/feature-epic-example.md +210 -0
- package/skills/ct-epic-architect/references/migration-epic-example.md +244 -0
- package/skills/ct-epic-architect/references/output-format.md +92 -0
- package/skills/ct-epic-architect/references/patterns.md +284 -0
- package/skills/ct-epic-architect/references/refactor-epic-example.md +412 -0
- package/skills/ct-epic-architect/references/research-epic-example.md +226 -0
- package/skills/ct-epic-architect/references/shell-escaping.md +86 -0
- package/skills/ct-epic-architect/references/skill-aware-execution.md +195 -0
- package/skills/ct-grade/SKILL.md +230 -0
- package/skills/ct-grade/agents/analysis-reporter.md +203 -0
- package/skills/ct-grade/agents/blind-comparator.md +157 -0
- package/skills/ct-grade/agents/scenario-runner.md +134 -0
- package/skills/ct-grade/eval-viewer/__pycache__/generate_grade_review.cpython-314.pyc +0 -0
- package/skills/ct-grade/eval-viewer/generate_grade_review.py +1138 -0
- package/skills/ct-grade/eval-viewer/generate_grade_viewer.py +544 -0
- package/skills/ct-grade/eval-viewer/generate_review.py +283 -0
- package/skills/ct-grade/eval-viewer/grade-review.html +1574 -0
- package/skills/ct-grade/eval-viewer/viewer.html +219 -0
- package/skills/ct-grade/evals/evals.json +94 -0
- package/skills/ct-grade/references/ab-test-methodology.md +150 -0
- package/skills/ct-grade/references/domains.md +137 -0
- package/skills/ct-grade/references/grade-spec.md +236 -0
- package/skills/ct-grade/references/scenario-playbook.md +234 -0
- package/skills/ct-grade/references/token-tracking.md +120 -0
- package/skills/ct-grade/scripts/__pycache__/audit_analyzer.cpython-314.pyc +0 -0
- package/skills/ct-grade/scripts/__pycache__/run_ab_test.cpython-314.pyc +0 -0
- package/skills/ct-grade/scripts/__pycache__/run_all.cpython-314.pyc +0 -0
- package/skills/ct-grade/scripts/__pycache__/token_tracker.cpython-314.pyc +0 -0
- package/skills/ct-grade/scripts/audit_analyzer.py +279 -0
- package/skills/ct-grade/scripts/generate_report.py +283 -0
- package/skills/ct-grade/scripts/run_ab_test.py +504 -0
- package/skills/ct-grade/scripts/run_all.py +287 -0
- package/skills/ct-grade/scripts/setup_run.py +183 -0
- package/skills/ct-grade/scripts/token_tracker.py +630 -0
- package/skills/ct-grade-v2-1/SKILL.md +237 -0
- package/skills/ct-grade-v2-1/agents/analysis-reporter.md +203 -0
- package/skills/ct-grade-v2-1/agents/blind-comparator.md +157 -0
- package/skills/ct-grade-v2-1/agents/scenario-runner.md +179 -0
- package/skills/ct-grade-v2-1/evals/evals.json +74 -0
- package/skills/ct-grade-v2-1/grade-viewer/__pycache__/build_op_stats.cpython-314.pyc +0 -0
- package/skills/ct-grade-v2-1/grade-viewer/__pycache__/generate_grade_review.cpython-314.pyc +0 -0
- package/skills/ct-grade-v2-1/grade-viewer/build_op_stats.py +174 -0
- package/skills/ct-grade-v2-1/grade-viewer/eval-analysis.json +41 -0
- package/skills/ct-grade-v2-1/grade-viewer/eval-report.md +34 -0
- package/skills/ct-grade-v2-1/grade-viewer/generate_grade_review.py +1023 -0
- package/skills/ct-grade-v2-1/grade-viewer/generate_grade_viewer.py +548 -0
- package/skills/ct-grade-v2-1/grade-viewer/grade-review-eval.html +613 -0
- package/skills/ct-grade-v2-1/grade-viewer/grade-review.html +1532 -0
- package/skills/ct-grade-v2-1/grade-viewer/viewer.html +620 -0
- package/skills/ct-grade-v2-1/manifest-entry.json +31 -0
- package/skills/ct-grade-v2-1/references/ab-testing.md +233 -0
- package/skills/ct-grade-v2-1/references/domains-ssot.md +156 -0
- package/skills/ct-grade-v2-1/references/grade-spec-v2.md +167 -0
- package/skills/ct-grade-v2-1/references/playbook-v2.md +393 -0
- package/skills/ct-grade-v2-1/references/token-tracking.md +202 -0
- package/skills/ct-grade-v2-1/scripts/generate_report.py +419 -0
- package/skills/ct-grade-v2-1/scripts/run_ab_test.py +493 -0
- package/skills/ct-grade-v2-1/scripts/run_scenario.py +396 -0
- package/skills/ct-grade-v2-1/scripts/setup_run.py +207 -0
- package/skills/ct-grade-v2-1/scripts/token_tracker.py +175 -0
- package/skills/ct-memory/SKILL.md +84 -0
- package/skills/ct-orchestrator/INSTALL.md +61 -0
- package/skills/ct-orchestrator/README.md +69 -0
- package/skills/ct-orchestrator/SKILL.md +380 -0
- package/skills/ct-orchestrator/manifest-entry.json +19 -0
- package/skills/ct-orchestrator/orchestrator-prompt.txt +17 -0
- package/skills/ct-orchestrator/references/SUBAGENT-PROTOCOL-BLOCK.md +66 -0
- package/skills/ct-orchestrator/references/autonomous-operation.md +167 -0
- package/skills/ct-orchestrator/references/lifecycle-gates.md +98 -0
- package/skills/ct-orchestrator/references/orchestrator-compliance.md +271 -0
- package/skills/ct-orchestrator/references/orchestrator-handoffs.md +85 -0
- package/skills/ct-orchestrator/references/orchestrator-patterns.md +164 -0
- package/skills/ct-orchestrator/references/orchestrator-recovery.md +113 -0
- package/skills/ct-orchestrator/references/orchestrator-spawning.md +271 -0
- package/skills/ct-orchestrator/references/orchestrator-tokens.md +180 -0
- package/skills/ct-research-agent/SKILL.md +226 -0
- package/skills/ct-skill-creator/.cleo/.context-state.json +13 -0
- package/skills/ct-skill-creator/.cleo/logs/cleo.2026-03-07.1.log +24 -0
- package/skills/ct-skill-creator/.cleo/tasks.db +0 -0
- package/skills/ct-skill-creator/SKILL.md +356 -0
- package/skills/ct-skill-creator/agents/analyzer.md +276 -0
- package/skills/ct-skill-creator/agents/comparator.md +204 -0
- package/skills/ct-skill-creator/agents/grader.md +225 -0
- package/skills/ct-skill-creator/assets/eval_review.html +146 -0
- package/skills/ct-skill-creator/eval-viewer/__pycache__/generate_review.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/eval-viewer/generate_review.py +471 -0
- package/skills/ct-skill-creator/eval-viewer/viewer.html +1325 -0
- package/skills/ct-skill-creator/manifest-entry.json +17 -0
- package/skills/ct-skill-creator/references/dynamic-context.md +228 -0
- package/skills/ct-skill-creator/references/frontmatter.md +83 -0
- package/skills/ct-skill-creator/references/invocation-control.md +165 -0
- package/skills/ct-skill-creator/references/output-patterns.md +86 -0
- package/skills/ct-skill-creator/references/provider-deployment.md +175 -0
- package/skills/ct-skill-creator/references/schemas.md +430 -0
- package/skills/ct-skill-creator/references/workflows.md +28 -0
- package/skills/ct-skill-creator/scripts/__init__.py +1 -0
- package/skills/ct-skill-creator/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/aggregate_benchmark.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/generate_report.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/improve_description.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/init_skill.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/quick_validate.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/run_eval.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/run_loop.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/utils.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/skills/ct-skill-creator/scripts/generate_report.py +326 -0
- package/skills/ct-skill-creator/scripts/improve_description.py +247 -0
- package/skills/ct-skill-creator/scripts/init_skill.py +306 -0
- package/skills/ct-skill-creator/scripts/package_skill.py +110 -0
- package/skills/ct-skill-creator/scripts/quick_validate.py +97 -0
- package/skills/ct-skill-creator/scripts/run_eval.py +310 -0
- package/skills/ct-skill-creator/scripts/run_loop.py +328 -0
- package/skills/ct-skill-creator/scripts/utils.py +47 -0
- package/skills/ct-skill-validator/SKILL.md +178 -0
- package/skills/ct-skill-validator/agents/ecosystem-checker.md +151 -0
- package/skills/ct-skill-validator/assets/valid-skill-example.md +13 -0
- package/skills/ct-skill-validator/evals/eval_set.json +14 -0
- package/skills/ct-skill-validator/evals/evals.json +52 -0
- package/skills/ct-skill-validator/manifest-entry.json +20 -0
- package/skills/ct-skill-validator/references/cleo-ecosystem-rules.md +163 -0
- package/skills/ct-skill-validator/references/validation-rules.md +168 -0
- package/skills/ct-skill-validator/scripts/__init__.py +0 -0
- package/skills/ct-skill-validator/scripts/__pycache__/audit_body.cpython-314.pyc +0 -0
- package/skills/ct-skill-validator/scripts/__pycache__/check_ecosystem.cpython-314.pyc +0 -0
- package/skills/ct-skill-validator/scripts/__pycache__/generate_validation_report.cpython-314.pyc +0 -0
- package/skills/ct-skill-validator/scripts/__pycache__/validate.cpython-314.pyc +0 -0
- package/skills/ct-skill-validator/scripts/audit_body.py +242 -0
- package/skills/ct-skill-validator/scripts/check_ecosystem.py +169 -0
- package/skills/ct-skill-validator/scripts/check_manifest.py +172 -0
- package/skills/ct-skill-validator/scripts/generate_validation_report.py +442 -0
- package/skills/ct-skill-validator/scripts/validate.py +422 -0
- package/skills/ct-spec-writer/SKILL.md +189 -0
- package/skills/ct-stickynote/README.md +14 -0
- package/skills/ct-stickynote/SKILL.md +46 -0
- package/skills/ct-task-executor/SKILL.md +296 -0
- package/skills/ct-validator/SKILL.md +216 -0
- package/skills/manifest.json +469 -0
- package/skills.json +281 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
generate_review.py — Serve an interactive eval review for ct-grade.
|
|
4
|
+
|
|
5
|
+
Reads eval run outputs from a workspace directory, embeds all data into
|
|
6
|
+
the viewer.html template, and serves it at localhost:3118.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
# Serve (live-reloading on refresh):
|
|
10
|
+
python eval-viewer/generate_review.py <workspace-path> [--port 3118]
|
|
11
|
+
|
|
12
|
+
# Write static HTML file instead:
|
|
13
|
+
python eval-viewer/generate_review.py <workspace-path> --static output.html
|
|
14
|
+
|
|
15
|
+
# Include benchmark data:
|
|
16
|
+
python eval-viewer/generate_review.py <workspace-path> --benchmark benchmark.json
|
|
17
|
+
|
|
18
|
+
No external dependencies — stdlib only.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import json
|
|
23
|
+
import os
|
|
24
|
+
import re
|
|
25
|
+
import signal
|
|
26
|
+
import subprocess
|
|
27
|
+
import sys
|
|
28
|
+
import time
|
|
29
|
+
import webbrowser
|
|
30
|
+
from functools import partial
|
|
31
|
+
from http.server import HTTPServer, BaseHTTPRequestHandler
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# File extensions embedded into the viewer as plain text; any other
# extension is base64-encoded and embedded as binary (see _embed_file).
TEXT_EXTENSIONS = {
    ".txt", ".md", ".json", ".jsonl", ".csv", ".py", ".ts", ".js",
    ".yaml", ".yml", ".sh", ".html", ".css",
}

# Run-level metadata files that are excluded from a run's output listing.
METADATA_FILES = {"transcript.md", "user_notes.md", "metrics.json"}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def find_runs(workspace: Path) -> list[dict]:
    """Find eval run dirs — directories with an outputs/ subdir.

    Args:
        workspace: Root directory searched recursively.

    Returns:
        Run dicts (see _build_run), sorted by eval_id first (runs without
        an eval_id last), then by run id for a stable order.
    """
    runs: list[dict] = []
    _find_recursive(workspace, workspace, runs)

    def _sort_key(r: dict):
        eid = r.get("eval_id")
        # Explicit is-None flag instead of `eid or float("inf")`: a
        # falsy-but-valid eval_id (e.g. 0) must not be misfiled as
        # "missing", and non-numeric ids need not compare against inf.
        return (eid is None, 0 if eid is None else eid, r["id"])

    runs.sort(key=_sort_key)
    return runs
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _find_recursive(root: Path, current: Path, runs: list) -> None:
    """Depth-first search under *current*, appending run dicts to *runs*.

    A directory that contains an outputs/ subdir is treated as a run
    (a leaf): it is recorded and not descended into. Common tooling and
    vendor directories are skipped entirely.
    """
    if not current.is_dir():
        return

    if (current / "outputs").is_dir():
        # This directory is itself a run — record it and stop descending.
        run = _build_run(root, current)
        if run is not None:
            runs.append(run)
        return

    ignored = {"node_modules", ".git", "__pycache__", "eval-viewer", "assets", "scripts"}
    for entry in sorted(current.iterdir()):
        if entry.is_dir() and entry.name not in ignored:
            _find_recursive(root, entry, runs)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _build_run(root: Path, run_dir: Path) -> dict | None:
    """Assemble the embedded-data record for one eval run directory.

    Gathers the eval prompt, eval id, output files and grading results
    from the conventional layout under *run_dir* (and its parent, where
    metadata may be shared by sibling runs).

    Args:
        root: Workspace root, used to derive a stable relative run id.
        run_dir: Run directory (has an outputs/ subdir).

    Returns:
        Dict with keys id / prompt / eval_id / outputs / grading.
    """
    prompt, eval_id = _read_eval_metadata(run_dir)
    if not prompt:
        prompt = _prompt_from_transcript(run_dir)
    prompt = prompt or "(No prompt found)"

    # Relative path with separators normalised gives a collision-free id
    # that is safe in HTML/JS on both POSIX and Windows.
    run_id = str(run_dir.relative_to(root)).replace("/", "-").replace("\\", "-")

    outputs_dir = run_dir / "outputs"
    output_files = []
    if outputs_dir.is_dir():
        for f in sorted(outputs_dir.iterdir()):
            if f.is_file() and f.name not in METADATA_FILES:
                output_files.append(_embed_file(f))

    return {
        "id": run_id,
        "prompt": prompt,
        "eval_id": eval_id,
        "outputs": output_files,
        "grading": _read_grading(run_dir),
    }


def _read_eval_metadata(run_dir: Path) -> tuple:
    """Best-effort read of (prompt, eval_id) from eval_metadata.json.

    Checks the run dir first, then its parent. Unreadable or invalid
    JSON is ignored — missing metadata is expected for some runs.
    """
    prompt = ""
    eval_id = None
    for candidate in [run_dir / "eval_metadata.json", run_dir.parent / "eval_metadata.json"]:
        if candidate.exists():
            try:
                meta = json.loads(candidate.read_text())
                prompt = meta.get("prompt", "")
                eval_id = meta.get("eval_id")
            except Exception:
                pass  # deliberately best-effort
        if prompt:
            break
    return prompt, eval_id


def _prompt_from_transcript(run_dir: Path) -> str:
    """Fallback: extract the prompt from a transcript.md section header."""
    for candidate in [run_dir / "transcript.md", run_dir / "outputs" / "transcript.md"]:
        if candidate.exists():
            try:
                text = candidate.read_text()
                m = re.search(r"## Eval Prompt\n\n([\s\S]*?)(?=\n##|$)", text)
                if m:
                    found = m.group(1).strip()
                    if found:
                        return found
            except Exception:
                pass  # best-effort
    return ""


def _read_grading(run_dir: Path):
    """Best-effort read of grading.json from the run dir or its parent."""
    grading = None
    for candidate in [run_dir / "grading.json", run_dir.parent / "grading.json"]:
        if candidate.exists():
            try:
                grading = json.loads(candidate.read_text())
            except Exception:
                pass  # best-effort
        if grading:
            break
    return grading
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _embed_file(path: Path) -> dict:
    """Read *path* and return a JSON-serialisable record for embedding.

    Files with a known text extension are embedded verbatim; everything
    else is base64-encoded. Read failures yield a placeholder record
    rather than raising, so one bad file cannot break the whole viewer.
    """
    if path.suffix.lower() in TEXT_EXTENSIONS:
        try:
            body = path.read_text(errors="replace")
        except OSError:
            body = "(Error reading file)"
        return {"name": path.name, "type": "text", "content": body}

    import base64
    try:
        encoded = base64.b64encode(path.read_bytes()).decode("ascii")
    except OSError:
        return {"name": path.name, "type": "error", "content": "(Error reading file)"}
    return {"name": path.name, "type": "binary", "data_b64": encoded}
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _generate_html(runs: list[dict], skill_name: str, benchmark: dict | None = None) -> str:
    """Render viewer.html with all run data embedded as a JS constant.

    The template sitting next to this script carries a
    /*__EMBEDDED_DATA__*/ marker which is replaced with a
    `const EMBEDDED_DATA = ...;` assignment.
    """
    template = (Path(__file__).parent / "viewer.html").read_text()

    payload: dict = {
        "skill_name": skill_name,
        "runs": runs,
        "previous_feedback": {},
        "previous_outputs": {},
    }
    if benchmark:
        payload["benchmark"] = benchmark

    injection = f"const EMBEDDED_DATA = {json.dumps(payload)};"
    return template.replace("/*__EMBEDDED_DATA__*/", injection)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _kill_port(port: int) -> None:
    """Best-effort: SIGTERM any process listening on *port* (via lsof).

    Does nothing when lsof is unavailable or times out, so the viewer
    still starts on systems without lsof (the bind simply falls back).
    """
    try:
        lsof = subprocess.run(
            ["lsof", "-ti", f":{port}"],
            capture_output=True, text=True, timeout=5,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return

    listeners = lsof.stdout.strip()
    for token in listeners.split("\n"):
        token = token.strip()
        if not token:
            continue
        try:
            os.kill(int(token), signal.SIGTERM)
        except (ProcessLookupError, ValueError):
            pass
    if listeners:
        # Give the previous server a moment to release the socket.
        time.sleep(0.5)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class Handler(BaseHTTPRequestHandler):
    """HTTP handler for the live eval viewer.

    Routes:
      GET  /, /index.html  — re-scan the workspace and serve freshly
                             generated viewer HTML (live reload on refresh).
      GET  /api/feedback   — return saved feedback JSON ("{}" when absent).
      POST /api/feedback   — persist the posted feedback JSON to disk.
    """

    def __init__(self, workspace, skill_name, feedback_path, benchmark_path, *args, **kwargs):
        # NOTE: attributes must be assigned BEFORE super().__init__(),
        # because BaseHTTPRequestHandler handles the request inside its
        # constructor — do_GET/do_POST run before __init__ returns.
        self.workspace = workspace
        self.skill_name = skill_name
        self.feedback_path = feedback_path
        self.benchmark_path = benchmark_path
        super().__init__(*args, **kwargs)

    def do_GET(self) -> None:
        """Serve the viewer page or the stored feedback; 404 otherwise."""
        if self.path in ("/", "/index.html"):
            # Re-scan on every page load so new runs appear on refresh.
            runs = find_runs(self.workspace)
            benchmark = None
            if self.benchmark_path and self.benchmark_path.exists():
                try:
                    benchmark = json.loads(self.benchmark_path.read_text())
                except Exception:
                    pass  # benchmark is optional; ignore unreadable files
            html = _generate_html(runs, self.skill_name, benchmark)
            content = html.encode("utf-8")
            self.send_response(200)
            self.send_header("Content-Type", "text/html; charset=utf-8")
            self.send_header("Content-Length", str(len(content)))
            self.end_headers()
            self.wfile.write(content)
        elif self.path == "/api/feedback":
            # Empty JSON object when no feedback has been saved yet.
            data = self.feedback_path.read_bytes() if self.feedback_path.exists() else b"{}"
            self.send_response(200)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(data)))
            self.end_headers()
            self.wfile.write(data)
        else:
            self.send_error(404)

    def do_POST(self) -> None:
        """Persist posted feedback JSON; 500 on parse/write failure."""
        if self.path == "/api/feedback":
            length = int(self.headers.get("Content-Length", 0))
            body = self.rfile.read(length)
            try:
                # Round-trip through json to validate and pretty-print.
                data = json.loads(body)
                self.feedback_path.write_text(json.dumps(data, indent=2) + "\n")
                resp = b'{"ok":true}'
                self.send_response(200)
            except Exception as e:
                resp = json.dumps({"error": str(e)}).encode()
                self.send_response(500)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(resp)))
            self.end_headers()
            self.wfile.write(resp)
        else:
            self.send_error(404)

    def log_message(self, fmt, *args):
        # Silence the default per-request stderr logging.
        pass
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def main() -> None:
    """CLI entry point: parse args, collect runs, then serve or write HTML.

    Exits non-zero when the workspace is missing or contains no runs.
    With --static, writes a standalone HTML file and exits; otherwise
    starts a localhost HTTP server and opens the browser.
    """
    parser = argparse.ArgumentParser(description="ct-grade eval review viewer")
    parser.add_argument("workspace", type=Path, help="Workspace directory with eval runs")
    parser.add_argument("--port", "-p", type=int, default=3118)
    parser.add_argument("--skill-name", "-n", default="ct-grade")
    parser.add_argument("--benchmark", type=Path, default=None)
    parser.add_argument("--static", "-s", type=Path, default=None, help="Write static HTML, don't serve")
    args = parser.parse_args()

    workspace = args.workspace.resolve()
    if not workspace.is_dir():
        print(f"Error: {workspace} is not a directory", file=sys.stderr)
        sys.exit(1)

    runs = find_runs(workspace)
    if not runs:
        print(f"No eval runs found in {workspace}", file=sys.stderr)
        print("Runs need an outputs/ subdirectory with result files.", file=sys.stderr)
        sys.exit(1)

    # Benchmark data is optional; unreadable or invalid JSON is ignored.
    benchmark = None
    if args.benchmark and args.benchmark.exists():
        try:
            benchmark = json.loads(args.benchmark.read_text())
        except Exception:
            pass

    if args.static:
        # Static mode: write a self-contained HTML file and exit.
        html = _generate_html(runs, args.skill_name, benchmark)
        args.static.parent.mkdir(parents=True, exist_ok=True)
        args.static.write_text(html)
        print(f"\n Static viewer: {args.static}\n")
        sys.exit(0)

    port = args.port
    _kill_port(port)
    feedback_path = workspace / "feedback.json"
    handler = partial(Handler, workspace, args.skill_name, feedback_path, args.benchmark)
    try:
        server = HTTPServer(("127.0.0.1", port), handler)
    except OSError:
        # Requested port still busy — fall back to an OS-assigned free port.
        server = HTTPServer(("127.0.0.1", 0), handler)
        port = server.server_address[1]

    url = f"http://localhost:{port}"
    print("\n ct-grade Eval Viewer")
    print(" ───────────────────────────")
    print(f" URL: {url}")
    print(f" Workspace: {workspace}")
    print(f" Runs: {len(runs)} found")
    print("\n Press Ctrl+C to stop.\n")
    webbrowser.open(url)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nStopped.")
    finally:
        # Always release the listening socket, even if serve_forever
        # raises something other than KeyboardInterrupt (the original
        # only closed it on Ctrl+C, leaking the socket on other errors).
        server.server_close()
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
# Script entry point — only runs when executed directly, not on import.
if __name__ == "__main__":
    main()
|