codymaster 4.4.5 → 4.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. package/CHANGELOG.md +33 -0
  2. package/README.md +29 -14
  3. package/commands/demo.md +1 -1
  4. package/dist/context-bus.js +70 -0
  5. package/dist/context-db.js +265 -0
  6. package/dist/continuity.js +12 -0
  7. package/dist/file-watcher.js +79 -0
  8. package/dist/index.js +152 -1
  9. package/dist/l0-indexer.js +158 -0
  10. package/dist/mcp-context-server.js +400 -0
  11. package/dist/migrate-json-to-sqlite.js +126 -0
  12. package/dist/skill-chain.js +19 -3
  13. package/dist/token-budget.js +108 -0
  14. package/dist/uri-resolver.js +203 -0
  15. package/package.json +5 -1
  16. package/scripts/gate-0-secrets.js +63 -0
  17. package/scripts/gate-1-syntax.js +53 -0
  18. package/scripts/gate-5-dist-verify.js +55 -0
  19. package/scripts/gate-6-smoke-test.js +30 -0
  20. package/scripts/index-codebase.sh +552 -0
  21. package/scripts/mcp-bridge.js +284 -0
  22. package/scripts/postinstall.js +301 -0
  23. package/scripts/security-fixer.js +143 -0
  24. package/scripts/security-scan.js +55 -0
  25. package/scripts/test-gemini.js +13 -0
  26. package/scripts/todo-bridge.js +112 -0
  27. package/skills/_shared/helpers.md +50 -14
  28. package/skills/cm-autopilot/SKILL.md +29 -0
  29. package/skills/cm-autopilot/scripts/autopilot.py +190 -0
  30. package/skills/cm-continuity/SKILL.md +90 -28
  31. package/skills/cm-skill-chain/SKILL.md +47 -1
  32. package/skills/cm-start/SKILL.md +11 -2
  33. package/skills/boxme-git-config/SKILL.md +0 -56
  34. package/skills/boxme-local-dev/SKILL.md +0 -66
  35. package/skills/jobs-to-be-done/SKILL.md +0 -266
  36. package/skills/jobs-to-be-done/references/case-studies.md +0 -154
  37. package/skills/jobs-to-be-done/references/competitive-strategy.md +0 -280
  38. package/skills/jobs-to-be-done/references/diagnostics.md +0 -158
  39. package/skills/jobs-to-be-done/references/innovation-process.md +0 -392
  40. package/skills/jobs-to-be-done/references/organizational-change.md +0 -328
  41. package/skills/marketplace-report-crawler/SKILL.md +0 -176
  42. package/skills/marketplace-report-crawler/config/accounts.json +0 -41
  43. package/skills/marketplace-report-crawler/config/report-types.json +0 -422
  44. package/skills/marketplace-report-crawler/config/sessions.json +0 -3
  45. package/skills/marketplace-report-crawler/scripts/ab-wrapper.sh +0 -102
  46. package/skills/marketplace-report-crawler/scripts/browser-actions/lazada/lazada-actions.js +0 -114
  47. package/skills/marketplace-report-crawler/scripts/browser-actions/shopee/shopee-actions.js +0 -94
  48. package/skills/marketplace-report-crawler/scripts/browser-actions/tiktok/tiktok-actions.js +0 -272
  49. package/skills/marketplace-report-crawler/scripts/crawl-runner.js +0 -281
  50. package/skills/marketplace-report-crawler/scripts/session-check.sh +0 -72
  51. package/skills/marketplace-report-crawler/scripts/session-manager.sh +0 -349
  52. package/skills/marketplace-report-crawler/scripts/setup-folders.sh +0 -83
  53. package/skills/medical-research/SKILL.md +0 -194
  54. package/skills/medical-research/scripts/evidence_checker.py +0 -288
  55. package/skills/mom-test/SKILL.md +0 -267
  56. package/skills/mom-test/references/avoiding-bad-data.md +0 -221
  57. package/skills/mom-test/references/case-studies.md +0 -306
  58. package/skills/mom-test/references/commitment-advancement.md +0 -219
  59. package/skills/mom-test/references/finding-conversations.md +0 -251
  60. package/skills/mom-test/references/processing-learning.md +0 -256
  61. package/skills/mom-test/references/question-patterns.md +0 -198
  62. package/skills/pandasai-analytics/SKILL.md +0 -251
  63. package/skills/release-it/SKILL.md +0 -235
  64. package/skills/release-it/references/anti-patterns.md +0 -279
  65. package/skills/release-it/references/capacity-planning.md +0 -285
  66. package/skills/release-it/references/chaos-engineering.md +0 -325
  67. package/skills/release-it/references/deployment-strategies.md +0 -331
  68. package/skills/release-it/references/observability.md +0 -301
  69. package/skills/release-it/references/stability-patterns.md +0 -355
  70. package/skills/skill-creator-ultra/.agents/workflows/skill-audit.md +0 -37
  71. package/skills/skill-creator-ultra/.agents/workflows/skill-compare.md +0 -34
  72. package/skills/skill-creator-ultra/.agents/workflows/skill-export.md +0 -51
  73. package/skills/skill-creator-ultra/.agents/workflows/skill-generate.md +0 -39
  74. package/skills/skill-creator-ultra/.agents/workflows/skill-scaffold.md +0 -52
  75. package/skills/skill-creator-ultra/.agents/workflows/skill-simulate.md +0 -25
  76. package/skills/skill-creator-ultra/.agents/workflows/skill-stats.md +0 -31
  77. package/skills/skill-creator-ultra/.agents/workflows/skill-validate.md +0 -25
  78. package/skills/skill-creator-ultra/README.md +0 -1242
  79. package/skills/skill-creator-ultra/SKILL.md +0 -388
  80. package/skills/skill-creator-ultra/agents/analyzer.md +0 -274
  81. package/skills/skill-creator-ultra/agents/comparator.md +0 -202
  82. package/skills/skill-creator-ultra/agents/grader.md +0 -223
  83. package/skills/skill-creator-ultra/assets/eval_review.html +0 -146
  84. package/skills/skill-creator-ultra/eval-viewer/generate_review.py +0 -471
  85. package/skills/skill-creator-ultra/eval-viewer/viewer.html +0 -1325
  86. package/skills/skill-creator-ultra/examples/example_anthropic_frontend.md +0 -109
  87. package/skills/skill-creator-ultra/examples/example_anthropic_pdf.md +0 -116
  88. package/skills/skill-creator-ultra/examples/example_api_docs.md +0 -189
  89. package/skills/skill-creator-ultra/examples/example_db_migration.md +0 -253
  90. package/skills/skill-creator-ultra/examples/example_git_commit.md +0 -111
  91. package/skills/skill-creator-ultra/install.ps1 +0 -289
  92. package/skills/skill-creator-ultra/install.sh +0 -313
  93. package/skills/skill-creator-ultra/phases/phase1_interview.md +0 -202
  94. package/skills/skill-creator-ultra/phases/phase2_extract.md +0 -55
  95. package/skills/skill-creator-ultra/phases/phase3_detect.md +0 -57
  96. package/skills/skill-creator-ultra/phases/phase4_generate.md +0 -543
  97. package/skills/skill-creator-ultra/phases/phase5_test.md +0 -319
  98. package/skills/skill-creator-ultra/phases/phase6_eval.md +0 -301
  99. package/skills/skill-creator-ultra/phases/phase7_iterate.md +0 -103
  100. package/skills/skill-creator-ultra/phases/phase8_optimize.md +0 -113
  101. package/skills/skill-creator-ultra/resources/advanced_patterns.md +0 -499
  102. package/skills/skill-creator-ultra/resources/anti_patterns.md +0 -376
  103. package/skills/skill-creator-ultra/resources/blueprints.md +0 -498
  104. package/skills/skill-creator-ultra/resources/checklist.md +0 -243
  105. package/skills/skill-creator-ultra/resources/composition_cookbook.md +0 -291
  106. package/skills/skill-creator-ultra/resources/description_optimization.md +0 -90
  107. package/skills/skill-creator-ultra/resources/eval_guide.md +0 -133
  108. package/skills/skill-creator-ultra/resources/industry_questions.md +0 -189
  109. package/skills/skill-creator-ultra/resources/interview_questions.md +0 -200
  110. package/skills/skill-creator-ultra/resources/pattern_detection.md +0 -200
  111. package/skills/skill-creator-ultra/resources/prompt_engineering.md +0 -531
  112. package/skills/skill-creator-ultra/resources/schemas.md +0 -430
  113. package/skills/skill-creator-ultra/resources/script_integration.md +0 -593
  114. package/skills/skill-creator-ultra/resources/scripts_guide.md +0 -339
  115. package/skills/skill-creator-ultra/resources/skill_template.md +0 -124
  116. package/skills/skill-creator-ultra/resources/skill_writing_guide.md +0 -634
  117. package/skills/skill-creator-ultra/resources/versioning_guide.md +0 -193
  118. package/skills/skill-creator-ultra/scripts/ci_eval.py +0 -200
  119. package/skills/skill-creator-ultra/scripts/package_skill.py +0 -165
  120. package/skills/skill-creator-ultra/scripts/simulate_skill.py +0 -398
  121. package/skills/skill-creator-ultra/scripts/skill_audit.py +0 -611
  122. package/skills/skill-creator-ultra/scripts/skill_compare.py +0 -265
  123. package/skills/skill-creator-ultra/scripts/skill_export.py +0 -334
  124. package/skills/skill-creator-ultra/scripts/skill_scaffold.py +0 -403
  125. package/skills/skill-creator-ultra/scripts/skill_stats.py +0 -339
  126. package/skills/skill-creator-ultra/scripts/validate_skill.py +0 -411
  127. package/skills/tailwind-mastery/SKILL.md +0 -229
  128. package/skills/vercel-react-best-practices/AGENTS.md +0 -3373
  129. package/skills/vercel-react-best-practices/README.md +0 -123
  130. package/skills/vercel-react-best-practices/SKILL.md +0 -143
  131. package/skills/vercel-react-best-practices/rules/_sections.md +0 -46
  132. package/skills/vercel-react-best-practices/rules/_template.md +0 -28
  133. package/skills/vercel-react-best-practices/rules/advanced-event-handler-refs.md +0 -55
  134. package/skills/vercel-react-best-practices/rules/advanced-init-once.md +0 -42
  135. package/skills/vercel-react-best-practices/rules/advanced-use-latest.md +0 -39
  136. package/skills/vercel-react-best-practices/rules/async-api-routes.md +0 -38
  137. package/skills/vercel-react-best-practices/rules/async-defer-await.md +0 -80
  138. package/skills/vercel-react-best-practices/rules/async-dependencies.md +0 -51
  139. package/skills/vercel-react-best-practices/rules/async-parallel.md +0 -28
  140. package/skills/vercel-react-best-practices/rules/async-suspense-boundaries.md +0 -99
  141. package/skills/vercel-react-best-practices/rules/bundle-barrel-imports.md +0 -59
  142. package/skills/vercel-react-best-practices/rules/bundle-conditional.md +0 -31
  143. package/skills/vercel-react-best-practices/rules/bundle-defer-third-party.md +0 -49
  144. package/skills/vercel-react-best-practices/rules/bundle-dynamic-imports.md +0 -35
  145. package/skills/vercel-react-best-practices/rules/bundle-preload.md +0 -50
  146. package/skills/vercel-react-best-practices/rules/client-event-listeners.md +0 -74
  147. package/skills/vercel-react-best-practices/rules/client-localstorage-schema.md +0 -71
  148. package/skills/vercel-react-best-practices/rules/client-passive-event-listeners.md +0 -48
  149. package/skills/vercel-react-best-practices/rules/client-swr-dedup.md +0 -56
  150. package/skills/vercel-react-best-practices/rules/js-batch-dom-css.md +0 -107
  151. package/skills/vercel-react-best-practices/rules/js-cache-function-results.md +0 -80
  152. package/skills/vercel-react-best-practices/rules/js-cache-property-access.md +0 -28
  153. package/skills/vercel-react-best-practices/rules/js-cache-storage.md +0 -70
  154. package/skills/vercel-react-best-practices/rules/js-combine-iterations.md +0 -32
  155. package/skills/vercel-react-best-practices/rules/js-early-exit.md +0 -50
  156. package/skills/vercel-react-best-practices/rules/js-flatmap-filter.md +0 -60
  157. package/skills/vercel-react-best-practices/rules/js-hoist-regexp.md +0 -45
  158. package/skills/vercel-react-best-practices/rules/js-index-maps.md +0 -37
  159. package/skills/vercel-react-best-practices/rules/js-length-check-first.md +0 -49
  160. package/skills/vercel-react-best-practices/rules/js-min-max-loop.md +0 -82
  161. package/skills/vercel-react-best-practices/rules/js-set-map-lookups.md +0 -24
  162. package/skills/vercel-react-best-practices/rules/js-tosorted-immutable.md +0 -57
  163. package/skills/vercel-react-best-practices/rules/rendering-activity.md +0 -26
  164. package/skills/vercel-react-best-practices/rules/rendering-animate-svg-wrapper.md +0 -47
  165. package/skills/vercel-react-best-practices/rules/rendering-conditional-render.md +0 -40
  166. package/skills/vercel-react-best-practices/rules/rendering-content-visibility.md +0 -38
  167. package/skills/vercel-react-best-practices/rules/rendering-hoist-jsx.md +0 -46
  168. package/skills/vercel-react-best-practices/rules/rendering-hydration-no-flicker.md +0 -82
  169. package/skills/vercel-react-best-practices/rules/rendering-hydration-suppress-warning.md +0 -30
  170. package/skills/vercel-react-best-practices/rules/rendering-resource-hints.md +0 -85
  171. package/skills/vercel-react-best-practices/rules/rendering-script-defer-async.md +0 -68
  172. package/skills/vercel-react-best-practices/rules/rendering-svg-precision.md +0 -28
  173. package/skills/vercel-react-best-practices/rules/rendering-usetransition-loading.md +0 -75
  174. package/skills/vercel-react-best-practices/rules/rerender-defer-reads.md +0 -39
  175. package/skills/vercel-react-best-practices/rules/rerender-dependencies.md +0 -45
  176. package/skills/vercel-react-best-practices/rules/rerender-derived-state-no-effect.md +0 -40
  177. package/skills/vercel-react-best-practices/rules/rerender-derived-state.md +0 -29
  178. package/skills/vercel-react-best-practices/rules/rerender-functional-setstate.md +0 -74
  179. package/skills/vercel-react-best-practices/rules/rerender-lazy-state-init.md +0 -58
  180. package/skills/vercel-react-best-practices/rules/rerender-memo-with-default-value.md +0 -38
  181. package/skills/vercel-react-best-practices/rules/rerender-memo.md +0 -44
  182. package/skills/vercel-react-best-practices/rules/rerender-move-effect-to-event.md +0 -45
  183. package/skills/vercel-react-best-practices/rules/rerender-no-inline-components.md +0 -82
  184. package/skills/vercel-react-best-practices/rules/rerender-simple-expression-in-memo.md +0 -35
  185. package/skills/vercel-react-best-practices/rules/rerender-split-combined-hooks.md +0 -64
  186. package/skills/vercel-react-best-practices/rules/rerender-transitions.md +0 -40
  187. package/skills/vercel-react-best-practices/rules/rerender-use-deferred-value.md +0 -59
  188. package/skills/vercel-react-best-practices/rules/rerender-use-ref-transient-values.md +0 -73
  189. package/skills/vercel-react-best-practices/rules/server-after-nonblocking.md +0 -73
  190. package/skills/vercel-react-best-practices/rules/server-auth-actions.md +0 -96
  191. package/skills/vercel-react-best-practices/rules/server-cache-lru.md +0 -41
  192. package/skills/vercel-react-best-practices/rules/server-cache-react.md +0 -76
  193. package/skills/vercel-react-best-practices/rules/server-dedup-props.md +0 -65
  194. package/skills/vercel-react-best-practices/rules/server-hoist-static-io.md +0 -142
  195. package/skills/vercel-react-best-practices/rules/server-parallel-fetching.md +0 -83
  196. package/skills/vercel-react-best-practices/rules/server-serialization.md +0 -38
  197. package/skills/web-design-guidelines/SKILL.md +0 -39
@@ -1,471 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Generate and serve a review page for eval results.
3
-
4
- Reads the workspace directory, discovers runs (directories with outputs/),
5
- embeds all output data into a self-contained HTML page, and serves it via
6
- a tiny HTTP server. Feedback auto-saves to feedback.json in the workspace.
7
-
8
- Usage:
9
- python generate_review.py <workspace-path> [--port PORT] [--skill-name NAME]
10
- python generate_review.py <workspace-path> --previous-workspace /path/to/previous/workspace
11
-
12
- No dependencies beyond the Python stdlib are required.
13
- """
14
-
15
- import argparse
16
- import base64
17
- import json
18
- import mimetypes
19
- import os
20
- import re
21
- import signal
22
- import subprocess
23
- import sys
24
- import time
25
- import webbrowser
26
- from functools import partial
27
- from http.server import HTTPServer, BaseHTTPRequestHandler
28
- from pathlib import Path
29
-
30
# Bookkeeping files that are left out of a run's output listing.
METADATA_FILES = {
    "transcript.md",
    "user_notes.md",
    "metrics.json",
}

# Suffixes whose file contents are embedded as inline text.
TEXT_EXTENSIONS = {
    # documents and data
    ".txt", ".md", ".json", ".csv", ".yaml", ".yml", ".xml", ".toml",
    # web
    ".html", ".css", ".js", ".ts", ".tsx", ".jsx",
    # other source code
    ".py", ".sh", ".rb", ".go", ".rs", ".java",
    ".c", ".cpp", ".h", ".hpp", ".sql", ".r",
}

# Suffixes that are embedded as inline images.
IMAGE_EXTENSIONS = {
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".svg",
    ".webp",
}

# MIME types that take precedence over whatever `mimetypes` would guess.
MIME_OVERRIDES = {
    ".svg": "image/svg+xml",
    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
}
50
-
51
-
52
def get_mime_type(path: Path) -> str:
    """Return the MIME type to report for *path*.

    A suffix listed in MIME_OVERRIDES wins. Otherwise the type is guessed
    from the file name, falling back to ``application/octet-stream`` when
    no guess is available.
    """
    suffix = path.suffix.lower()
    if suffix in MIME_OVERRIDES:
        return MIME_OVERRIDES[suffix]
    guessed = mimetypes.guess_type(str(path))[0]
    return guessed if guessed else "application/octet-stream"
58
-
59
-
60
def find_runs(workspace: Path) -> list[dict]:
    """Recursively find directories that contain an outputs/ subdirectory.

    Args:
        workspace: Directory to scan.

    Returns:
        Run dicts ordered by ``eval_id`` and then by run ``id``. Runs with a
        numeric ``eval_id`` come first, runs with any other non-None
        ``eval_id`` next (ordered by its string form), and runs without an
        ``eval_id`` last.
    """
    runs: list[dict] = []
    _find_runs_recursive(workspace, workspace, runs)

    def sort_key(run: dict) -> tuple:
        eval_id = run.get("eval_id")
        # The "eval_id" key can be present with a None value, so a dict.get
        # default is not enough: None would be compared against ints and
        # sorting would raise TypeError. The leading group number keeps
        # values of different types from ever being compared to each other.
        if isinstance(eval_id, (int, float)):
            return (0, eval_id, "", run["id"])
        if eval_id is None:
            return (2, 0, "", run["id"])
        return (1, 0, str(eval_id), run["id"])

    runs.sort(key=sort_key)
    return runs
66
-
67
-
68
- def _find_runs_recursive(root: Path, current: Path, runs: list[dict]) -> None:
69
- if not current.is_dir():
70
- return
71
-
72
- outputs_dir = current / "outputs"
73
- if outputs_dir.is_dir():
74
- run = build_run(root, current)
75
- if run:
76
- runs.append(run)
77
- return
78
-
79
- skip = {"node_modules", ".git", "__pycache__", "skill", "inputs"}
80
- for child in sorted(current.iterdir()):
81
- if child.is_dir() and child.name not in skip:
82
- _find_runs_recursive(root, child, runs)
83
-
84
-
85
- def build_run(root: Path, run_dir: Path) -> dict | None:
86
- """Build a run dict with prompt, outputs, and grading data."""
87
- prompt = ""
88
- eval_id = None
89
-
90
- # Try eval_metadata.json
91
- for candidate in [run_dir / "eval_metadata.json", run_dir.parent / "eval_metadata.json"]:
92
- if candidate.exists():
93
- try:
94
- metadata = json.loads(candidate.read_text())
95
- prompt = metadata.get("prompt", "")
96
- eval_id = metadata.get("eval_id")
97
- except (json.JSONDecodeError, OSError):
98
- pass
99
- if prompt:
100
- break
101
-
102
- # Fall back to transcript.md
103
- if not prompt:
104
- for candidate in [run_dir / "transcript.md", run_dir / "outputs" / "transcript.md"]:
105
- if candidate.exists():
106
- try:
107
- text = candidate.read_text()
108
- match = re.search(r"## Eval Prompt\n\n([\s\S]*?)(?=\n##|$)", text)
109
- if match:
110
- prompt = match.group(1).strip()
111
- except OSError:
112
- pass
113
- if prompt:
114
- break
115
-
116
- if not prompt:
117
- prompt = "(No prompt found)"
118
-
119
- run_id = str(run_dir.relative_to(root)).replace("/", "-").replace("\\", "-")
120
-
121
- # Collect output files
122
- outputs_dir = run_dir / "outputs"
123
- output_files: list[dict] = []
124
- if outputs_dir.is_dir():
125
- for f in sorted(outputs_dir.iterdir()):
126
- if f.is_file() and f.name not in METADATA_FILES:
127
- output_files.append(embed_file(f))
128
-
129
- # Load grading if present
130
- grading = None
131
- for candidate in [run_dir / "grading.json", run_dir.parent / "grading.json"]:
132
- if candidate.exists():
133
- try:
134
- grading = json.loads(candidate.read_text())
135
- except (json.JSONDecodeError, OSError):
136
- pass
137
- if grading:
138
- break
139
-
140
- return {
141
- "id": run_id,
142
- "prompt": prompt,
143
- "eval_id": eval_id,
144
- "outputs": output_files,
145
- "grading": grading,
146
- }
147
-
148
-
149
def _read_base64(path: Path) -> str | None:
    """Return the bytes of *path* as base64 ASCII text, or None if it cannot be read."""
    try:
        return base64.b64encode(path.read_bytes()).decode("ascii")
    except OSError:
        return None


def embed_file(path: Path) -> dict:
    """Read a file and return an embedded representation.

    The result always has ``name`` and ``type``. Depending on the suffix:

    * text suffixes -> ``type="text"`` with the decoded ``content``
    * image suffixes -> ``type="image"`` with ``mime`` and a ``data_uri``
    * ``.pdf`` -> ``type="pdf"`` with a ``data_uri``
    * ``.xlsx`` -> ``type="xlsx"`` with the raw base64 in ``data_b64``
    * anything else -> ``type="binary"`` with ``mime`` and a ``data_uri``

    A non-text file that cannot be read yields ``type="error"``. A text file
    that cannot be read keeps ``type="text"`` with a placeholder content.
    """
    ext = path.suffix.lower()
    mime = get_mime_type(path)

    if ext in TEXT_EXTENSIONS:
        try:
            # Decode as UTF-8 explicitly so the result does not depend on the
            # platform's locale encoding; undecodable bytes are replaced.
            content = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            content = "(Error reading file)"
        return {
            "name": path.name,
            "type": "text",
            "content": content,
        }

    # Every remaining kind embeds the raw bytes as base64.
    b64 = _read_base64(path)
    if b64 is None:
        return {"name": path.name, "type": "error", "content": "(Error reading file)"}

    data_uri = f"data:{mime};base64,{b64}"
    if ext in IMAGE_EXTENSIONS:
        return {
            "name": path.name,
            "type": "image",
            "mime": mime,
            "data_uri": data_uri,
        }
    if ext == ".pdf":
        return {
            "name": path.name,
            "type": "pdf",
            "data_uri": data_uri,
        }
    if ext == ".xlsx":
        return {
            "name": path.name,
            "type": "xlsx",
            "data_b64": b64,
        }
    # Binary / unknown — base64 download link
    return {
        "name": path.name,
        "type": "binary",
        "mime": mime,
        "data_uri": data_uri,
    }
211
-
212
-
213
def load_previous_iteration(workspace: Path) -> dict[str, dict]:
    """Load previous iteration's feedback and outputs.

    Args:
        workspace: Workspace directory of the previous iteration.

    Returns:
        A map of run_id -> {"feedback": str, "outputs": list[dict]}. Runs
        without feedback get an empty string; feedback whose run is no longer
        found gets an empty outputs list.

    A missing, unreadable, or malformed feedback.json is treated as "no
    feedback", and individual malformed review entries are skipped.
    """
    result: dict[str, dict] = {}

    # Load feedback, keeping only entries with non-blank feedback text.
    feedback_map: dict[str, str] = {}
    try:
        data = json.loads((workspace / "feedback.json").read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        data = None
    reviews = data.get("reviews") if isinstance(data, dict) else None
    if isinstance(reviews, list):
        for review in reviews:
            if not isinstance(review, dict):
                continue
            run_id = review.get("run_id")
            feedback = review.get("feedback")
            # "feedback" may be null or missing; only real, non-blank text counts.
            if isinstance(run_id, str) and isinstance(feedback, str) and feedback.strip():
                feedback_map[run_id] = feedback

    # Load runs (to get outputs)
    for run in find_runs(workspace):
        result[run["id"]] = {
            "feedback": feedback_map.get(run["id"], ""),
            "outputs": run.get("outputs", []),
        }

    # Also add feedback for run_ids that had feedback but no matching run
    for run_id, fb in feedback_map.items():
        if run_id not in result:
            result[run_id] = {"feedback": fb, "outputs": []}

    return result
248
-
249
-
250
- def generate_html(
251
- runs: list[dict],
252
- skill_name: str,
253
- previous: dict[str, dict] | None = None,
254
- benchmark: dict | None = None,
255
- ) -> str:
256
- """Generate the complete standalone HTML page with embedded data."""
257
- template_path = Path(__file__).parent / "viewer.html"
258
- template = template_path.read_text()
259
-
260
- # Build previous_feedback and previous_outputs maps for the template
261
- previous_feedback: dict[str, str] = {}
262
- previous_outputs: dict[str, list[dict]] = {}
263
- if previous:
264
- for run_id, data in previous.items():
265
- if data.get("feedback"):
266
- previous_feedback[run_id] = data["feedback"]
267
- if data.get("outputs"):
268
- previous_outputs[run_id] = data["outputs"]
269
-
270
- embedded = {
271
- "skill_name": skill_name,
272
- "runs": runs,
273
- "previous_feedback": previous_feedback,
274
- "previous_outputs": previous_outputs,
275
- }
276
- if benchmark:
277
- embedded["benchmark"] = benchmark
278
-
279
- data_json = json.dumps(embedded)
280
-
281
- return template.replace("/*__EMBEDDED_DATA__*/", f"const EMBEDDED_DATA = {data_json};")
282
-
283
-
284
- # ---------------------------------------------------------------------------
285
- # HTTP server (stdlib only, zero dependencies)
286
- # ---------------------------------------------------------------------------
287
-
288
def _kill_port(port: int) -> None:
    """Kill any process listening on the given port.

    Uses ``lsof`` to find the listeners and sends each one SIGTERM, then
    waits briefly so the port can be released. This is best effort: if
    ``lsof`` is missing or times out, or a process cannot be signalled,
    the function returns without raising.
    """
    try:
        result = subprocess.run(
            # -sTCP:LISTEN restricts the match to listening sockets. Without
            # it lsof also reports processes that merely have a connection
            # to the port (for example a browser tab), and those would be
            # terminated too.
            ["lsof", "-t", f"-iTCP:{port}", "-sTCP:LISTEN"],
            capture_output=True, text=True, timeout=5,
        )
    except subprocess.TimeoutExpired:
        return
    except FileNotFoundError:
        print("Note: lsof not found, cannot check if port is in use", file=sys.stderr)
        return

    signalled = False
    for pid_str in result.stdout.split():
        try:
            pid = int(pid_str)
        except ValueError:
            continue
        if pid == os.getpid():
            # Never terminate ourselves.
            continue
        try:
            os.kill(pid, signal.SIGTERM)
        except (ProcessLookupError, PermissionError):
            # Already gone, or owned by another user: nothing we can do.
            continue
        signalled = True

    if signalled:
        # Give the terminated processes a moment to release the port.
        time.sleep(0.5)
307
-
308
- class ReviewHandler(BaseHTTPRequestHandler):
309
- """Serves the review HTML and handles feedback saves.
310
-
311
- Regenerates the HTML on each page load so that refreshing the browser
312
- picks up new eval outputs without restarting the server.
313
- """
314
-
315
- def __init__(
316
- self,
317
- workspace: Path,
318
- skill_name: str,
319
- feedback_path: Path,
320
- previous: dict[str, dict],
321
- benchmark_path: Path | None,
322
- *args,
323
- **kwargs,
324
- ):
325
- self.workspace = workspace
326
- self.skill_name = skill_name
327
- self.feedback_path = feedback_path
328
- self.previous = previous
329
- self.benchmark_path = benchmark_path
330
- super().__init__(*args, **kwargs)
331
-
332
- def do_GET(self) -> None:
333
- if self.path == "/" or self.path == "/index.html":
334
- # Regenerate HTML on each request (re-scans workspace for new outputs)
335
- runs = find_runs(self.workspace)
336
- benchmark = None
337
- if self.benchmark_path and self.benchmark_path.exists():
338
- try:
339
- benchmark = json.loads(self.benchmark_path.read_text())
340
- except (json.JSONDecodeError, OSError):
341
- pass
342
- html = generate_html(runs, self.skill_name, self.previous, benchmark)
343
- content = html.encode("utf-8")
344
- self.send_response(200)
345
- self.send_header("Content-Type", "text/html; charset=utf-8")
346
- self.send_header("Content-Length", str(len(content)))
347
- self.end_headers()
348
- self.wfile.write(content)
349
- elif self.path == "/api/feedback":
350
- data = b"{}"
351
- if self.feedback_path.exists():
352
- data = self.feedback_path.read_bytes()
353
- self.send_response(200)
354
- self.send_header("Content-Type", "application/json")
355
- self.send_header("Content-Length", str(len(data)))
356
- self.end_headers()
357
- self.wfile.write(data)
358
- else:
359
- self.send_error(404)
360
-
361
- def do_POST(self) -> None:
362
- if self.path == "/api/feedback":
363
- length = int(self.headers.get("Content-Length", 0))
364
- body = self.rfile.read(length)
365
- try:
366
- data = json.loads(body)
367
- if not isinstance(data, dict) or "reviews" not in data:
368
- raise ValueError("Expected JSON object with 'reviews' key")
369
- self.feedback_path.write_text(json.dumps(data, indent=2) + "\n")
370
- resp = b'{"ok":true}'
371
- self.send_response(200)
372
- except (json.JSONDecodeError, OSError, ValueError) as e:
373
- resp = json.dumps({"error": str(e)}).encode()
374
- self.send_response(500)
375
- self.send_header("Content-Type", "application/json")
376
- self.send_header("Content-Length", str(len(resp)))
377
- self.end_headers()
378
- self.wfile.write(resp)
379
- else:
380
- self.send_error(404)
381
-
382
- def log_message(self, format: str, *args: object) -> None:
383
- # Suppress request logging to keep terminal clean
384
- pass
385
-
386
-
387
def main() -> None:
    """Parse the command line, then write a static page or serve the viewer.

    With ``--static`` the generated HTML is written to the given path and the
    process exits with status 0. Otherwise a local HTTP server is started on
    127.0.0.1 (on the requested port, or a free one if that port cannot be
    bound), the page is opened in the browser, and the server runs until
    interrupted with Ctrl+C.

    Exits with status 1 when the workspace is not a directory or contains
    no runs.
    """
    parser = argparse.ArgumentParser(description="Generate and serve eval review")
    parser.add_argument("workspace", type=Path, help="Path to workspace directory")
    parser.add_argument("--port", "-p", type=int, default=3117, help="Server port (default: 3117)")
    parser.add_argument("--skill-name", "-n", type=str, default=None, help="Skill name for header")
    parser.add_argument(
        "--previous-workspace", type=Path, default=None,
        help="Path to previous iteration's workspace (shows old outputs and feedback as context)",
    )
    parser.add_argument(
        "--benchmark", type=Path, default=None,
        help="Path to benchmark.json to show in the Benchmark tab",
    )
    parser.add_argument(
        "--static", "-s", type=Path, default=None,
        help="Write standalone HTML to this path instead of starting a server",
    )
    args = parser.parse_args()

    workspace = args.workspace.resolve()
    if not workspace.is_dir():
        print(f"Error: {workspace} is not a directory", file=sys.stderr)
        sys.exit(1)

    runs = find_runs(workspace)
    if not runs:
        print(f"No runs found in {workspace}", file=sys.stderr)
        sys.exit(1)

    skill_name = args.skill_name or workspace.name.replace("-workspace", "")
    feedback_path = workspace / "feedback.json"

    previous: dict[str, dict] = {}
    if args.previous_workspace:
        previous = load_previous_iteration(args.previous_workspace.resolve())

    benchmark_path = args.benchmark.resolve() if args.benchmark else None
    benchmark = None
    if benchmark_path and benchmark_path.exists():
        try:
            benchmark = json.loads(benchmark_path.read_text())
        except (json.JSONDecodeError, OSError):
            pass

    if args.static:
        html = generate_html(runs, skill_name, previous, benchmark)
        args.static.parent.mkdir(parents=True, exist_ok=True)
        # Write UTF-8 explicitly: the locale's default encoding may be unable
        # to represent characters that appear in the embedded outputs.
        args.static.write_text(html, encoding="utf-8")
        print(f"\n Static viewer written to: {args.static}\n")
        sys.exit(0)

    # Kill any existing process on the target port
    port = args.port
    _kill_port(port)
    handler = partial(ReviewHandler, workspace, skill_name, feedback_path, previous, benchmark_path)
    try:
        server = HTTPServer(("127.0.0.1", port), handler)
    except OSError:
        # Port still in use after kill attempt — find a free one
        server = HTTPServer(("127.0.0.1", 0), handler)
        port = server.server_address[1]

    url = f"http://localhost:{port}"
    print(f"\n Eval Viewer")
    print(f" ─────────────────────────────────")
    print(f" URL: {url}")
    print(f" Workspace: {workspace}")
    print(f" Feedback: {feedback_path}")
    if previous:
        print(f" Previous: {args.previous_workspace} ({len(previous)} runs)")
    if benchmark_path:
        print(f" Benchmark: {benchmark_path}")
    print(f"\n Press Ctrl+C to stop.\n")

    webbrowser.open(url)

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nStopped.")
    finally:
        # Release the listening socket on every exit path, not only Ctrl+C.
        server.server_close()


if __name__ == "__main__":
    main()