tarsk 0.5.41 → 0.5.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/dist/bundled-skills/skill-creator/LICENSE.txt +202 -0
  2. package/dist/bundled-skills/skill-creator/SKILL.md +510 -0
  3. package/dist/bundled-skills/skill-creator/agents/analyzer.md +283 -0
  4. package/dist/bundled-skills/skill-creator/agents/comparator.md +203 -0
  5. package/dist/bundled-skills/skill-creator/agents/grader.md +227 -0
  6. package/dist/bundled-skills/skill-creator/assets/eval_review.html +292 -0
  7. package/dist/bundled-skills/skill-creator/eval-viewer/generate_review.js +544 -0
  8. package/dist/bundled-skills/skill-creator/eval-viewer/viewer.html +1478 -0
  9. package/dist/bundled-skills/skill-creator/package.json +3 -0
  10. package/dist/bundled-skills/skill-creator/references/schemas.md +423 -0
  11. package/dist/bundled-skills/skill-creator/scripts/aggregate_benchmark.js +377 -0
  12. package/dist/bundled-skills/skill-creator/scripts/generate_report.js +345 -0
  13. package/dist/bundled-skills/skill-creator/scripts/improve_description.js +263 -0
  14. package/dist/bundled-skills/skill-creator/scripts/package_skill.js +147 -0
  15. package/dist/bundled-skills/skill-creator/scripts/quick_validate.js +132 -0
  16. package/dist/bundled-skills/skill-creator/scripts/run_eval.js +345 -0
  17. package/dist/bundled-skills/skill-creator/scripts/run_loop.js +411 -0
  18. package/dist/bundled-skills/skill-creator/scripts/utils.js +60 -0
  19. package/dist/index.js +8817 -6340
  20. package/dist/public/assets/{account-view-D-dJ0y-D.js → account-view-xKotpUyx.js} +1 -1
  21. package/dist/public/assets/api-D6uLdHBQ.js +1 -0
  22. package/dist/public/assets/browser-tab-DxigYzoT.js +1 -0
  23. package/dist/public/assets/commit-dialog-CLQM9ah3.js +1 -0
  24. package/dist/public/assets/context-menu-rC7iWcty.js +1 -0
  25. package/dist/public/assets/create-repo-dialog-C6k5wZPW.js +1 -0
  26. package/dist/public/assets/{dialogs-config-B-LZ4nOb.js → dialogs-config-CjKh5Rl2.js} +14 -14
  27. package/dist/public/assets/diff-view-DWDWI5nl.js +3 -0
  28. package/dist/public/assets/explorer-tab-view-B0kT8Hl6.js +2 -0
  29. package/dist/public/assets/explorer-tree-BC4fBpxi.js +1 -0
  30. package/dist/public/assets/explorer-view-DIM08sdy.js +1 -0
  31. package/dist/public/assets/git-history-dialog-CuxOTngT.js +1 -0
  32. package/dist/public/assets/git-ops-button-C04zFAnF.js +2 -0
  33. package/dist/public/assets/history-view-ar7GLZ-R.js +9 -0
  34. package/dist/public/assets/index--HY4BbcM.js +90 -0
  35. package/dist/public/assets/index-DKOXV50p.css +1 -0
  36. package/dist/public/assets/mcp-server-card-Cy4RU2_Q.js +1 -0
  37. package/dist/public/assets/merged-pr-dialog-Bo07VouF.js +1 -0
  38. package/dist/public/assets/model-star-rating-BmkpdXfr.js +1 -0
  39. package/dist/public/assets/onboarding-ClZrOxX7.js +1 -0
  40. package/dist/public/assets/project-settings-view-Dm9pQAp_.js +1 -0
  41. package/dist/public/assets/providers-list-view-D5gHsjl_.js +1 -0
  42. package/dist/public/assets/pull-request-dialog-8AYlOUNX.js +1 -0
  43. package/dist/public/assets/pull-with-changes-dialog-CSa5OE-d.js +1 -0
  44. package/dist/public/assets/push-before-pr-dialog-D5W_xsqv.js +1 -0
  45. package/dist/public/assets/radio-group-CbatNaj1.js +1 -0
  46. package/dist/public/assets/react-vendor-DwQYi7es.js +16 -0
  47. package/dist/public/assets/settings-general-view-BP5ULy9A.js +1 -0
  48. package/dist/public/assets/settings-instructions-view-DMAjbi6E.js +1 -0
  49. package/dist/public/assets/settings-list-B8hiBkBz.js +1 -0
  50. package/dist/public/assets/settings-mcp-servers-view-OimQz-Rd.js +5 -0
  51. package/dist/public/assets/{settings-models-skeleton-ClrbJy_p.js → settings-models-skeleton-DPnYbg69.js} +1 -1
  52. package/dist/public/assets/settings-models-view-Fq3WtdKG.js +1 -0
  53. package/dist/public/assets/settings-rules-view-DBk7DzV2.js +8 -0
  54. package/dist/public/assets/settings-skills-view-CmOw-WMM.js +2 -0
  55. package/dist/public/assets/settings-slash-commands-view-FsrF5FkK.js +1 -0
  56. package/dist/public/assets/settings-subagents-view-D98Nxoly.js +2 -0
  57. package/dist/public/assets/{settings-system-prompt-view-Dl66VFaj.js → settings-system-prompt-view-B6Hy9ZyK.js} +1 -1
  58. package/dist/public/assets/settings-view-J-rjoRcU.js +2 -0
  59. package/dist/public/assets/skeleton-BHhGML7J.js +1 -0
  60. package/dist/public/assets/slug-utils-DyRUJ1NS.js +1 -0
  61. package/dist/public/assets/terminal-panel-DTOx74_o.js +1 -0
  62. package/dist/public/assets/{ui-components-C4RrfJEJ.js → ui-components-Jc6oi6bz.js} +1 -1
  63. package/dist/public/assets/use-deferred-search-B7EdyRbt.js +1 -0
  64. package/dist/public/assets/{utils-B7FQXlI6.js → utils-tgi5ym_d.js} +1 -1
  65. package/dist/public/assets/web-C3vJZ_3_.js +1 -0
  66. package/dist/public/assets/web-CUAWBWPy.js +1 -0
  67. package/dist/public/assets/{whisper-wasm-EGutPGND.js → whisper-wasm-CWcbC1MB.js} +1 -1
  68. package/dist/public/browser-preview-rpc.js +484 -0
  69. package/dist/public/index.html +8 -8
  70. package/package.json +4 -3
  71. package/dist/public/assets/api-DJaJqkc6.js +0 -1
  72. package/dist/public/assets/browser-tab-D7wEj-BD.js +0 -1
  73. package/dist/public/assets/commit-dialog-DdhGDH4F.js +0 -1
  74. package/dist/public/assets/context-menu-asf2g-KX.js +0 -1
  75. package/dist/public/assets/create-repo-dialog-Bltp6PKZ.js +0 -1
  76. package/dist/public/assets/diff-view-Bi545EPj.js +0 -3
  77. package/dist/public/assets/explorer-tab-view-B-P555GE.js +0 -2
  78. package/dist/public/assets/explorer-tree-CyXhVrI7.js +0 -1
  79. package/dist/public/assets/explorer-view-BAHDhIGN.js +0 -1
  80. package/dist/public/assets/git-history-dialog-Bci_iQmi.js +0 -1
  81. package/dist/public/assets/git-ops-button-1lum9QXI.js +0 -2
  82. package/dist/public/assets/history-view-CAkN8PCo.js +0 -9
  83. package/dist/public/assets/index-BLO68CQl.js +0 -69
  84. package/dist/public/assets/index-jIBJk8xl.css +0 -1
  85. package/dist/public/assets/mcp-server-card-DQUpkDFV.js +0 -1
  86. package/dist/public/assets/merged-pr-dialog-tHPrJ2CK.js +0 -1
  87. package/dist/public/assets/onboarding-Bbvb7kO4.js +0 -1
  88. package/dist/public/assets/project-settings-view-BRkHOq_q.js +0 -1
  89. package/dist/public/assets/providers-list-view-Dex879vv.js +0 -1
  90. package/dist/public/assets/pull-request-dialog-WHdmrW83.js +0 -1
  91. package/dist/public/assets/pull-with-changes-dialog-Ck3OwINV.js +0 -1
  92. package/dist/public/assets/push-before-pr-dialog-Bvqvz04U.js +0 -1
  93. package/dist/public/assets/radio-group-B0xvu5B9.js +0 -1
  94. package/dist/public/assets/react-vendor-D8PTA4EX.js +0 -16
  95. package/dist/public/assets/settings-general-view-lUxshNA9.js +0 -1
  96. package/dist/public/assets/settings-instructions-view-C57XGLha.js +0 -1
  97. package/dist/public/assets/settings-list-CHGKmGl_.js +0 -1
  98. package/dist/public/assets/settings-mcp-servers-view-DpqkhrgB.js +0 -5
  99. package/dist/public/assets/settings-models-view-QPEdnibD.js +0 -1
  100. package/dist/public/assets/settings-rules-view-WJU--cRq.js +0 -8
  101. package/dist/public/assets/settings-skills-view-mgHy4G_g.js +0 -2
  102. package/dist/public/assets/settings-slash-commands-view-LB5tVqy1.js +0 -1
  103. package/dist/public/assets/settings-subagents-view-QR2qlA_y.js +0 -2
  104. package/dist/public/assets/settings-view-BlVJv4Pz.js +0 -2
  105. package/dist/public/assets/skeleton-K-fVduHt.js +0 -1
  106. package/dist/public/assets/terminal-panel-BGQxckfH.js +0 -2
@@ -0,0 +1,544 @@
1
+ #!/usr/bin/env node
2
+ /** Generate and serve a review page for eval results. */
3
+
4
+ const fs = require("fs");
5
+ const path = require("path");
6
+ const http = require("http");
7
+ const { spawnSync } = require("child_process");
8
+
9
// Bookkeeping file names that may sit in a run's outputs/ directory; buildRun
// does not embed files with these names as reviewable outputs.
const METADATA_FILES = new Set(["transcript.md", "user_notes.md", "metrics.json"]);

// Lower-case extensions (with leading dot) that embedFile reads as UTF-8 text.
const TEXT_EXTENSIONS = new Set([
  ".txt", ".md", ".json", ".csv",
  ".py", ".js", ".ts", ".tsx", ".jsx",
  ".yaml", ".yml", ".xml", ".html", ".css",
  ".sh", ".rb", ".go", ".rs", ".java",
  ".c", ".cpp", ".h", ".hpp",
  ".sql", ".r", ".toml",
]);

// Lower-case extensions that embedFile inlines as base64 image data URIs.
const IMAGE_EXTENSIONS = new Set([
  ".png",
  ".jpg",
  ".jpeg",
  ".gif",
  ".svg",
  ".webp",
]);

// MIME types that getMimeType consults before its built-in extension table.
const MIME_OVERRIDES = {
  ".svg": "image/svg+xml",
  ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
};
45
+
46
/**
 * Map a file path to a MIME type based on its (case-insensitive) extension.
 *
 * Entries in MIME_OVERRIDES win over the built-in table below; anything that
 * matches neither is reported as a generic binary stream.
 *
 * @param {string} filePath - Path or file name to classify.
 * @returns {string} The MIME type for the file's extension.
 */
function getMimeType(filePath) {
  const extension = path.extname(filePath).toLowerCase();

  const override = MIME_OVERRIDES[extension];
  if (override) {
    return override;
  }

  switch (extension) {
    case ".txt":
      return "text/plain";
    case ".md":
      return "text/markdown";
    case ".json":
      return "application/json";
    case ".html":
      return "text/html";
    case ".css":
      return "text/css";
    case ".js":
      return "text/javascript";
    case ".png":
      return "image/png";
    case ".jpg":
    case ".jpeg":
      return "image/jpeg";
    case ".gif":
      return "image/gif";
    case ".webp":
      return "image/webp";
    case ".pdf":
      return "application/pdf";
    default:
      return "application/octet-stream";
  }
}
65
+
66
/**
 * Numeric sort key for a run's eval_id.
 *
 * Numbers are used as-is and numeric strings (e.g. "10") are converted. A
 * missing, empty or non-numeric id maps to +Infinity so that the run sorts
 * after every numbered eval instead of producing a NaN comparison.
 *
 * @param {{ eval_id?: unknown }} run
 * @returns {number}
 */
function evalIdSortKey(run) {
  const raw = run.eval_id;
  let key = Number.NaN;
  if (typeof raw === "number") {
    key = raw;
  } else if (typeof raw === "string" && raw.trim() !== "") {
    key = Number(raw);
  }
  return Number.isNaN(key) ? Number.POSITIVE_INFINITY : key;
}

/**
 * Collect every run found under a workspace directory, in display order.
 *
 * Runs are ordered by eval_id (ascending; runs without a usable numeric id
 * last) and then by run id.
 *
 * @param {string} workspace - Directory to scan.
 * @returns {Array<object>} Run records as produced by buildRun.
 */
function findRuns(workspace) {
  const runs = [];
  findRunsRecursive(workspace, workspace, runs);
  runs.sort((a, b) => {
    const aEval = evalIdSortKey(a);
    const bEval = evalIdSortKey(b);
    // Compare with !== first: Infinity - Infinity would be NaN.
    if (aEval !== bEval) return aEval - bEval;
    return a.id.localeCompare(b.id);
  });
  return runs;
}
77
+
78
// Directory names that are never descended into while looking for runs.
const RUN_SCAN_SKIPPED_NAMES = new Set(["node_modules", ".git", "__pycache__", "skill", "inputs"]);

/**
 * Report whether a path currently resolves to a directory.
 *
 * Any stat failure (missing path, dangling symlink, entry deleted mid-scan)
 * counts as "not a directory" rather than raising.
 *
 * @param {string} target
 * @returns {boolean}
 */
function isScannableDirectory(target) {
  try {
    return fs.statSync(target).isDirectory();
  } catch {
    return false;
  }
}

/**
 * Walk `current` depth-first and append a run record for every directory that
 * contains an `outputs/` subdirectory.
 *
 * A directory with `outputs/` is treated as a run and is not descended into
 * any further. Children are visited in sorted name order, skipping the names
 * in RUN_SCAN_SKIPPED_NAMES.
 *
 * @param {string} root - Workspace root, passed through to buildRun.
 * @param {string} current - Directory being examined.
 * @param {Array<object>} runs - Accumulator that found runs are pushed onto.
 * @returns {void}
 */
function findRunsRecursive(root, current, runs) {
  if (!isScannableDirectory(current)) return;

  if (isScannableDirectory(path.join(current, "outputs"))) {
    const run = buildRun(root, current);
    if (run) runs.push(run);
    return;
  }

  for (const childName of fs.readdirSync(current).sort()) {
    if (RUN_SCAN_SKIPPED_NAMES.has(childName)) continue;
    // The recursive call rejects non-directories itself, so entries that
    // cannot be stat'ed (e.g. dangling symlinks) are skipped, not fatal.
    findRunsRecursive(root, path.join(current, childName), runs);
  }
}
97
+
98
/**
 * stat() a path without throwing.
 *
 * @param {string} target
 * @returns {fs.Stats | null} Stats, or null when the path cannot be stat'ed
 *   (missing, dangling symlink, removed while scanning).
 */
function statOrNull(target) {
  try {
    return fs.statSync(target);
  } catch {
    return null;
  }
}

/**
 * Assemble the review record for a single run directory.
 *
 * - prompt / eval_id come from `eval_metadata.json` in the run directory or its
 *   parent. If no prompt is found there, the "## Eval Prompt" section of
 *   `transcript.md` (run directory or its outputs/) is used, and finally the
 *   placeholder "(No prompt found)".
 * - outputs are the regular files directly inside `outputs/`, in sorted order,
 *   excluding names in METADATA_FILES, each converted with embedFile.
 * - grading is the parsed `grading.json` from the run directory or its parent,
 *   or null.
 *
 * Unreadable or malformed JSON/transcript files are skipped.
 *
 * @param {string} root - Workspace root; the run id is the run directory's
 *   path relative to it with separators replaced by "-".
 * @param {string} runDir - Directory of the run.
 * @returns {{id: string, prompt: string, eval_id: *, outputs: Array<object>, grading: *}}
 */
function buildRun(root, runDir) {
  let prompt = "";
  let evalId = null;

  for (const candidate of [
    path.join(runDir, "eval_metadata.json"),
    path.join(runDir, "..", "eval_metadata.json"),
  ]) {
    if (!fs.existsSync(candidate)) continue;
    try {
      const metadata = JSON.parse(fs.readFileSync(candidate, "utf-8"));
      prompt = metadata.prompt ?? "";
      // Do not let a later file without eval_id erase one found earlier.
      evalId = metadata.eval_id ?? evalId;
    } catch {
      // Unreadable or malformed metadata: try the next candidate.
    }
    if (prompt) break;
  }

  if (!prompt) {
    for (const candidate of [
      path.join(runDir, "transcript.md"),
      path.join(runDir, "outputs", "transcript.md"),
    ]) {
      if (!fs.existsSync(candidate)) continue;
      try {
        const text = fs.readFileSync(candidate, "utf-8");
        // \r? lets transcripts written with CRLF line endings match as well.
        const match = text.match(/## Eval Prompt\r?\n\r?\n([\s\S]*?)(?=\r?\n##|$)/);
        if (match) prompt = match[1].trim();
      } catch {
        // Unreadable transcript: try the next candidate.
      }
      if (prompt) break;
    }
  }

  if (!prompt) prompt = "(No prompt found)";

  const runId = path.relative(root, runDir).split(path.sep).join("-");
  const outputsDir = path.join(runDir, "outputs");
  const outputFiles = [];
  if (statOrNull(outputsDir)?.isDirectory()) {
    for (const name of fs.readdirSync(outputsDir).sort()) {
      if (METADATA_FILES.has(name)) continue;
      const filePath = path.join(outputsDir, name);
      // Entries that cannot be stat'ed (e.g. dangling symlinks) are skipped
      // instead of aborting the whole run.
      if (statOrNull(filePath)?.isFile()) {
        outputFiles.push(embedFile(filePath));
      }
    }
  }

  let grading = null;
  for (const candidate of [
    path.join(runDir, "grading.json"),
    path.join(runDir, "..", "grading.json"),
  ]) {
    if (!fs.existsSync(candidate)) continue;
    try {
      grading = JSON.parse(fs.readFileSync(candidate, "utf-8"));
    } catch {
      // Unreadable or malformed grading file: try the next candidate.
    }
    if (grading) break;
  }

  return {
    id: runId,
    prompt,
    eval_id: evalId,
    outputs: outputFiles,
    grading,
  };
}
170
+
171
/**
 * Read a file and turn it into a JSON-serialisable record for the viewer.
 *
 * The record shape depends on the (lower-cased) extension:
 * - TEXT_EXTENSIONS  -> { name, type: "text", content } (UTF-8 text; on a read
 *   failure the content is the placeholder "(Error reading file)")
 * - IMAGE_EXTENSIONS -> { name, type: "image", mime, data_uri }
 * - ".pdf"           -> { name, type: "pdf", data_uri }
 * - ".xlsx"          -> { name, type: "xlsx", data_b64 }
 * - anything else    -> { name, type: "binary", mime, data_uri }
 * A read failure for any non-text file yields
 * { name, type: "error", content: "(Error reading file)" }.
 *
 * @param {string} filePath - File to embed.
 * @returns {object} One of the record shapes listed above.
 */
function embedFile(filePath) {
  const name = path.basename(filePath);
  const extension = path.extname(filePath).toLowerCase();
  const mime = getMimeType(filePath);

  if (TEXT_EXTENSIONS.has(extension)) {
    let content = "(Error reading file)";
    try {
      content = fs.readFileSync(filePath, "utf-8");
    } catch {
      // keep the placeholder text
    }
    return { name, type: "text", content };
  }

  // Every remaining kind is shipped as base64; only the wrapper differs.
  try {
    const b64 = fs.readFileSync(filePath).toString("base64");

    if (IMAGE_EXTENSIONS.has(extension)) {
      return { name, type: "image", mime, data_uri: `data:${mime};base64,${b64}` };
    }
    if (extension === ".pdf") {
      return { name, type: "pdf", data_uri: `data:${mime};base64,${b64}` };
    }
    if (extension === ".xlsx") {
      return { name, type: "xlsx", data_b64: b64 };
    }
    return { name, type: "binary", mime, data_uri: `data:${mime};base64,${b64}` };
  } catch {
    return { name, type: "error", content: "(Error reading file)" };
  }
}
237
+
238
/**
 * Gather the feedback and outputs of an earlier iteration's workspace.
 *
 * Feedback comes from `<workspace>/feedback.json` (entries of its `reviews`
 * array whose feedback is non-blank); outputs come from the runs found in the
 * workspace. Feedback for a run id that no longer has a run directory is kept
 * with an empty outputs list. A missing or malformed feedback file is ignored.
 *
 * @param {string} workspace - Directory of the previous iteration.
 * @returns {Object<string, {feedback: string, outputs: Array<object>}>} Keyed by run id.
 */
function loadPreviousIteration(workspace) {
  const feedbackByRun = {};
  const feedbackFile = path.join(workspace, "feedback.json");

  if (fs.existsSync(feedbackFile)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(feedbackFile, "utf-8"));
      const reviews = parsed.reviews ?? [];
      for (const review of reviews) {
        const text = review.feedback ?? "";
        if (text.trim()) {
          feedbackByRun[review.run_id] = review.feedback;
        }
      }
    } catch {
      // feedback is optional; keep whatever was collected so far
    }
  }

  const previous = {};

  // Runs that still exist on disk: outputs plus any feedback recorded for them.
  for (const run of findRuns(workspace)) {
    const { id } = run;
    previous[id] = {
      feedback: feedbackByRun[id] ?? "",
      outputs: run.outputs ?? [],
    };
  }

  // Feedback whose run directory is gone is still carried over.
  for (const runId of Object.keys(feedbackByRun)) {
    if (previous[runId]) continue;
    previous[runId] = { feedback: feedbackByRun[runId], outputs: [] };
  }

  return previous;
}
271
+
272
/**
 * Render the review page by injecting the run data into `viewer.html`.
 *
 * The template (read from this script's directory) must contain the marker
 * `/*__EMBEDDED_DATA__*\/`; its first occurrence is replaced with a
 * `const EMBEDDED_DATA = {...};` statement. The embedded object has the keys
 * `skill_name`, `runs`, `previous_feedback`, `previous_outputs` and, when a
 * benchmark is given, `benchmark`.
 *
 * @param {Array<object>} runs - Run records to display.
 * @param {string} skillName - Name embedded as `skill_name`.
 * @param {?Object<string, {feedback?: string, outputs?: Array<object>}>} [previous=null]
 *   Previous-iteration data keyed by run id; only non-empty feedback and
 *   non-empty output lists are embedded.
 * @param {*} [benchmark=null] - Benchmark data; embedded only when truthy.
 * @returns {string} The complete HTML document.
 */
function generateHtml(runs, skillName, previous = null, benchmark = null) {
  const templatePath = path.join(__dirname, "viewer.html");
  const template = fs.readFileSync(templatePath, "utf-8");

  const previousFeedback = {};
  const previousOutputs = {};
  if (previous) {
    for (const [runId, data] of Object.entries(previous)) {
      if (data.feedback) previousFeedback[runId] = data.feedback;
      if (data.outputs?.length) previousOutputs[runId] = data.outputs;
    }
  }

  const embedded = {
    skill_name: skillName,
    runs,
    previous_feedback: previousFeedback,
    previous_outputs: previousOutputs,
  };
  if (benchmark) embedded.benchmark = benchmark;

  // The data becomes JavaScript source (presumably inside a <script> element of
  // the template — confirm against viewer.html). Escaping "<" keeps embedded
  // file contents such as "</script>" or "<!--" from terminating or altering
  // the script; "\u003c" decodes back to "<" when the literal is evaluated.
  const dataJson = JSON.stringify(embedded).replace(/</g, "\\u003c");

  // Use a replacer function: with a plain string replacement, sequences like
  // "$&", "$'" or "$$" inside the data would be expanded by String#replace.
  return template.replace("/*__EMBEDDED_DATA__*/", () => `const EMBEDDED_DATA = ${dataJson};`);
}
296
+
297
/**
 * Best-effort: terminate whatever process is listening on a TCP port.
 *
 * Uses `lsof` to find the listener(s) and sends each SIGTERM, then pauses
 * about half a second so the port can be released. Never throws; when `lsof`
 * is not installed a note is written to stderr and nothing else happens.
 *
 * @param {number} port - TCP port to free.
 * @returns {void}
 */
function killPort(port) {
  // -sTCP:LISTEN restricts the match to listening sockets. A bare "-i :PORT"
  // also reports processes that merely have a connection to the port (for
  // example a browser tab left open on a previous viewer), which must not be
  // signalled.
  const result = spawnSync("lsof", ["-t", `-iTCP:${port}`, "-sTCP:LISTEN"], {
    encoding: "utf-8",
    timeout: 5000,
  });

  // spawnSync does not throw when the binary is missing; it reports the
  // failure through result.error.
  if (result.error) {
    if (result.error.code === "ENOENT") {
      process.stderr.write("Note: lsof not found, cannot check if port is in use\n");
    }
    return;
  }

  let signalled = false;
  for (const line of (result.stdout ?? "").split("\n")) {
    const pid = Number.parseInt(line.trim(), 10);
    if (!Number.isInteger(pid) || pid === process.pid) continue;
    try {
      process.kill(pid, "SIGTERM");
      signalled = true;
    } catch {
      // Process already gone or not ours to signal: nothing more to do.
    }
  }

  if (signalled) {
    // Brief pause after signalling the port holder so it can exit.
    const sleep = spawnSync("sleep", ["0.5"], { stdio: "ignore" });
    if (sleep.error) {
      // No `sleep` binary available: fall back to a short busy wait.
      const deadline = Date.now() + 500;
      while (Date.now() < deadline) {
        // intentionally empty
      }
    }
  }
}
326
+
327
/**
 * Ask the operating system to open a URL with its default handler.
 *
 * Runs `open` on macOS, `cmd /c start` on Windows and `xdg-open` everywhere
 * else. Output is discarded and the spawn result is not inspected.
 *
 * @param {string} url - URL to open.
 * @returns {void}
 */
function openUrl(url) {
  switch (process.platform) {
    case "darwin":
      spawnSync("open", [url], { stdio: "ignore" });
      break;
    case "win32":
      spawnSync("cmd", ["/c", "start", "", url], { stdio: "ignore", shell: true });
      break;
    default:
      spawnSync("xdg-open", [url], { stdio: "ignore" });
      break;
  }
}
336
+
337
/**
 * Create (but do not start) the HTTP server behind the review page.
 *
 * Routes:
 * - `GET /` and `GET /index.html`: re-scan the workspace, re-read the benchmark
 *   file if given, and return the freshly generated HTML.
 * - `GET /api/feedback`: contents of the feedback file, or `{}` if it does not
 *   exist.
 * - `POST /api/feedback`: body must be a JSON object with a `reviews` key; it
 *   is written (pretty-printed) to the feedback file. Responds `{"ok":true}`,
 *   or status 500 with `{"error": ...}` on invalid input or write failure.
 * - anything else: 404. A request target that cannot be parsed as a URL: 400.
 *
 * An exception raised while serving a request is answered with status 500 and
 * a JSON error body instead of propagating out of the request handler.
 *
 * @param {object} options
 * @param {string} options.workspace - Workspace directory scanned for runs.
 * @param {string} options.skillName - Skill name passed to generateHtml.
 * @param {string} options.feedbackPath - File that stores submitted feedback.
 * @param {?object} options.previous - Previous-iteration data for generateHtml.
 * @param {?string} options.benchmarkPath - Optional benchmark JSON file.
 * @returns {http.Server} Server that is not yet listening.
 */
function createReviewServer({ workspace, skillName, feedbackPath, previous, benchmarkPath }) {
  const send = (res, status, contentType, body) => {
    res.writeHead(status, {
      "Content-Type": contentType,
      "Content-Length": body.length,
    });
    res.end(body);
  };

  const sendFailure = (res, error) => {
    if (res.headersSent) {
      // Too late for a proper error response; just terminate it.
      res.end();
      return;
    }
    const message = error instanceof Error ? error.message : String(error);
    send(res, 500, "application/json", Buffer.from(JSON.stringify({ error: message })));
  };

  const route = (req, res, url) => {
    if (req.method === "GET" && (url.pathname === "/" || url.pathname === "/index.html")) {
      const runs = findRuns(workspace);
      let benchmark = null;
      if (benchmarkPath && fs.existsSync(benchmarkPath)) {
        try {
          benchmark = JSON.parse(fs.readFileSync(benchmarkPath, "utf-8"));
        } catch {
          // The benchmark is optional; render the page without it.
        }
      }
      const html = generateHtml(runs, skillName, previous, benchmark);
      send(res, 200, "text/html; charset=utf-8", Buffer.from(html, "utf-8"));
      return;
    }

    if (req.method === "GET" && url.pathname === "/api/feedback") {
      const data = fs.existsSync(feedbackPath) ? fs.readFileSync(feedbackPath) : Buffer.from("{}");
      send(res, 200, "application/json", data);
      return;
    }

    if (req.method === "POST" && url.pathname === "/api/feedback") {
      const chunks = [];
      req.on("data", (chunk) => chunks.push(chunk));
      req.on("error", (error) => sendFailure(res, error));
      req.on("end", () => {
        let resp;
        let status = 200;
        try {
          const body = Buffer.concat(chunks).toString("utf-8");
          const data = JSON.parse(body);
          if (!data || typeof data !== "object" || !("reviews" in data)) {
            throw new Error("Expected JSON object with 'reviews' key");
          }
          fs.writeFileSync(feedbackPath, `${JSON.stringify(data, null, 2)}\n`);
          resp = Buffer.from('{"ok":true}');
        } catch (error) {
          resp = Buffer.from(JSON.stringify({ error: error.message }));
          status = 500;
        }
        send(res, status, "application/json", resp);
      });
      return;
    }

    res.writeHead(404);
    res.end();
  };

  return http.createServer((req, res) => {
    let url;
    try {
      url = new URL(req.url ?? "/", "http://127.0.0.1");
    } catch {
      // e.g. a request target of "//" is not a valid URL against the base.
      res.writeHead(400);
      res.end();
      return;
    }

    try {
      route(req, res, url);
    } catch (error) {
      // An exception escaping a request listener would otherwise surface as
      // an uncaught exception and stop the whole process.
      sendFailure(res, error);
    }
  });
}
402
+
403
/**
 * Parse the command line of the review script.
 *
 * Recognised options (each takes the following argument as its value):
 * `--port`/`-p`, `--skill-name`/`-n`, `--previous-workspace`, `--benchmark`,
 * `--static`/`-s`. The first argument that does not start with "-" is the
 * workspace. Unrecognised flags are ignored.
 *
 * A `--port` value that is missing or not an integer in 0..65535 is reported
 * on stderr and the default port is kept. Any other option given without a
 * value keeps its default of null.
 *
 * @param {string[]} argv - Full argument vector (`process.argv` layout: the
 *   first two entries are skipped).
 * @returns {{workspace: ?string, port: number, skillName: ?string,
 *   previousWorkspace: ?string, benchmark: ?string, static: ?string}}
 */
function parseArgs(argv) {
  const args = {
    workspace: null,
    port: 3117,
    skillName: null,
    previousWorkspace: null,
    benchmark: null,
    static: null,
  };
  const positional = [];

  for (let i = 2; i < argv.length; i++) {
    const arg = argv[i];
    switch (arg) {
      case "--port":
      case "-p": {
        const raw = argv[++i];
        const parsed = Number.parseInt(raw, 10);
        if (Number.isInteger(parsed) && parsed >= 0 && parsed <= 65535) {
          args.port = parsed;
        } else {
          console.error(`Ignoring invalid port ${JSON.stringify(raw)}; using ${args.port}`);
        }
        break;
      }
      case "--skill-name":
      case "-n":
        args.skillName = argv[++i] ?? null;
        break;
      case "--previous-workspace":
        args.previousWorkspace = argv[++i] ?? null;
        break;
      case "--benchmark":
        args.benchmark = argv[++i] ?? null;
        break;
      case "--static":
      case "-s":
        args.static = argv[++i] ?? null;
        break;
      default:
        if (!arg.startsWith("-")) positional.push(arg);
        break;
    }
  }

  args.workspace = positional[0] ?? null;
  return args;
}
425
+
426
/**
 * Command-line entry point.
 *
 * Validates the workspace argument, collects the runs and optional
 * previous-iteration / benchmark data, then either writes a static HTML file
 * (`--static`) and exits, or starts the review server on 127.0.0.1 and opens
 * it in the browser. If listening on the requested port fails, a port chosen
 * by the operating system is used instead. Exits with status 1 on usage
 * errors, when the workspace is not a directory, when no runs are found, or
 * when the server cannot be started.
 *
 * @returns {void}
 */
function main() {
  const args = parseArgs(process.argv);
  if (!args.workspace) {
    console.error(
      "Usage: node generate_review.js <workspace-path> [--port PORT] [--skill-name NAME] [--previous-workspace PATH] [--benchmark PATH] [--static OUTPUT.html]",
    );
    process.exit(1);
  }

  const workspace = path.resolve(args.workspace);
  const workspaceIsDirectory = fs.existsSync(workspace) && fs.statSync(workspace).isDirectory();
  if (!workspaceIsDirectory) {
    console.error(`Error: ${workspace} is not a directory`);
    process.exit(1);
  }

  const runs = findRuns(workspace);
  if (runs.length === 0) {
    console.error(`No runs found in ${workspace}`);
    process.exit(1);
  }

  const skillName = args.skillName ?? path.basename(workspace).replace("-workspace", "");
  const feedbackPath = path.join(workspace, "feedback.json");
  const previous = args.previousWorkspace
    ? loadPreviousIteration(path.resolve(args.previousWorkspace))
    : {};

  const benchmarkPath = args.benchmark ? path.resolve(args.benchmark) : null;
  let benchmark = null;
  if (benchmarkPath && fs.existsSync(benchmarkPath)) {
    try {
      benchmark = JSON.parse(fs.readFileSync(benchmarkPath, "utf-8"));
    } catch {
      // an unreadable benchmark is simply left out
    }
  }

  // Static mode: write the page once and stop.
  if (args.static) {
    const staticPath = path.resolve(args.static);
    fs.mkdirSync(path.dirname(staticPath), { recursive: true });
    fs.writeFileSync(staticPath, generateHtml(runs, skillName, previous, benchmark));
    console.log(`\n Static viewer written to: ${staticPath}\n`);
    process.exit(0);
  }

  // Server mode.
  killPort(args.port);
  const server = createReviewServer({
    workspace,
    skillName,
    feedbackPath,
    previous,
    benchmarkPath,
  });

  // Permanent no-op listener: a failed listen attempt is dealt with by the
  // retry below rather than surfacing as an unhandled 'error' event.
  server.on("error", () => {});

  // Resolve with the bound port, or reject if the server emits 'error' first.
  const listenOnPort = (targetPort) =>
    new Promise((resolve, reject) => {
      server.once("error", reject);
      server.listen(targetPort, "127.0.0.1", () => {
        server.removeListener("error", reject);
        resolve(server.address().port);
      });
    });

  const announceAndOpen = (actualPort) => {
    const url = `http://localhost:${actualPort}`;
    const banner = [
      "\n Eval Viewer",
      " ─────────────────────────────────",
      ` URL: ${url}`,
      ` Workspace: ${workspace}`,
      ` Feedback: ${feedbackPath}`,
    ];
    if (args.previousWorkspace) {
      banner.push(` Previous: ${args.previousWorkspace} (${Object.keys(previous).length} runs)`);
    }
    if (benchmarkPath) {
      banner.push(` Benchmark: ${benchmarkPath}`);
    }
    banner.push("\n Press Ctrl+C to stop.\n");
    for (const line of banner) {
      console.log(line);
    }

    openUrl(url);

    process.on("SIGINT", () => {
      console.log("\nStopped.");
      server.close();
      process.exit(0);
    });
  };

  listenOnPort(args.port)
    .catch(() => listenOnPort(0)) // requested port unavailable: let the OS pick one
    .then(announceAndOpen)
    .catch((error) => {
      console.error(error.message);
      process.exit(1);
    });
}
530
+
531
// Run the command-line entry point only when this file is executed directly,
// not when it is loaded through require().
if (require.main === module) {
  main();
}

// Functions made available to modules that require() this file.
module.exports = {
  findRuns,
  buildRun,
  embedFile,
  loadPreviousIteration,
  generateHtml,
  createReviewServer,
  killPort,
  openUrl,
};