crewswarm 0.9.2 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. package/README.md +22 -9
  2. package/apps/dashboard/dist/assets/{chat-core-Cx4sTxDd.js → chat-core-3KirthZA.js} +1 -1
  3. package/apps/dashboard/dist/assets/index-GSWxxEPO.js +2 -0
  4. package/apps/dashboard/dist/assets/{tab-pm-loop-tab-Bfd449B4.js → tab-pm-loop-tab-DiAPTJXu.js} +1 -1
  5. package/apps/dashboard/dist/assets/{tab-projects-tab-DhNWnlzt.js → tab-projects-tab-SFH4E--a.js} +1 -1
  6. package/apps/dashboard/dist/assets/tab-settings-tab-BselH1c0.js +1 -0
  7. package/apps/dashboard/dist/index.html +82 -11
  8. package/apps/vibe/README.md +2 -2
  9. package/apps/vibe/package.json +1 -1
  10. package/apps/vibe/server.mjs +3 -3
  11. package/crew-lead.mjs +34 -4
  12. package/lib/bridges/gateway-ws.mjs +4 -0
  13. package/lib/crew-lead/chat-handler.mjs +34 -0
  14. package/lib/crew-lead/http-server.mjs +55 -14
  15. package/lib/crew-lead/llm-caller.mjs +24 -8
  16. package/lib/crew-lead/prompts.mjs +7 -0
  17. package/lib/crew-lead/wave-dispatcher.mjs +15 -3
  18. package/lib/crew-lead/ws-router.mjs +219 -27
  19. package/lib/engines/engine-registry.mjs +9 -0
  20. package/lib/engines/rt-envelope.mjs +1 -0
  21. package/lib/engines/runners.mjs +5 -2
  22. package/lib/runtime/paths.mjs +12 -8
  23. package/package.json +35 -15
  24. package/scripts/capture-build-flow.mjs +118 -0
  25. package/scripts/coverage-report.mjs +209 -0
  26. package/scripts/coverage-summary.mjs +47 -0
  27. package/scripts/dashboard-validation.mjs +74 -0
  28. package/scripts/dashboard.mjs +560 -70
  29. package/scripts/live-bridge-matrix.mjs +79 -0
  30. package/scripts/live-cli-matrix.mjs +166 -0
  31. package/scripts/live-crewchat-check.mjs +42 -0
  32. package/scripts/live-engine-matrix.mjs +50 -0
  33. package/scripts/live-provider-failover-matrix.mjs +107 -0
  34. package/scripts/live-provider-matrix.mjs +228 -0
  35. package/scripts/restart-all-from-repo.sh +4 -4
  36. package/scripts/smoke-dispatch.mjs +4 -1
  37. package/scripts/test-blast-radius.mjs +204 -0
  38. package/scripts/test-report-summary.mjs +88 -0
  39. package/scripts/test-reporter.mjs +651 -0
  40. package/scripts/test-rerun.mjs +136 -0
  41. package/scripts/tmux-bridge +130 -0
  42. package/apps/dashboard/dist/assets/chat-core-Cx4sTxDd.js.br +0 -0
  43. package/apps/dashboard/dist/assets/cli-process-COMRNPqr.js.br +0 -0
  44. package/apps/dashboard/dist/assets/components-BS9fQjE_.js.br +0 -0
  45. package/apps/dashboard/dist/assets/core-utils-CmOkXgzi.js.br +0 -0
  46. package/apps/dashboard/dist/assets/index-CF0aJRtC.css.br +0 -0
  47. package/apps/dashboard/dist/assets/index-DnClJ1ee.js +0 -2
  48. package/apps/dashboard/dist/assets/index-DnClJ1ee.js.br +0 -0
  49. package/apps/dashboard/dist/assets/orchestration-Ca2DLWN-.js.br +0 -0
  50. package/apps/dashboard/dist/assets/setup-wizard-CA0Or47w.js.br +0 -0
  51. package/apps/dashboard/dist/assets/tab-agents-tab-BgpIsjkw.js.br +0 -0
  52. package/apps/dashboard/dist/assets/tab-comms-tab-kguqTIzD.js.br +0 -0
  53. package/apps/dashboard/dist/assets/tab-contacts-tab-DiOyMYth.js.br +0 -0
  54. package/apps/dashboard/dist/assets/tab-engines-tab-BsdZVvU0.js.br +0 -0
  55. package/apps/dashboard/dist/assets/tab-memory-tab-Cu6u13EQ.js.br +0 -0
  56. package/apps/dashboard/dist/assets/tab-models-tab-BLEjmd19.js.br +0 -0
  57. package/apps/dashboard/dist/assets/tab-pm-loop-tab-Bfd449B4.js.br +0 -0
  58. package/apps/dashboard/dist/assets/tab-projects-tab-DhNWnlzt.js.br +0 -0
  59. package/apps/dashboard/dist/assets/tab-prompts-tab-DVkUNaJd.js.br +0 -0
  60. package/apps/dashboard/dist/assets/tab-services-tab-DU_LH3uG.js.br +0 -0
  61. package/apps/dashboard/dist/assets/tab-settings-tab-Bn4nXtDe.js +0 -1
  62. package/apps/dashboard/dist/assets/tab-settings-tab-Bn4nXtDe.js.br +0 -0
  63. package/apps/dashboard/dist/assets/tab-skills-tab-BpY0uZHW.js.br +0 -0
  64. package/apps/dashboard/dist/assets/tab-spending-tab-DEccQHnt.js.br +0 -0
  65. package/apps/dashboard/dist/assets/tab-swarm-chat-tab-BNrd88-r.js.br +0 -0
  66. package/apps/dashboard/dist/assets/tab-swarm-tab-B1AcjL1W.js.br +0 -0
  67. package/apps/dashboard/dist/assets/tab-usage-tab-BIOOnB-Y.js.br +0 -0
  68. package/apps/dashboard/dist/assets/tab-waves-tab-SaJDkb4x.js.br +0 -0
  69. package/apps/dashboard/dist/assets/tab-workflows-tab-B-soSy1k.js.br +0 -0
  70. package/apps/dashboard/dist/index.html.br +0 -0
  71. package/apps/dashboard/dist/index.html.gz +0 -0
  72. package/apps/dashboard/index.html +0 -6529
  73. package/apps/dashboard/package.json +0 -15
  74. package/apps/dashboard/src/app.js +0 -2828
  75. package/apps/dashboard/src/app.js.br +0 -0
  76. package/apps/dashboard/src/app.js.gz +0 -0
  77. package/apps/dashboard/src/chat/chat-actions.js +0 -1847
  78. package/apps/dashboard/src/chat/chat-actions.js.br +0 -0
  79. package/apps/dashboard/src/chat/unified-messages.js +0 -327
  80. package/apps/dashboard/src/chat/unified-messages.js.br +0 -0
  81. package/apps/dashboard/src/cli-process.js +0 -208
  82. package/apps/dashboard/src/cli-process.js.br +0 -0
  83. package/apps/dashboard/src/cli-process.js.gz +0 -0
  84. package/apps/dashboard/src/components/active-tasks-panel.js +0 -175
  85. package/apps/dashboard/src/components/active-tasks-panel.js.br +0 -0
  86. package/apps/dashboard/src/core/api.js +0 -18
  87. package/apps/dashboard/src/core/api.js.br +0 -0
  88. package/apps/dashboard/src/core/dom.js +0 -228
  89. package/apps/dashboard/src/core/dom.js.br +0 -0
  90. package/apps/dashboard/src/core/state.js +0 -91
  91. package/apps/dashboard/src/core/state.js.br +0 -0
  92. package/apps/dashboard/src/core/task-manager.js +0 -134
  93. package/apps/dashboard/src/core/task-manager.js.br +0 -0
  94. package/apps/dashboard/src/orchestration-status.js +0 -127
  95. package/apps/dashboard/src/orchestration-status.js.br +0 -0
  96. package/apps/dashboard/src/setup-wizard.js +0 -562
  97. package/apps/dashboard/src/setup-wizard.js.br +0 -0
  98. package/apps/dashboard/src/styles.css +0 -2085
  99. package/apps/dashboard/src/styles.css.br +0 -0
  100. package/apps/dashboard/src/styles.css.gz +0 -0
  101. package/apps/dashboard/src/tabs/agents-tab.js +0 -2237
  102. package/apps/dashboard/src/tabs/agents-tab.js.br +0 -0
  103. package/apps/dashboard/src/tabs/benchmarks-tab.js +0 -229
  104. package/apps/dashboard/src/tabs/benchmarks-tab.js.br +0 -0
  105. package/apps/dashboard/src/tabs/comms-tab.js +0 -955
  106. package/apps/dashboard/src/tabs/comms-tab.js.br +0 -0
  107. package/apps/dashboard/src/tabs/contacts-tab.js +0 -654
  108. package/apps/dashboard/src/tabs/contacts-tab.js.br +0 -0
  109. package/apps/dashboard/src/tabs/engines-tab.js +0 -175
  110. package/apps/dashboard/src/tabs/engines-tab.js.br +0 -0
  111. package/apps/dashboard/src/tabs/memory-tab.js +0 -182
  112. package/apps/dashboard/src/tabs/memory-tab.js.br +0 -0
  113. package/apps/dashboard/src/tabs/models-tab.js +0 -450
  114. package/apps/dashboard/src/tabs/models-tab.js.br +0 -0
  115. package/apps/dashboard/src/tabs/pm-loop-tab.js +0 -185
  116. package/apps/dashboard/src/tabs/pm-loop-tab.js.br +0 -0
  117. package/apps/dashboard/src/tabs/projects-tab.js +0 -663
  118. package/apps/dashboard/src/tabs/projects-tab.js.br +0 -0
  119. package/apps/dashboard/src/tabs/projects-tab.js.gz +0 -0
  120. package/apps/dashboard/src/tabs/prompts-tab.js +0 -160
  121. package/apps/dashboard/src/tabs/prompts-tab.js.br +0 -0
  122. package/apps/dashboard/src/tabs/services-tab.js +0 -202
  123. package/apps/dashboard/src/tabs/services-tab.js.br +0 -0
  124. package/apps/dashboard/src/tabs/settings-tab.js +0 -861
  125. package/apps/dashboard/src/tabs/settings-tab.js.br +0 -0
  126. package/apps/dashboard/src/tabs/skills-tab.js +0 -284
  127. package/apps/dashboard/src/tabs/skills-tab.js.br +0 -0
  128. package/apps/dashboard/src/tabs/spending-tab.js +0 -173
  129. package/apps/dashboard/src/tabs/spending-tab.js.br +0 -0
  130. package/apps/dashboard/src/tabs/swarm-chat-tab.js +0 -660
  131. package/apps/dashboard/src/tabs/swarm-chat-tab.js.br +0 -0
  132. package/apps/dashboard/src/tabs/swarm-tab.js +0 -538
  133. package/apps/dashboard/src/tabs/swarm-tab.js.br +0 -0
  134. package/apps/dashboard/src/tabs/usage-tab.js +0 -390
  135. package/apps/dashboard/src/tabs/usage-tab.js.br +0 -0
  136. package/apps/dashboard/src/tabs/waves-tab.js +0 -238
  137. package/apps/dashboard/src/tabs/waves-tab.js.br +0 -0
  138. package/apps/dashboard/src/tabs/workflows-tab.js +0 -747
  139. package/apps/dashboard/src/tabs/workflows-tab.js.br +0 -0
  140. package/apps/vibe/.crew/agent-memory/pipeline.json +0 -304
  141. package/apps/vibe/.crew/cost.json +0 -17
  142. package/apps/vibe/.crew/json-parse-metrics.jsonl +0 -27
  143. package/apps/vibe/.crew/pipeline-metrics.jsonl +0 -27
  144. package/apps/vibe/.crew/pipeline-runs/pipeline-0f90c392-2425-4ae5-850c-bd9d17b1d690.jsonl +0 -5
  145. package/apps/vibe/.crew/pipeline-runs/pipeline-1c269dd9-a63f-4fba-af81-5cf08048ef06.jsonl +0 -5
  146. package/apps/vibe/.crew/pipeline-runs/pipeline-288a7765-da24-4a22-89bc-1f3cc9b0562c.jsonl +0 -5
  147. package/apps/vibe/.crew/pipeline-runs/pipeline-2c78fd22-a657-4bd1-bc49-0679fb384409.jsonl +0 -5
  148. package/apps/vibe/.crew/pipeline-runs/pipeline-3da23550-22ed-4904-9a0a-8e79c1f3024c.jsonl +0 -5
  149. package/apps/vibe/.crew/pipeline-runs/pipeline-3e6fe08d-3264-404a-8df3-aab7efef10e7.jsonl +0 -5
  150. package/apps/vibe/.crew/pipeline-runs/pipeline-42eec610-57fe-4e09-9e7e-b315038495c2.jsonl +0 -5
  151. package/apps/vibe/.crew/pipeline-runs/pipeline-4438eb4c-ae13-42b1-90e2-b043d8983be8.jsonl +0 -5
  152. package/apps/vibe/.crew/pipeline-runs/pipeline-4740a9f5-86e7-44b6-a394-de433e291727.jsonl +0 -5
  153. package/apps/vibe/.crew/pipeline-runs/pipeline-49e1da6a-957e-48fd-9220-415019e4f8e2.jsonl +0 -5
  154. package/apps/vibe/.crew/pipeline-runs/pipeline-4c9251db-be68-427b-a3fc-a264f2b5778d.jsonl +0 -5
  155. package/apps/vibe/.crew/pipeline-runs/pipeline-6413fa33-a802-4b57-a8c0-a9056ad67842.jsonl +0 -5
  156. package/apps/vibe/.crew/pipeline-runs/pipeline-65e29a57-664d-4196-8109-017e364f182e.jsonl +0 -5
  157. package/apps/vibe/.crew/pipeline-runs/pipeline-6aa04bc5-9593-4b1f-b58d-3bf2978cb602.jsonl +0 -5
  158. package/apps/vibe/.crew/pipeline-runs/pipeline-6e1cba53-9b70-457e-99e0-59199149dd21.jsonl +0 -5
  159. package/apps/vibe/.crew/pipeline-runs/pipeline-749f41cc-4dac-4204-be64-873a6080a0d2.jsonl +0 -5
  160. package/apps/vibe/.crew/pipeline-runs/pipeline-74d68121-e181-4864-bd9a-c3211341dfaf.jsonl +0 -5
  161. package/apps/vibe/.crew/pipeline-runs/pipeline-8509bc24-142d-4e07-b44a-a50bf99d1103.jsonl +0 -5
  162. package/apps/vibe/.crew/pipeline-runs/pipeline-960339c6-07ca-43ce-9900-f6e1702b39b9.jsonl +0 -5
  163. package/apps/vibe/.crew/pipeline-runs/pipeline-9bef2dd2-6122-42e5-b3d9-19f4d80f9e40.jsonl +0 -5
  164. package/apps/vibe/.crew/pipeline-runs/pipeline-9c6480a9-7031-4146-b241-825b9a2d1de1.jsonl +0 -5
  165. package/apps/vibe/.crew/pipeline-runs/pipeline-9fd42426-8492-4157-9d5f-e1537c060489.jsonl +0 -2
  166. package/apps/vibe/.crew/pipeline-runs/pipeline-ad6d40a3-2f5e-46a9-a345-47caaccc51aa.jsonl +0 -5
  167. package/apps/vibe/.crew/pipeline-runs/pipeline-bc606133-8d5b-4535-8d85-f1a29cdaa981.jsonl +0 -5
  168. package/apps/vibe/.crew/pipeline-runs/pipeline-c1418f4e-b773-4ca1-84a3-216acf36e2f2.jsonl +0 -5
  169. package/apps/vibe/.crew/pipeline-runs/pipeline-c1a13ccd-634a-4d01-a4a7-1177b8a752ff.jsonl +0 -5
  170. package/apps/vibe/.crew/pipeline-runs/pipeline-c7d27b42-249e-4bd4-8f26-6aa998110b8a.jsonl +0 -5
  171. package/apps/vibe/.crew/pipeline-runs/pipeline-cca2e9b9-4a34-4d25-a311-5c793fa7e91e.jsonl +0 -5
  172. package/apps/vibe/.crew/sandbox.json +0 -7
  173. package/apps/vibe/.crew/session.json +0 -330
  174. package/apps/vibe/.crew/training-data.jsonl +0 -0
  175. package/apps/vibe/.github/workflows/studio-quality.yml +0 -37
  176. package/apps/vibe/.studio-data/project-messages/chuck-norris.jsonl +0 -18
  177. package/apps/vibe/.studio-data/project-messages/general.jsonl +0 -81
  178. package/apps/vibe/.studio-data/project-messages/studio-local.jsonl +0 -18
  179. package/apps/vibe/ARCHITECTURE.md +0 -3393
  180. package/apps/vibe/QUICK-REFERENCE.md +0 -211
  181. package/apps/vibe/ROADMAP.md +0 -41
  182. package/apps/vibe/STUDIO-SETUP-COMPLETE.md +0 -35
  183. package/apps/vibe/VISUAL-GUIDE.md +0 -378
  184. package/apps/vibe/capture-demo.mjs +0 -160
  185. package/apps/vibe/capture-full-demo.mjs +0 -255
  186. package/apps/vibe/capture-quickstart.mjs +0 -256
  187. package/apps/vibe/capture-vibe-assets.mjs +0 -71
  188. package/apps/vibe/capture-vibe-video.mjs +0 -260
  189. package/apps/vibe/check-buttons.js +0 -41
  190. package/apps/vibe/diagnose.html +0 -106
  191. package/apps/vibe/fix-buttons.js +0 -103
  192. package/apps/vibe/index.html +0 -3404
  193. package/apps/vibe/package-lock.json +0 -920
  194. package/apps/vibe/scripts/studio-pty-host.py +0 -117
  195. package/apps/vibe/src/main.js +0 -2940
  196. package/apps/vibe/src/register-all-languages.js +0 -98
  197. package/apps/vibe/start-studio.sh +0 -11
  198. package/apps/vibe/test/accessibility-tests.js +0 -77
  199. package/apps/vibe/test/browser-performance-audit.mjs +0 -205
  200. package/apps/vibe/test/performance-tests.js +0 -120
  201. package/apps/vibe/test/security-tests.js +0 -213
  202. package/apps/vibe/tests/e2e.local.mjs +0 -54
  203. package/apps/vibe/tests/server.smoke.mjs +0 -106
  204. package/apps/vibe/update_website.mjs +0 -74
  205. package/apps/vibe/vite.config.js +0 -19
  206. package/lib/crew-lead/chat-handler.mjs.bak +0 -1274
  207. package/lib/engines/rt-envelope.mjs.backup-current +0 -870
@@ -1,229 +0,0 @@
1
- import { escHtml } from '../core/dom.js';
2
-
3
- // ── Benchmark task runner state ──────────────────────────────────────────────
4
- let _runnerTasks = []; // cached task rows from HuggingFace
5
- let _runnerAbort = null; // AbortController for active SSE stream
6
-
7
- export function showBenchmarks({ hideAllViews, setNavActive } = {}) {
8
- if (typeof hideAllViews === 'function') hideAllViews();
9
- const view = document.getElementById('benchmarksView');
10
- if (view) view.classList.add('active');
11
- if (typeof setNavActive === 'function') setNavActive('navBenchmarks');
12
- loadBenchmarkOptions().then(() => {
13
- const sel = document.getElementById('benchmarkSelect');
14
- if (sel && sel.value) loadBenchmarkLeaderboard(sel.value);
15
- });
16
- }
17
-
18
- export async function loadBenchmarkOptions() {
19
- const sel = document.getElementById('benchmarkSelect');
20
- if (!sel) return;
21
- const cur = sel.value;
22
- sel.innerHTML = '<option value="">— Loading… —</option>';
23
- try {
24
- const r = await fetch('/api/zeroeval/benchmarks');
25
- const arr = await r.json();
26
- if (!Array.isArray(arr)) throw new Error('Expected array');
27
- sel.innerHTML = '<option value="">— Pick benchmark —</option>';
28
- arr.forEach(b => {
29
- const id = typeof b === 'object' ? (b.benchmark_id || b.id) : b;
30
- const name = typeof b === 'object' ? (b.name || id) : id;
31
- const opt = document.createElement('option');
32
- opt.value = id;
33
- opt.textContent = name;
34
- sel.appendChild(opt);
35
- });
36
- if (cur && arr.some(b => (typeof b === 'object' ? b.benchmark_id : b) === cur)) {
37
- sel.value = cur;
38
- } else {
39
- const DEFAULT_BENCHMARK = 'swe-bench-verified';
40
- if (arr.some(b => (typeof b === 'object' ? b.benchmark_id : b) === DEFAULT_BENCHMARK)) {
41
- sel.value = DEFAULT_BENCHMARK;
42
- }
43
- }
44
- return sel.value;
45
- } catch (e) {
46
- sel.innerHTML = '<option value="">— Failed to load —</option>';
47
- }
48
- }
49
-
50
- export async function loadBenchmarks() {
51
- await loadBenchmarkOptions();
52
- const sel = document.getElementById('benchmarkSelect');
53
- if (sel && sel.value) loadBenchmarkLeaderboard(sel.value);
54
- }
55
-
56
- export async function loadBenchmarkLeaderboard(benchmarkId) {
57
- const tableEl = document.getElementById('benchmarkTable');
58
- const metaEl = document.getElementById('benchmarkMeta');
59
- if (!tableEl || !metaEl) return;
60
- if (!benchmarkId) {
61
- tableEl.innerHTML = '';
62
- metaEl.style.display = 'none';
63
- return;
64
- }
65
- tableEl.innerHTML = '<div class="meta" style="padding:20px;">Loading…</div>';
66
- metaEl.style.display = 'none';
67
- try {
68
- const r = await fetch('/api/zeroeval/benchmarks/' + encodeURIComponent(benchmarkId));
69
- const data = await r.json();
70
- if (!r.ok) throw new Error(data.error || data.detail || 'Failed to load');
71
- const models = data.entries || data.models || [];
72
- const totalModels = data.total_models ?? data.statistics?.total_models ?? models.length;
73
- const avgScore = data.statistics?.average_score ?? (models.length ? models.reduce((s, m) => (s + (m.normalized_score ?? m.benchmark_score ?? m.score ?? 0)), 0) / models.length : 0);
74
- const displayName = data.benchmark_name || data.name || benchmarkId;
75
- const displayDesc = data.benchmark_description || data.description || '';
76
- metaEl.innerHTML = '<b>' + escHtml(displayName) + '</b>' + (displayDesc ? ': ' + escHtml(displayDesc.slice(0, 200)) : '') + ' | ' + totalModels + ' models, avg ' + (avgScore * 100).toFixed(1) + '%';
77
- metaEl.style.display = 'block';
78
- if (!models.length) {
79
- tableEl.innerHTML = '<div class="meta" style="padding:20px;">No model scores for this benchmark.</div>';
80
- return;
81
- }
82
- const rows = models.slice(0, 100).map(m => {
83
- const score = (m.normalized_score != null ? m.normalized_score : (m.benchmark_score != null ? m.benchmark_score : m.score)) ?? 0;
84
- const pct = (score * 100).toFixed(1);
85
- const inp = m.input_cost_per_million != null ? Math.round(m.input_cost_per_million * 100) + '¢' : '—';
86
- const out = m.output_cost_per_million != null ? Math.round(m.output_cost_per_million * 100) + '¢' : '—';
87
- const inC = m.input_cost_per_million ?? 0;
88
- const outC = m.output_cost_per_million ?? 0;
89
- const centsPerPt = (inC + outC) > 0 && score > 0 ? ((inC + outC) * 100 / (score * 100)).toFixed(1) + '¢/pt' : '—';
90
- return '<tr><td style="padding:6px 10px;">' + (m.rank || '-') + '</td><td style="padding:6px 10px;">' + escHtml(m.model_name || m.model_id) + '</td><td style="padding:6px 10px;">' + escHtml(m.organization_name || '') + '</td><td style="padding:6px 10px;font-weight:600;">' + pct + '%</td><td style="padding:6px 10px;font-size:11px;" title="¢ per 1M input tokens">' + inp + '</td><td style="padding:6px 10px;font-size:11px;" title="¢ per 1M output tokens">' + out + '</td><td style="padding:6px 10px;font-size:11px;" title="¢ per score point (1M in+out / score%)">' + centsPerPt + '</td><td style="padding:6px 10px;font-size:11px;">' + (m.analysis_method || '-').slice(0, 40) + '</td></tr>';
91
- }).join('');
92
- tableEl.innerHTML = '<table style="width:100%;border-collapse:collapse;font-size:12px;"><thead><tr style="border-bottom:1px solid var(--border);"><th style="text-align:left;padding:6px 10px;">Rank</th><th style="text-align:left;padding:6px 10px;">Model</th><th style="text-align:left;padding:6px 10px;">Org</th><th style="text-align:left;padding:6px 10px;">Score</th><th style="text-align:left;padding:6px 10px;" title="¢ per 1M input">in ¢</th><th style="text-align:left;padding:6px 10px;" title="¢ per 1M output">out ¢</th><th style="text-align:left;padding:6px 10px;" title="¢ per score point">¢/pt</th><th style="text-align:left;padding:6px 10px;">Method</th></tr></thead><tbody>' + rows + '</tbody></table>';
93
- } catch (e) {
94
- tableEl.innerHTML = '<div style="color:var(--red);padding:20px;">Error: ' + escHtml(e.message) + '</div>';
95
- }
96
- }
97
-
98
- // ── Custom runner — load SWE-Bench tasks into the task picker ────────────────
99
- export async function loadBenchmarkTasks() {
100
- const sel = document.getElementById('benchmarkTaskSelect');
101
- if (!sel) return;
102
- sel.innerHTML = '<option value="">— Loading tasks… —</option>';
103
- try {
104
- const r = await fetch('/api/benchmark-tasks?benchmark=swe-bench-verified&offset=0&length=50');
105
- const data = await r.json();
106
- if (!r.ok) throw new Error(data.error || 'Failed to load tasks');
107
- const rows = data.rows || [];
108
- _runnerTasks = rows.map(r => r.row || r);
109
- sel.innerHTML = '<option value="">— Pick a task —</option>';
110
- _runnerTasks.forEach((task, i) => {
111
- const id = task.instance_id || task.id || `task-${i}`;
112
- const repo = task.repo || '';
113
- const opt = document.createElement('option');
114
- opt.value = i;
115
- opt.textContent = id + (repo ? ` (${repo})` : '');
116
- sel.appendChild(opt);
117
- });
118
- } catch (e) {
119
- sel.innerHTML = '<option value="">— Failed: ' + escHtml(e.message) + ' —</option>';
120
- }
121
- }
122
-
123
- // Show problem statement preview when a task is selected
124
- export function onBenchmarkTaskSelect(idx) {
125
- const preview = document.getElementById('benchmarkTaskPreview');
126
- if (!preview) return;
127
- if (idx === '' || idx == null || !_runnerTasks[idx]) { preview.style.display = 'none'; return; }
128
- const task = _runnerTasks[idx];
129
- const ps = task.problem_statement || task.description || '(no problem statement)';
130
- preview.textContent = ps.slice(0, 800) + (ps.length > 800 ? '\n…' : '');
131
- preview.style.display = 'block';
132
- }
133
-
134
- // ── Stream a benchmark task through an engine ────────────────────────────────
135
- export async function runBenchmarkTask() {
136
- const sel = document.getElementById('benchmarkTaskSelect');
137
- const engineSel = document.getElementById('benchmarkRunEngine');
138
- const modelInput = document.getElementById('benchmarkRunModel');
139
- const outputEl = document.getElementById('benchmarkRunOutput');
140
- const streamEl = document.getElementById('benchmarkRunStream');
141
- const statusEl = document.getElementById('benchmarkRunStatus');
142
- const stopBtn = document.getElementById('benchmarkRunStop');
143
- const runBtn = document.getElementById('benchmarkRunBtn');
144
- if (!sel || !engineSel || !outputEl || !streamEl) return;
145
-
146
- const idx = sel.value;
147
- if (idx === '' || idx == null || !_runnerTasks[idx]) {
148
- alert('Pick a task first — click "↻ Load Tasks" if the list is empty.');
149
- return;
150
- }
151
- const task = _runnerTasks[idx];
152
- const engine = engineSel.value;
153
- const model = (modelInput?.value || '').trim() || undefined;
154
-
155
- // Cancel any existing run
156
- if (_runnerAbort) { try { _runnerAbort.abort(); } catch {} }
157
- _runnerAbort = new AbortController();
158
-
159
- outputEl.style.display = 'flex';
160
- streamEl.textContent = '';
161
- statusEl.textContent = `Running on ${engine}…`;
162
- if (stopBtn) stopBtn.style.display = 'inline-block';
163
- if (runBtn) runBtn.disabled = true;
164
-
165
- try {
166
- const resp = await fetch('/api/benchmark-run', {
167
- method: 'POST',
168
- headers: { 'content-type': 'application/json' },
169
- body: JSON.stringify({
170
- instanceId: task.instance_id || task.id,
171
- problemStatement: task.problem_statement || task.description || '',
172
- repo: task.repo || '',
173
- hints: task.hints_text || '',
174
- engine,
175
- ...(model ? { model } : {}),
176
- }),
177
- signal: _runnerAbort.signal,
178
- });
179
-
180
- const reader = resp.body.getReader();
181
- const decoder = new TextDecoder();
182
- let buf = '';
183
-
184
- while (true) {
185
- const { done, value } = await reader.read();
186
- if (done) break;
187
- buf += decoder.decode(value, { stream: true });
188
- const parts = buf.split('\n\n');
189
- buf = parts.pop();
190
- for (const part of parts) {
191
- const line = part.replace(/^data:\s*/, '');
192
- if (!line) continue;
193
- try {
194
- const ev = JSON.parse(line);
195
- if (ev.type === 'chunk' && ev.text) {
196
- streamEl.textContent += ev.text;
197
- streamEl.scrollTop = streamEl.scrollHeight;
198
- } else if (ev.type === 'done') {
199
- const ok = ev.exitCode === 0 || ev.exitCode == null;
200
- statusEl.textContent = ok ? '✓ Done' : `✗ Exit ${ev.exitCode}`;
201
- statusEl.style.color = ok ? 'var(--green)' : 'var(--red)';
202
- } else if (ev.type === 'error' || ev.error) {
203
- streamEl.textContent += '\n[error] ' + (ev.error || ev.message || JSON.stringify(ev));
204
- }
205
- } catch {}
206
- }
207
- }
208
- } catch (e) {
209
- if (e.name !== 'AbortError') {
210
- streamEl.textContent += '\n[stream error] ' + e.message;
211
- statusEl.textContent = '✗ Error';
212
- statusEl.style.color = 'var(--red)';
213
- } else {
214
- statusEl.textContent = '⏹ Stopped';
215
- statusEl.style.color = 'var(--text-2)';
216
- }
217
- } finally {
218
- if (stopBtn) stopBtn.style.display = 'none';
219
- if (runBtn) runBtn.disabled = false;
220
- _runnerAbort = null;
221
- }
222
- }
223
-
224
- export function stopBenchmarkRun() {
225
- if (_runnerAbort) {
226
- try { _runnerAbort.abort(); } catch {}
227
- _runnerAbort = null;
228
- }
229
- }