crewswarm 0.9.2 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -9
- package/apps/dashboard/dist/assets/{chat-core-Cx4sTxDd.js → chat-core-3KirthZA.js} +1 -1
- package/apps/dashboard/dist/assets/index-GSWxxEPO.js +2 -0
- package/apps/dashboard/dist/assets/{tab-pm-loop-tab-Bfd449B4.js → tab-pm-loop-tab-DiAPTJXu.js} +1 -1
- package/apps/dashboard/dist/assets/{tab-projects-tab-DhNWnlzt.js → tab-projects-tab-SFH4E--a.js} +1 -1
- package/apps/dashboard/dist/assets/tab-settings-tab-BselH1c0.js +1 -0
- package/apps/dashboard/dist/index.html +82 -11
- package/apps/vibe/README.md +2 -2
- package/apps/vibe/package.json +1 -1
- package/apps/vibe/server.mjs +3 -3
- package/crew-lead.mjs +34 -4
- package/lib/bridges/gateway-ws.mjs +4 -0
- package/lib/crew-lead/chat-handler.mjs +34 -0
- package/lib/crew-lead/http-server.mjs +55 -14
- package/lib/crew-lead/llm-caller.mjs +24 -8
- package/lib/crew-lead/prompts.mjs +7 -0
- package/lib/crew-lead/wave-dispatcher.mjs +15 -3
- package/lib/crew-lead/ws-router.mjs +219 -27
- package/lib/engines/engine-registry.mjs +9 -0
- package/lib/engines/rt-envelope.mjs +1 -0
- package/lib/engines/runners.mjs +5 -2
- package/lib/runtime/paths.mjs +12 -8
- package/package.json +35 -15
- package/scripts/capture-build-flow.mjs +118 -0
- package/scripts/coverage-report.mjs +209 -0
- package/scripts/coverage-summary.mjs +47 -0
- package/scripts/dashboard-validation.mjs +74 -0
- package/scripts/dashboard.mjs +560 -70
- package/scripts/live-bridge-matrix.mjs +79 -0
- package/scripts/live-cli-matrix.mjs +166 -0
- package/scripts/live-crewchat-check.mjs +42 -0
- package/scripts/live-engine-matrix.mjs +50 -0
- package/scripts/live-provider-failover-matrix.mjs +107 -0
- package/scripts/live-provider-matrix.mjs +228 -0
- package/scripts/restart-all-from-repo.sh +4 -4
- package/scripts/smoke-dispatch.mjs +4 -1
- package/scripts/test-blast-radius.mjs +204 -0
- package/scripts/test-report-summary.mjs +88 -0
- package/scripts/test-reporter.mjs +651 -0
- package/scripts/test-rerun.mjs +136 -0
- package/scripts/tmux-bridge +130 -0
- package/apps/dashboard/dist/assets/chat-core-Cx4sTxDd.js.br +0 -0
- package/apps/dashboard/dist/assets/cli-process-COMRNPqr.js.br +0 -0
- package/apps/dashboard/dist/assets/components-BS9fQjE_.js.br +0 -0
- package/apps/dashboard/dist/assets/core-utils-CmOkXgzi.js.br +0 -0
- package/apps/dashboard/dist/assets/index-CF0aJRtC.css.br +0 -0
- package/apps/dashboard/dist/assets/index-DnClJ1ee.js +0 -2
- package/apps/dashboard/dist/assets/index-DnClJ1ee.js.br +0 -0
- package/apps/dashboard/dist/assets/orchestration-Ca2DLWN-.js.br +0 -0
- package/apps/dashboard/dist/assets/setup-wizard-CA0Or47w.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-agents-tab-BgpIsjkw.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-comms-tab-kguqTIzD.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-contacts-tab-DiOyMYth.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-engines-tab-BsdZVvU0.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-memory-tab-Cu6u13EQ.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-models-tab-BLEjmd19.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-pm-loop-tab-Bfd449B4.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-projects-tab-DhNWnlzt.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-prompts-tab-DVkUNaJd.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-services-tab-DU_LH3uG.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-settings-tab-Bn4nXtDe.js +0 -1
- package/apps/dashboard/dist/assets/tab-settings-tab-Bn4nXtDe.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-skills-tab-BpY0uZHW.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-spending-tab-DEccQHnt.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-swarm-chat-tab-BNrd88-r.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-swarm-tab-B1AcjL1W.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-usage-tab-BIOOnB-Y.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-waves-tab-SaJDkb4x.js.br +0 -0
- package/apps/dashboard/dist/assets/tab-workflows-tab-B-soSy1k.js.br +0 -0
- package/apps/dashboard/dist/index.html.br +0 -0
- package/apps/dashboard/dist/index.html.gz +0 -0
- package/apps/dashboard/index.html +0 -6529
- package/apps/dashboard/package.json +0 -15
- package/apps/dashboard/src/app.js +0 -2828
- package/apps/dashboard/src/app.js.br +0 -0
- package/apps/dashboard/src/app.js.gz +0 -0
- package/apps/dashboard/src/chat/chat-actions.js +0 -1847
- package/apps/dashboard/src/chat/chat-actions.js.br +0 -0
- package/apps/dashboard/src/chat/unified-messages.js +0 -327
- package/apps/dashboard/src/chat/unified-messages.js.br +0 -0
- package/apps/dashboard/src/cli-process.js +0 -208
- package/apps/dashboard/src/cli-process.js.br +0 -0
- package/apps/dashboard/src/cli-process.js.gz +0 -0
- package/apps/dashboard/src/components/active-tasks-panel.js +0 -175
- package/apps/dashboard/src/components/active-tasks-panel.js.br +0 -0
- package/apps/dashboard/src/core/api.js +0 -18
- package/apps/dashboard/src/core/api.js.br +0 -0
- package/apps/dashboard/src/core/dom.js +0 -228
- package/apps/dashboard/src/core/dom.js.br +0 -0
- package/apps/dashboard/src/core/state.js +0 -91
- package/apps/dashboard/src/core/state.js.br +0 -0
- package/apps/dashboard/src/core/task-manager.js +0 -134
- package/apps/dashboard/src/core/task-manager.js.br +0 -0
- package/apps/dashboard/src/orchestration-status.js +0 -127
- package/apps/dashboard/src/orchestration-status.js.br +0 -0
- package/apps/dashboard/src/setup-wizard.js +0 -562
- package/apps/dashboard/src/setup-wizard.js.br +0 -0
- package/apps/dashboard/src/styles.css +0 -2085
- package/apps/dashboard/src/styles.css.br +0 -0
- package/apps/dashboard/src/styles.css.gz +0 -0
- package/apps/dashboard/src/tabs/agents-tab.js +0 -2237
- package/apps/dashboard/src/tabs/agents-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/benchmarks-tab.js +0 -229
- package/apps/dashboard/src/tabs/benchmarks-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/comms-tab.js +0 -955
- package/apps/dashboard/src/tabs/comms-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/contacts-tab.js +0 -654
- package/apps/dashboard/src/tabs/contacts-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/engines-tab.js +0 -175
- package/apps/dashboard/src/tabs/engines-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/memory-tab.js +0 -182
- package/apps/dashboard/src/tabs/memory-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/models-tab.js +0 -450
- package/apps/dashboard/src/tabs/models-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/pm-loop-tab.js +0 -185
- package/apps/dashboard/src/tabs/pm-loop-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/projects-tab.js +0 -663
- package/apps/dashboard/src/tabs/projects-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/projects-tab.js.gz +0 -0
- package/apps/dashboard/src/tabs/prompts-tab.js +0 -160
- package/apps/dashboard/src/tabs/prompts-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/services-tab.js +0 -202
- package/apps/dashboard/src/tabs/services-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/settings-tab.js +0 -861
- package/apps/dashboard/src/tabs/settings-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/skills-tab.js +0 -284
- package/apps/dashboard/src/tabs/skills-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/spending-tab.js +0 -173
- package/apps/dashboard/src/tabs/spending-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/swarm-chat-tab.js +0 -660
- package/apps/dashboard/src/tabs/swarm-chat-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/swarm-tab.js +0 -538
- package/apps/dashboard/src/tabs/swarm-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/usage-tab.js +0 -390
- package/apps/dashboard/src/tabs/usage-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/waves-tab.js +0 -238
- package/apps/dashboard/src/tabs/waves-tab.js.br +0 -0
- package/apps/dashboard/src/tabs/workflows-tab.js +0 -747
- package/apps/dashboard/src/tabs/workflows-tab.js.br +0 -0
- package/apps/vibe/.crew/agent-memory/pipeline.json +0 -304
- package/apps/vibe/.crew/cost.json +0 -17
- package/apps/vibe/.crew/json-parse-metrics.jsonl +0 -27
- package/apps/vibe/.crew/pipeline-metrics.jsonl +0 -27
- package/apps/vibe/.crew/pipeline-runs/pipeline-0f90c392-2425-4ae5-850c-bd9d17b1d690.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-1c269dd9-a63f-4fba-af81-5cf08048ef06.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-288a7765-da24-4a22-89bc-1f3cc9b0562c.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-2c78fd22-a657-4bd1-bc49-0679fb384409.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-3da23550-22ed-4904-9a0a-8e79c1f3024c.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-3e6fe08d-3264-404a-8df3-aab7efef10e7.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-42eec610-57fe-4e09-9e7e-b315038495c2.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-4438eb4c-ae13-42b1-90e2-b043d8983be8.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-4740a9f5-86e7-44b6-a394-de433e291727.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-49e1da6a-957e-48fd-9220-415019e4f8e2.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-4c9251db-be68-427b-a3fc-a264f2b5778d.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-6413fa33-a802-4b57-a8c0-a9056ad67842.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-65e29a57-664d-4196-8109-017e364f182e.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-6aa04bc5-9593-4b1f-b58d-3bf2978cb602.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-6e1cba53-9b70-457e-99e0-59199149dd21.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-749f41cc-4dac-4204-be64-873a6080a0d2.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-74d68121-e181-4864-bd9a-c3211341dfaf.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-8509bc24-142d-4e07-b44a-a50bf99d1103.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-960339c6-07ca-43ce-9900-f6e1702b39b9.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-9bef2dd2-6122-42e5-b3d9-19f4d80f9e40.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-9c6480a9-7031-4146-b241-825b9a2d1de1.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-9fd42426-8492-4157-9d5f-e1537c060489.jsonl +0 -2
- package/apps/vibe/.crew/pipeline-runs/pipeline-ad6d40a3-2f5e-46a9-a345-47caaccc51aa.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-bc606133-8d5b-4535-8d85-f1a29cdaa981.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-c1418f4e-b773-4ca1-84a3-216acf36e2f2.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-c1a13ccd-634a-4d01-a4a7-1177b8a752ff.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-c7d27b42-249e-4bd4-8f26-6aa998110b8a.jsonl +0 -5
- package/apps/vibe/.crew/pipeline-runs/pipeline-cca2e9b9-4a34-4d25-a311-5c793fa7e91e.jsonl +0 -5
- package/apps/vibe/.crew/sandbox.json +0 -7
- package/apps/vibe/.crew/session.json +0 -330
- package/apps/vibe/.crew/training-data.jsonl +0 -0
- package/apps/vibe/.github/workflows/studio-quality.yml +0 -37
- package/apps/vibe/.studio-data/project-messages/chuck-norris.jsonl +0 -18
- package/apps/vibe/.studio-data/project-messages/general.jsonl +0 -81
- package/apps/vibe/.studio-data/project-messages/studio-local.jsonl +0 -18
- package/apps/vibe/ARCHITECTURE.md +0 -3393
- package/apps/vibe/QUICK-REFERENCE.md +0 -211
- package/apps/vibe/ROADMAP.md +0 -41
- package/apps/vibe/STUDIO-SETUP-COMPLETE.md +0 -35
- package/apps/vibe/VISUAL-GUIDE.md +0 -378
- package/apps/vibe/capture-demo.mjs +0 -160
- package/apps/vibe/capture-full-demo.mjs +0 -255
- package/apps/vibe/capture-quickstart.mjs +0 -256
- package/apps/vibe/capture-vibe-assets.mjs +0 -71
- package/apps/vibe/capture-vibe-video.mjs +0 -260
- package/apps/vibe/check-buttons.js +0 -41
- package/apps/vibe/diagnose.html +0 -106
- package/apps/vibe/fix-buttons.js +0 -103
- package/apps/vibe/index.html +0 -3404
- package/apps/vibe/package-lock.json +0 -920
- package/apps/vibe/scripts/studio-pty-host.py +0 -117
- package/apps/vibe/src/main.js +0 -2940
- package/apps/vibe/src/register-all-languages.js +0 -98
- package/apps/vibe/start-studio.sh +0 -11
- package/apps/vibe/test/accessibility-tests.js +0 -77
- package/apps/vibe/test/browser-performance-audit.mjs +0 -205
- package/apps/vibe/test/performance-tests.js +0 -120
- package/apps/vibe/test/security-tests.js +0 -213
- package/apps/vibe/tests/e2e.local.mjs +0 -54
- package/apps/vibe/tests/server.smoke.mjs +0 -106
- package/apps/vibe/update_website.mjs +0 -74
- package/apps/vibe/vite.config.js +0 -19
- package/lib/crew-lead/chat-handler.mjs.bak +0 -1274
- package/lib/engines/rt-envelope.mjs.backup-current +0 -870
|
Binary file
|
|
@@ -1,229 +0,0 @@
|
|
|
1
|
-
import { escHtml } from '../core/dom.js';
|
|
2
|
-
|
|
3
|
-
// ── Benchmark task runner state ──────────────────────────────────────────────

/**
 * Task rows cached by loadBenchmarkTasks() (originally noted as coming from
 * HuggingFace). The task <select> stores indexes into this array as its
 * option values.
 */
let _runnerTasks = [];

/**
 * AbortController for the active SSE stream started by runBenchmarkTask(),
 * or null while no run is in flight.
 */
let _runnerAbort = null;
|
|
6
|
-
|
|
7
|
-
/**
 * Switch the dashboard to the Benchmarks view and kick off loading its data.
 *
 * @param {object} [hooks]
 * @param {Function} [hooks.hideAllViews] - called first, with no arguments, when it is a function.
 * @param {Function} [hooks.setNavActive] - called with 'navBenchmarks' when it is a function.
 * @returns {void} Loading continues asynchronously after this function returns.
 */
export function showBenchmarks({ hideAllViews, setNavActive } = {}) {
  const callIfFunction = (fn, ...args) => {
    if (typeof fn === 'function') fn(...args);
  };

  callIfFunction(hideAllViews);
  document.getElementById('benchmarksView')?.classList.add('active');
  callIfFunction(setNavActive, 'navBenchmarks');

  // Once the dropdown is populated, render the leaderboard for whatever
  // benchmark ended up selected (if any).
  loadBenchmarkOptions().then(() => {
    const selectedId = document.getElementById('benchmarkSelect')?.value;
    if (selectedId) loadBenchmarkLeaderboard(selectedId);
  });
}
|
|
17
|
-
|
|
18
|
-
/**
 * Populate the #benchmarkSelect dropdown from GET /api/zeroeval/benchmarks.
 *
 * Each array entry may be either a plain id or an object carrying
 * `benchmark_id` / `id` and an optional `name`. The previously selected value
 * is restored when it is still offered; otherwise 'swe-bench-verified' is
 * selected when present.
 *
 * @returns {Promise<string|undefined>} The select's value after loading ('' when
 *   nothing is selected), or undefined when the element is missing or loading failed.
 */
export async function loadBenchmarkOptions() {
  const DEFAULT_BENCHMARK = 'swe-bench-verified';
  const sel = document.getElementById('benchmarkSelect');
  if (!sel) return;
  const cur = sel.value;
  sel.innerHTML = '<option value="">— Loading… —</option>';
  try {
    const r = await fetch('/api/zeroeval/benchmarks');
    const arr = await r.json();
    if (!Array.isArray(arr)) throw new Error('Expected array');

    // Normalise every entry once so that the option values and the selection
    // checks below agree on what an entry's id is (benchmark_id OR id).
    const entries = [];
    for (const b of arr) {
      if (b == null) continue; // typeof null === 'object' — skip instead of throwing
      const isRecord = typeof b === 'object';
      const rawId = isRecord ? (b.benchmark_id || b.id) : b;
      if (rawId == null || rawId === '') continue; // nothing usable as an option value
      const id = String(rawId); // <option>.value is always a string
      entries.push({ id, name: isRecord ? (b.name || id) : id });
    }

    sel.innerHTML = '<option value="">— Pick benchmark —</option>';
    for (const { id, name } of entries) {
      const opt = document.createElement('option');
      opt.value = id;
      opt.textContent = name;
      sel.appendChild(opt);
    }

    const offered = new Set(entries.map((entry) => entry.id));
    if (cur && offered.has(cur)) {
      sel.value = cur;
    } else if (offered.has(DEFAULT_BENCHMARK)) {
      sel.value = DEFAULT_BENCHMARK;
    }
    return sel.value;
  } catch (e) {
    // Keep the UI fallback, but leave a trace for debugging instead of
    // swallowing the failure silently.
    console.warn('loadBenchmarkOptions failed:', e);
    sel.innerHTML = '<option value="">— Failed to load —</option>';
  }
}
|
|
49
|
-
|
|
50
|
-
/**
 * Refresh the benchmark dropdown, then start rendering the leaderboard for the
 * benchmark that is selected afterwards (if any).
 *
 * @returns {Promise<void>} Resolves once the dropdown has been reloaded; the
 *   leaderboard load is started but not awaited.
 */
export async function loadBenchmarks() {
  await loadBenchmarkOptions();
  const selectedId = document.getElementById('benchmarkSelect')?.value;
  if (selectedId) {
    loadBenchmarkLeaderboard(selectedId);
  }
}
|
|
55
|
-
|
|
56
|
-
/**
 * Fetch the leaderboard for one benchmark and render it into #benchmarkTable,
 * with a one-line summary (name, description, model count, average score) in
 * #benchmarkMeta. Only the first 100 models are rendered.
 *
 * All values taken from the API response are HTML-escaped (or are numbers
 * formatted by this function) before being written through innerHTML.
 *
 * @param {string} benchmarkId - Benchmark to load; a falsy value clears the table and hides the summary.
 * @returns {Promise<void>} Never rejects for fetch/parse failures: they are rendered as an error message.
 */
export async function loadBenchmarkLeaderboard(benchmarkId) {
  const tableEl = document.getElementById('benchmarkTable');
  const metaEl = document.getElementById('benchmarkMeta');
  if (!tableEl || !metaEl) return;
  if (!benchmarkId) {
    tableEl.innerHTML = '';
    metaEl.style.display = 'none';
    return;
  }
  tableEl.innerHTML = '<div class="meta" style="padding:20px;">Loading…</div>';
  metaEl.style.display = 'none';

  // Escape anything that originates from the response; String() makes the
  // helper safe for numbers and other non-string values.
  const esc = (value) => escHtml(String(value ?? ''));
  // Same fallback chain for the summary average and for each row; Number()
  // keeps string scores from turning the average into string concatenation.
  const scoreOf = (m) => Number(m.normalized_score ?? m.benchmark_score ?? m.score ?? 0);
  // Costs are multiplied by 100 and shown with a ¢ suffix.
  const centsLabel = (cost) => (cost != null ? Math.round(cost * 100) + '¢' : '—');

  try {
    const r = await fetch('/api/zeroeval/benchmarks/' + encodeURIComponent(benchmarkId));
    const data = (await r.json()) ?? {}; // tolerate a JSON `null` body
    if (!r.ok) throw new Error(data.error || data.detail || 'Failed to load');

    const list = data.entries || data.models;
    const models = Array.isArray(list) ? list : [];
    const totalModels = data.total_models ?? data.statistics?.total_models ?? models.length;
    const avgScore = Number(
      data.statistics?.average_score
        ?? (models.length ? models.reduce((sum, m) => sum + scoreOf(m), 0) / models.length : 0),
    );
    const displayName = data.benchmark_name || data.name || benchmarkId;
    const displayDesc = data.benchmark_description || data.description || '';

    metaEl.innerHTML = '<b>' + esc(displayName) + '</b>'
      + (displayDesc ? ': ' + esc(String(displayDesc).slice(0, 200)) : '')
      + ' | ' + esc(totalModels) + ' models, avg ' + (avgScore * 100).toFixed(1) + '%';
    metaEl.style.display = 'block';

    if (!models.length) {
      tableEl.innerHTML = '<div class="meta" style="padding:20px;">No model scores for this benchmark.</div>';
      return;
    }

    const rows = models.slice(0, 100).map((m) => {
      const score = scoreOf(m);
      const pct = (score * 100).toFixed(1);
      const inC = Number(m.input_cost_per_million ?? 0);
      const outC = Number(m.output_cost_per_million ?? 0);
      const centsPerPt = (inC + outC) > 0 && score > 0
        ? ((inC + outC) * 100 / (score * 100)).toFixed(1) + '¢/pt'
        : '—';
      // Truncate before escaping so an entity is never cut in half.
      const method = String(m.analysis_method || '-').slice(0, 40);
      return '<tr>'
        + '<td style="padding:6px 10px;">' + esc(m.rank || '-') + '</td>'
        + '<td style="padding:6px 10px;">' + esc(m.model_name || m.model_id) + '</td>'
        + '<td style="padding:6px 10px;">' + esc(m.organization_name || '') + '</td>'
        + '<td style="padding:6px 10px;font-weight:600;">' + pct + '%</td>'
        + '<td style="padding:6px 10px;font-size:11px;" title="¢ per 1M input tokens">' + centsLabel(m.input_cost_per_million) + '</td>'
        + '<td style="padding:6px 10px;font-size:11px;" title="¢ per 1M output tokens">' + centsLabel(m.output_cost_per_million) + '</td>'
        + '<td style="padding:6px 10px;font-size:11px;" title="¢ per score point (1M in+out / score%)">' + centsPerPt + '</td>'
        + '<td style="padding:6px 10px;font-size:11px;">' + esc(method) + '</td>'
        + '</tr>';
    }).join('');

    tableEl.innerHTML = '<table style="width:100%;border-collapse:collapse;font-size:12px;"><thead><tr style="border-bottom:1px solid var(--border);"><th style="text-align:left;padding:6px 10px;">Rank</th><th style="text-align:left;padding:6px 10px;">Model</th><th style="text-align:left;padding:6px 10px;">Org</th><th style="text-align:left;padding:6px 10px;">Score</th><th style="text-align:left;padding:6px 10px;" title="¢ per 1M input">in ¢</th><th style="text-align:left;padding:6px 10px;" title="¢ per 1M output">out ¢</th><th style="text-align:left;padding:6px 10px;" title="¢ per score point">¢/pt</th><th style="text-align:left;padding:6px 10px;">Method</th></tr></thead><tbody>' + rows + '</tbody></table>';
  } catch (e) {
    tableEl.innerHTML = '<div style="color:var(--red);padding:20px;">Error: ' + esc(e.message) + '</div>';
  }
}
|
|
97
|
-
|
|
98
|
-
// ── Custom runner — load SWE-Bench tasks into the task picker ────────────────
|
|
99
|
-
/**
 * Fetch the first 50 'swe-bench-verified' task rows from /api/benchmark-tasks,
 * cache them in `_runnerTasks`, and list them in the #benchmarkTaskSelect
 * dropdown. Each option's value is the task's index in the cache.
 *
 * @returns {Promise<void>} Failures are shown as a single placeholder option
 *   instead of being thrown.
 */
export async function loadBenchmarkTasks() {
  const picker = document.getElementById('benchmarkTaskSelect');
  if (!picker) return;

  picker.innerHTML = '<option value="">— Loading tasks… —</option>';
  try {
    const response = await fetch('/api/benchmark-tasks?benchmark=swe-bench-verified&offset=0&length=50');
    const payload = await response.json();
    if (!response.ok) {
      throw new Error(payload.error || 'Failed to load tasks');
    }

    // Rows may arrive wrapped as { row: {...} }; unwrap when they are.
    _runnerTasks = (payload.rows || []).map((entry) => entry.row || entry);

    picker.innerHTML = '<option value="">— Pick a task —</option>';
    _runnerTasks.forEach((task, position) => {
      const label = task.instance_id || task.id || `task-${position}`;
      const suffix = task.repo ? ` (${task.repo})` : '';
      const option = document.createElement('option');
      option.value = position;
      option.textContent = label + suffix;
      picker.appendChild(option);
    });
  } catch (err) {
    picker.innerHTML = `<option value="">— Failed: ${escHtml(err.message)} —</option>`;
  }
}
|
|
122
|
-
|
|
123
|
-
// Show problem statement preview when a task is selected
|
|
124
|
-
/**
 * Show a preview of the selected task's problem statement in
 * #benchmarkTaskPreview, truncated to 800 characters.
 *
 * @param {string|number|null|undefined} idx - Index into the cached task list;
 *   '' / null / undefined or an unknown index hides the preview.
 * @returns {void}
 */
export function onBenchmarkTaskSelect(idx) {
  const PREVIEW_LIMIT = 800;
  const preview = document.getElementById('benchmarkTaskPreview');
  if (!preview) return;

  const hasSelection = !(idx === '' || idx == null);
  const task = hasSelection ? _runnerTasks[idx] : undefined;
  if (!task) {
    preview.style.display = 'none';
    return;
  }

  const statement = task.problem_statement || task.description || '(no problem statement)';
  const head = statement.slice(0, PREVIEW_LIMIT);
  const ellipsis = statement.length > PREVIEW_LIMIT ? '\n…' : '';
  preview.textContent = head + ellipsis;
  preview.style.display = 'block';
}
|
|
133
|
-
|
|
134
|
-
// ── Stream a benchmark task through an engine ────────────────────────────────
|
|
135
|
-
/**
 * Run the task selected in #benchmarkTaskSelect through the engine chosen in
 * #benchmarkRunEngine by POSTing to /api/benchmark-run, and stream the
 * response into #benchmarkRunStream.
 *
 * The response is read as `data: <json>` frames separated by a blank line.
 * Frames of type 'chunk' append text, 'done' sets the final status from
 * `exitCode`, and 'error' (or any frame with an `error` field) appends an
 * error line. Frames that are not valid JSON are ignored.
 *
 * Starting a run aborts any run already in flight. A run that has been
 * superseded this way no longer touches the status label, the buttons or the
 * shared abort controller, so it cannot clobber the newer run's UI state.
 *
 * The status label (#benchmarkRunStatus), stop button and run button are all
 * optional; the task select, engine select, output container and stream
 * element are required (the function returns silently without them).
 *
 * @returns {Promise<void>} Never rejects for network/stream failures: they are
 *   reported in the stream element and status label instead.
 */
export async function runBenchmarkTask() {
  const sel = document.getElementById('benchmarkTaskSelect');
  const engineSel = document.getElementById('benchmarkRunEngine');
  const modelInput = document.getElementById('benchmarkRunModel');
  const outputEl = document.getElementById('benchmarkRunOutput');
  const streamEl = document.getElementById('benchmarkRunStream');
  const statusEl = document.getElementById('benchmarkRunStatus');
  const stopBtn = document.getElementById('benchmarkRunStop');
  const runBtn = document.getElementById('benchmarkRunBtn');
  if (!sel || !engineSel || !outputEl || !streamEl) return;

  const idx = sel.value;
  if (idx === '' || idx == null || !_runnerTasks[idx]) {
    alert('Pick a task first — click "↻ Load Tasks" if the list is empty.');
    return;
  }
  const task = _runnerTasks[idx];
  const engine = engineSel.value;
  const model = (modelInput?.value || '').trim() || undefined;

  // Cancel any existing run (best effort), then register this run's controller.
  if (_runnerAbort) { try { _runnerAbort.abort(); } catch {} }
  const controller = new AbortController();
  _runnerAbort = controller;

  // True once a newer run has replaced our controller. A null controller
  // means stopBenchmarkRun() was called, which still belongs to this run.
  const isSuperseded = () => _runnerAbort != null && _runnerAbort !== controller;

  // The status label is optional, and a superseded run must not overwrite
  // the status of the run that replaced it.
  const setStatus = (text, color) => {
    if (!statusEl || isSuperseded()) return;
    statusEl.textContent = text;
    statusEl.style.color = color;
  };

  let sawDone = false;
  let sawError = false;

  // Handle one SSE frame ("data: {json}").
  const handleFrame = (frame) => {
    const line = frame.trim().replace(/^data:\s*/, '');
    if (!line) return;
    let ev;
    try {
      ev = JSON.parse(line);
    } catch {
      return; // not JSON (e.g. a comment / keep-alive frame) — ignore
    }
    if (ev == null || typeof ev !== 'object') return;
    if (ev.type === 'chunk' && ev.text) {
      streamEl.textContent += ev.text;
      streamEl.scrollTop = streamEl.scrollHeight;
    } else if (ev.type === 'done') {
      const ok = ev.exitCode === 0 || ev.exitCode == null;
      sawDone = true;
      setStatus(ok ? '✓ Done' : `✗ Exit ${ev.exitCode}`, ok ? 'var(--green)' : 'var(--red)');
    } else if (ev.type === 'error' || ev.error) {
      sawError = true;
      streamEl.textContent += '\n[error] ' + (ev.error || ev.message || JSON.stringify(ev));
    }
  };

  outputEl.style.display = 'flex';
  streamEl.textContent = '';
  setStatus(`Running on ${engine}…`, ''); // '' clears the colour left by a previous run
  if (stopBtn) stopBtn.style.display = 'inline-block';
  if (runBtn) runBtn.disabled = true;

  try {
    const resp = await fetch('/api/benchmark-run', {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify({
        instanceId: task.instance_id || task.id,
        problemStatement: task.problem_statement || task.description || '',
        repo: task.repo || '',
        hints: task.hints_text || '',
        engine,
        ...(model ? { model } : {}),
      }),
      signal: controller.signal,
    });

    // An HTTP error is not an event stream: surface it instead of leaving the
    // status stuck on "Running…".
    if (!resp.ok) {
      let detail = '';
      try { detail = (await resp.text()).slice(0, 300); } catch { /* body unreadable */ }
      throw new Error(`HTTP ${resp.status}${detail ? ': ' + detail : ''}`);
    }
    if (!resp.body) throw new Error('Response has no body to stream');

    const reader = resp.body.getReader();
    const decoder = new TextDecoder();
    let buf = '';

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buf += decoder.decode(value, { stream: true });
      // Frames end with a blank line; accept both LF and CRLF line endings.
      const parts = buf.split(/\r?\n\r?\n/);
      buf = parts.pop(); // keep the (possibly incomplete) tail for the next chunk
      for (const part of parts) handleFrame(part);
    }

    // Flush the decoder and process a final frame that was not followed by a
    // blank line before the stream closed.
    buf += decoder.decode();
    handleFrame(buf);

    if (!sawDone) {
      // The stream closed without a 'done' event; don't leave "Running…" up.
      setStatus(
        sawError ? '✗ Error' : '⚠ Stream ended without a result',
        sawError ? 'var(--red)' : 'var(--text-2)',
      );
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      streamEl.textContent += '\n[stream error] ' + e.message;
      setStatus('✗ Error', 'var(--red)');
    } else {
      setStatus('⏹ Stopped', 'var(--text-2)');
    }
  } finally {
    // Only the run that still owns the UI resets it; a superseded run must
    // leave the newer run's buttons and controller alone.
    if (!isSuperseded()) {
      if (stopBtn) stopBtn.style.display = 'none';
      if (runBtn) runBtn.disabled = false;
      _runnerAbort = null;
    }
  }
}
|
|
223
|
-
|
|
224
|
-
/**
 * Abort the benchmark run that is currently streaming, if there is one, and
 * clear the shared controller. Does nothing when no run is active.
 *
 * @returns {void}
 */
export function stopBenchmarkRun() {
  const active = _runnerAbort;
  if (!active) return;

  try {
    active.abort();
  } catch {
    // Best effort: a failing abort() must not prevent clearing the controller.
  }
  _runnerAbort = null;
}
|
|
Binary file
|