create-walle 0.9.11 → 0.9.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/package.json +2 -2
- package/template/bin/dev.sh +7 -1
- package/template/bin/setup.js +53 -9
- package/template/bin/sync-images.js +53 -0
- package/template/builder-journal.md +17 -0
- package/template/claude-task-manager/api-prompts.js +98 -13
- package/template/claude-task-manager/api-reviews.js +82 -5
- package/template/claude-task-manager/db.js +32 -5
- package/template/claude-task-manager/docs/session-capture-foundation-design.md +1273 -0
- package/template/claude-task-manager/lib/claude-desktop-sessions.js +696 -0
- package/template/claude-task-manager/lib/coding-agent-models.js +49 -1
- package/template/claude-task-manager/lib/session-capture.js +421 -0
- package/template/claude-task-manager/lib/session-history.js +135 -15
- package/template/claude-task-manager/lib/session-jobs.js +10 -5
- package/template/claude-task-manager/lib/session-stream.js +87 -19
- package/template/claude-task-manager/lib/setup-provider-config.js +115 -0
- package/template/claude-task-manager/lib/walle-ctm-history.js +72 -0
- package/template/claude-task-manager/lib/walle-session-context.js +61 -0
- package/template/claude-task-manager/lib/walle-transcript.js +176 -0
- package/template/claude-task-manager/public/css/setup.css +35 -8
- package/template/claude-task-manager/public/css/walle-session.css +56 -0
- package/template/claude-task-manager/public/css/walle.css +120 -0
- package/template/claude-task-manager/public/index.html +814 -181
- package/template/claude-task-manager/public/js/message-renderer.js +148 -19
- package/template/claude-task-manager/public/js/reviews.js +120 -62
- package/template/claude-task-manager/public/js/setup.js +75 -31
- package/template/claude-task-manager/public/js/stream-view.js +115 -55
- package/template/claude-task-manager/public/js/walle-session.js +84 -2
- package/template/claude-task-manager/public/js/walle.js +308 -54
- package/template/claude-task-manager/server.js +1092 -146
- package/template/claude-task-manager/session-integrity.js +181 -54
- package/template/claude-task-manager/session-utils.js +123 -41
- package/template/claude-task-manager/workers/state-detectors/codex.js +5 -2
- package/template/package.json +1 -1
- package/template/wall-e/adapters/ctm.js +39 -18
- package/template/wall-e/agent-runners/contract.js +17 -0
- package/template/wall-e/agent-runners/index.js +22 -0
- package/template/wall-e/agent-runtime/harness.js +212 -0
- package/template/wall-e/agent-runtime/index.js +8 -0
- package/template/wall-e/agent-runtime/registry.js +67 -0
- package/template/wall-e/agent-runtime/session-store.js +179 -0
- package/template/wall-e/agent-runtime/spawn.js +208 -0
- package/template/wall-e/api-walle.js +174 -7
- package/template/wall-e/brain.js +266 -28
- package/template/wall-e/channels/policy.js +88 -0
- package/template/wall-e/channels/registry.js +15 -1
- package/template/wall-e/channels/reply-dispatcher.js +70 -0
- package/template/wall-e/channels/session-bindings.js +51 -0
- package/template/wall-e/chat/code-review-context.js +29 -0
- package/template/wall-e/chat.js +188 -42
- package/template/wall-e/coding/acp-adapter.js +188 -0
- package/template/wall-e/coding/agent-catalog.js +129 -0
- package/template/wall-e/coding/compaction-service.js +247 -0
- package/template/wall-e/coding/execution-trace.js +3 -0
- package/template/wall-e/coding/instruction-service.js +224 -0
- package/template/wall-e/coding/model-message.js +67 -0
- package/template/wall-e/coding/permission-rules-store.js +111 -0
- package/template/wall-e/coding/permission-service.js +266 -0
- package/template/wall-e/coding/prompt-bundle.js +67 -0
- package/template/wall-e/coding/prompt-runtime.js +243 -0
- package/template/wall-e/coding/provider-transform.js +188 -0
- package/template/wall-e/coding/runtime-mode.js +132 -0
- package/template/wall-e/coding/snapshot-service.js +155 -0
- package/template/wall-e/coding/stream-processor.js +268 -0
- package/template/wall-e/coding/task-tool.js +255 -0
- package/template/wall-e/coding/tool-registry.js +361 -0
- package/template/wall-e/coding/transcript-writer.js +143 -0
- package/template/wall-e/coding/workspace-replay.js +324 -0
- package/template/wall-e/coding-context.js +4 -22
- package/template/wall-e/coding-orchestrator.js +307 -18
- package/template/wall-e/coding-prompts.js +44 -3
- package/template/wall-e/context/context-builder.js +43 -1
- package/template/wall-e/context/topic-matcher.js +1 -1
- package/template/wall-e/eval/agent-runner.js +59 -13
- package/template/wall-e/eval/benchmarks/memory-retrieval.json +155 -57
- package/template/wall-e/eval/benchmarks.js +100 -16
- package/template/wall-e/eval/eval-orchestrator.js +218 -8
- package/template/wall-e/eval/harvester.js +62 -5
- package/template/wall-e/eval/head-to-head.js +23 -2
- package/template/wall-e/eval/humaneval-adapter.js +30 -5
- package/template/wall-e/eval/livecodebench-adapter.js +29 -5
- package/template/wall-e/eval/manifest.js +186 -0
- package/template/wall-e/eval/run-agent-benchmarks.js +66 -2
- package/template/wall-e/eval/session-retrieval-benchmark.js +150 -0
- package/template/wall-e/eval/session-transcripts.js +57 -4
- package/template/wall-e/eval/swebench-adapter.js +109 -3
- package/template/wall-e/evaluation/agent-router.js +53 -1
- package/template/wall-e/evaluation/coding-quorum.js +48 -1
- package/template/wall-e/evaluation/router.js +4 -2
- package/template/wall-e/evaluation/tier-selector.js +11 -1
- package/template/wall-e/extraction/contradiction.js +2 -2
- package/template/wall-e/extraction/indexer.js +2 -1
- package/template/wall-e/extraction/knowledge-extractor.js +2 -2
- package/template/wall-e/hooks/cli.js +92 -0
- package/template/wall-e/hooks/discovery.js +119 -0
- package/template/wall-e/hooks/index.js +7 -0
- package/template/wall-e/hooks/manifest.js +55 -0
- package/template/wall-e/hooks/runtime.js +84 -0
- package/template/wall-e/hooks/session-memory.js +225 -0
- package/template/wall-e/http/auth.js +6 -2
- package/template/wall-e/http/chat-api.js +54 -8
- package/template/wall-e/integrations/claude-plugin/hooks/hooks.json +27 -0
- package/template/wall-e/integrations/claude-plugin/hooks/walle-precompact-hook.sh +5 -0
- package/template/wall-e/integrations/claude-plugin/hooks/walle-stop-hook.sh +5 -0
- package/template/wall-e/integrations/codex-plugin/hooks/walle-hook.sh +7 -0
- package/template/wall-e/integrations/codex-plugin/hooks.json +37 -0
- package/template/wall-e/listening/calendar.js +3 -1
- package/template/wall-e/llm/client.js +64 -10
- package/template/wall-e/llm/google.js +39 -5
- package/template/wall-e/llm/ollama.js +1 -1
- package/template/wall-e/llm/ollama.plugin.json +1 -1
- package/template/wall-e/llm/provider-availability.js +10 -0
- package/template/wall-e/llm/provider-error.js +269 -0
- package/template/wall-e/llm/tool-adapter.js +48 -12
- package/template/wall-e/loops/boot.js +2 -1
- package/template/wall-e/loops/initiative.js +2 -2
- package/template/wall-e/loops/tasks.js +8 -47
- package/template/wall-e/loops/workspace-prompts.js +20 -0
- package/template/wall-e/mcp-server.js +442 -1
- package/template/wall-e/memory/session-ingest-service.js +159 -0
- package/template/wall-e/memory/source-indexer.js +289 -0
- package/template/wall-e/plugins/discovery.js +83 -0
- package/template/wall-e/plugins/manifest-loader.js +50 -10
- package/template/wall-e/plugins/manifest-schema.js +69 -0
- package/template/wall-e/plugins/model-catalog.js +55 -0
- package/template/wall-e/prompts/coding/base.txt +2 -0
- package/template/wall-e/prompts/coding/deepseek.txt +1 -0
- package/template/wall-e/prompts/coding/memory-protocol.md +9 -0
- package/template/wall-e/prompts/coding/plan.txt +1 -0
- package/template/wall-e/runtime/execution-trace.js +220 -0
- package/template/wall-e/security/audit.js +266 -0
- package/template/wall-e/security/ssrf.js +236 -0
- package/template/wall-e/session-files.js +303 -0
- package/template/wall-e/skills/_bundled/slack-backfill/SKILL.md +3 -0
- package/template/wall-e/skills/_bundled/slack-sync/SKILL.md +3 -0
- package/template/wall-e/skills/internal-skill-registry.js +2 -2
- package/template/wall-e/skills/script-skill-runner.js +143 -0
- package/template/wall-e/skills/skill-executor.js +5 -6
- package/template/wall-e/skills/skill-fallback.js +3 -1
- package/template/wall-e/skills/skill-harness-registry.js +7 -8
- package/template/wall-e/skills/skill-planner.js +52 -4
- package/template/wall-e/skills/slack-ingest.js +11 -3
- package/template/wall-e/sources/base.js +90 -0
- package/template/wall-e/sources/builtin.js +33 -0
- package/template/wall-e/sources/claude-code-jsonl.js +78 -0
- package/template/wall-e/sources/codex-jsonl.js +125 -0
- package/template/wall-e/sources/coding-session-utils.js +117 -0
- package/template/wall-e/sources/contract-suite.js +59 -0
- package/template/wall-e/sources/gemini-jsonl.js +85 -0
- package/template/wall-e/sources/index.js +9 -0
- package/template/wall-e/sources/jsonl-utils.js +181 -0
- package/template/wall-e/sources/record-types.js +252 -0
- package/template/wall-e/sources/registry.js +92 -0
- package/template/wall-e/sources/transforms.js +100 -0
- package/template/wall-e/sources/walle-jsonl.js +108 -0
- package/template/wall-e/tools/coding-middleware.js +31 -1
- package/template/wall-e/tools/file-tracker.js +25 -1
- package/template/wall-e/tools/local-tools.js +75 -47
- package/template/wall-e/tools/session-sharing.js +68 -1
- package/template/wall-e/tools/shell-analyzer.js +1 -1
- package/template/wall-e/tools/shell-policy.js +47 -0
- package/template/wall-e/tools/snapshot.js +42 -0
- package/template/wall-e/training/harvester.js +62 -5
- package/template/wall-e/utils/repair.js +253 -1
- package/template/website/index.html +3 -3
- package/template/wall-e/skills/_bundled/slack-mentions/.watched-threads.json +0 -18
|
@@ -7,6 +7,7 @@ let timelineOffset = 0;
|
|
|
7
7
|
let _providerStatus = null;
|
|
8
8
|
let _providerPollTimer = null;
|
|
9
9
|
let _cachedActiveModel = null; // { model, provider } from /api/setup/status
|
|
10
|
+
let _activeProviderIssue = null;
|
|
10
11
|
|
|
11
12
|
const WalleCore = window.WalleCore || {};
|
|
12
13
|
const MODEL_LABELS = WalleCore.MODEL_LABELS || {};
|
|
@@ -31,6 +32,80 @@ const resolveWalleBase = WalleCore.resolveWalleBase || (() => Promise.resolve(''
|
|
|
31
32
|
// URL ('') resolves to the same place from every browser context.
|
|
32
33
|
const WALLE_BASE = '';
|
|
33
34
|
|
|
35
|
+
/**
 * Normalize a provider-failure payload into the flat shape the chat UI renders.
 *
 * `source` may be the issue itself or an envelope that carries the issue under
 * `providerError` / `provider_error`. Fields are read from the nested issue
 * first and then from the envelope. Both camelCase and snake_case spellings are
 * accepted for rawMessage, retryAfter, actionLabel, actionUrl and createdAt;
 * the spellings the previous implementation already read keep their precedence.
 *
 * @param {*} source - Candidate payload (stream event, HTTP error body, alert).
 * @returns {Object|null} Normalized issue, or null when `source` is not an
 *   object or carries none of the qualifying markers (a title, an
 *   AI_PROVIDER_ERROR code, a raw/user message, or a type starting with
 *   "ai_provider").
 */
function _normalizeProviderIssue(source) {
  if (!source || typeof source !== 'object') return null;
  var issue = source.providerError || source.provider_error || source;
  if (!issue || typeof issue !== 'object') return null;

  var title = issue.title || (source.code === 'AI_PROVIDER_ERROR' ? 'AI provider failed' : '');
  // The gate must honor the same snake_case alias the output honors below;
  // otherwise a payload carrying only `raw_message` is rejected.
  var issueRaw = issue.rawMessage || issue.raw_message;
  var looksLikeProviderType = /^ai_provider/.test(String(source.type || ''));
  if (!title && !issueRaw && !issue.userMessage && !looksLikeProviderType) return null;

  return {
    code: issue.code || source.code || 'AI_PROVIDER_ERROR',
    type: issue.type || source.type || 'provider_error',
    severity: issue.severity || source.severity || 'error',
    title: title || 'AI provider failed',
    message: issue.userMessage || issue.message || source.message || source.error || 'Wall-E could not get a response from the configured AI provider.',
    rawMessage: issueRaw || source.rawMessage || source.raw_message || '',
    provider: issue.provider || source.provider || '',
    model: issue.model || source.model || '',
    status: issue.status || source.status || '',
    retryAfter: issue.retryAfter || source.retryAfter || issue.retry_after || source.retry_after || '',
    actionLabel: issue.actionLabel || source.actionLabel || issue.action_label || source.action_label || 'Open Setup',
    actionUrl: issue.actionUrl || issue.action_url || source.action_url || source.actionUrl || '/setup.html',
    createdAt: issue.createdAt || issue.created_at || source.created_at || source.createdAt || '',
  };
}
|
|
57
|
+
|
|
58
|
+
/**
 * Normalize `source` and, when it qualifies as a provider issue, store it in
 * the module-level `_activeProviderIssue`. A payload that does not normalize
 * leaves the stored issue untouched.
 *
 * @param {*} source - Payload handed to `_normalizeProviderIssue`.
 * @returns {Object|null} Whatever `_normalizeProviderIssue` returned.
 */
function _rememberProviderIssue(source) {
  var normalized = _normalizeProviderIssue(source);
  if (!normalized) {
    return normalized;
  }
  _activeProviderIssue = normalized;
  return normalized;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Build the plain-text "Provider details" body for an issue.
 *
 * Emits one labelled line for each of status, provider, model and retryAfter
 * that is set, then a blank line followed by the raw provider message when one
 * is present. The joined text is trimmed, so an issue with only a raw message
 * yields just that message.
 *
 * @param {Object|null|undefined} issue - Normalized provider issue.
 * @returns {string} Multi-line details text; '' when there is nothing to show
 *   or when `issue` is missing.
 */
function _providerIssueDetails(issue) {
  // Guard so a missing issue produces no details instead of a TypeError.
  if (!issue) return '';
  var lines = [];
  if (issue.status) lines.push('HTTP/status: ' + issue.status);
  if (issue.provider) lines.push('Provider: ' + issue.provider);
  if (issue.model) lines.push('Model: ' + issue.model);
  if (issue.retryAfter) lines.push('Retry after: ' + issue.retryAfter);
  if (issue.rawMessage) lines.push('', issue.rawMessage);
  return lines.join('\n').trim();
}
|
|
73
|
+
|
|
74
|
+
/**
 * Render a provider issue as an HTML alert block.
 *
 * The issue is passed through `_normalizeProviderIssue` first; anything that
 * does not normalize renders as an empty string. All issue-derived text goes
 * through `esc` before being placed in the markup.
 *
 * @param {*} issue - Raw or already-normalized provider issue.
 * @param {Object} [opts]
 * @param {boolean} [opts.dismissible] - Append a dismiss button wired to
 *   `WE._dismissProviderIssue()`.
 * @param {boolean} [opts.inline] - Add the `inline` CSS class to the wrapper.
 * @returns {string} HTML markup, or '' when there is no issue to show.
 */
function _renderProviderIssueBlock(issue, opts) {
  const normalized = _normalizeProviderIssue(issue);
  if (!normalized) return '';
  const options = opts || {};

  // Only "warning" gets its own class; every other severity renders as an error.
  const severityClass = normalized.severity === 'warning' ? 'warning' : 'error';
  const detailsText = _providerIssueDetails(normalized);

  let actionHtml = '';
  const url = normalized.actionUrl;
  if (url && /^\/setup/.test(url)) {
    // Setup links use in-app navigation instead of an anchor.
    actionHtml = '<button class="walle-btn primary" onclick="navTo(\'setup\')">' + esc(normalized.actionLabel || 'Open Setup') + '</button>';
  } else if (url && /^(\/|https?:\/\/)/.test(url)) {
    // Only site-relative and http(s) URLs are turned into links.
    actionHtml = '<a class="walle-btn primary" href="' + esc(url) + '">' + esc(normalized.actionLabel || 'Open') + '</a>';
  }

  let dismissHtml = '';
  if (options.dismissible) {
    dismissHtml = '<button class="we-provider-issue-dismiss" onclick="WE._dismissProviderIssue()" title="Dismiss">×</button>';
  }

  const parts = [
    '<div class="we-provider-issue ' + severityClass + (options.inline ? ' inline' : '') + '" role="alert">',
    '<div class="we-provider-issue-main">',
    '<div class="we-provider-issue-title">' + esc(normalized.title) + '</div>',
    '<div class="we-provider-issue-body">' + esc(normalized.message) + '</div>',
  ];
  if (detailsText) {
    parts.push('<details class="we-provider-issue-details"><summary>Provider details</summary><pre>' + esc(detailsText) + '</pre></details>');
  }
  parts.push('</div>');
  if (actionHtml || dismissHtml) {
    parts.push('<div class="we-provider-issue-actions">' + actionHtml + dismissHtml + '</div>');
  }
  parts.push('</div>');
  return parts.join('');
}
|
|
103
|
+
|
|
104
|
+
/**
 * Click handler for the dismiss button rendered by `_renderProviderIssueBlock`:
 * forgets the stored provider issue and re-renders the chat view so the banner
 * is no longer drawn.
 */
WE._dismissProviderIssue = function () {
  // Clear first; renderChatUI only draws the banner while an issue is stored.
  _activeProviderIssue = null;
  renderChatUI();
};
|
|
108
|
+
|
|
34
109
|
// Render an assistant message that may begin with a `<think>...</think>`
|
|
35
110
|
// chain-of-thought block (DeepSeek V4 deep-thinking, Anthropic extended
|
|
36
111
|
// thinking, gemma4 native thinking). The thoughts render as a collapsible
|
|
@@ -1793,6 +1868,9 @@ function renderChatUI() {
|
|
|
1793
1868
|
if (!body) return;
|
|
1794
1869
|
|
|
1795
1870
|
var html = '<div class="walle-chat-container">';
|
|
1871
|
+
if (_activeProviderIssue) {
|
|
1872
|
+
html += _renderProviderIssueBlock(_activeProviderIssue, { dismissible: true });
|
|
1873
|
+
}
|
|
1796
1874
|
// Search bar + select mode toggle
|
|
1797
1875
|
html += '<div class="we-chat-search-bar">';
|
|
1798
1876
|
html += '<input class="we-chat-search-input" id="we-chat-search" placeholder="Search chat history..." value="' + esc(chatSearchQuery || '') + '" oninput="WE._onChatSearch(this.value)">';
|
|
@@ -1903,7 +1981,11 @@ function renderChatUI() {
|
|
|
1903
1981
|
i++;
|
|
1904
1982
|
html += '<div class="walle-chat-msg assistant">';
|
|
1905
1983
|
html += '<div class="walle-chat-msg-role assistant"><img src="/walle-icon.svg" width="16" height="16" class="walle-avatar"> WALL-E</div>';
|
|
1906
|
-
|
|
1984
|
+
if (chatHistory[i]._providerError) {
|
|
1985
|
+
html += _renderProviderIssueBlock(chatHistory[i]._providerError, { inline: true });
|
|
1986
|
+
} else {
|
|
1987
|
+
html += '<div class="walle-chat-msg-text we-markdown">' + renderAssistantWithThoughts(chatHistory[i].text) + '</div>';
|
|
1988
|
+
}
|
|
1907
1989
|
html += '</div>';
|
|
1908
1990
|
}
|
|
1909
1991
|
html += '</div>'; // close turn group
|
|
@@ -1917,7 +1999,11 @@ function renderChatUI() {
|
|
|
1917
1999
|
}
|
|
1918
2000
|
html += '<div class="walle-chat-msg assistant">';
|
|
1919
2001
|
html += '<div class="walle-chat-msg-role assistant"><img src="/walle-icon.svg" width="16" height="16" class="walle-avatar"> WALL-E</div>';
|
|
1920
|
-
|
|
2002
|
+
if (msg._providerError) {
|
|
2003
|
+
html += _renderProviderIssueBlock(msg._providerError, { inline: true });
|
|
2004
|
+
} else {
|
|
2005
|
+
html += '<div class="walle-chat-msg-text we-markdown">' + renderAssistantWithThoughts(msg.text) + '</div>';
|
|
2006
|
+
}
|
|
1921
2007
|
html += '</div>';
|
|
1922
2008
|
html += '</div>';
|
|
1923
2009
|
turnIdx++;
|
|
@@ -2258,8 +2344,13 @@ function _streamChat(message, pendingAttachments, sendId) {
|
|
|
2258
2344
|
}
|
|
2259
2345
|
throw new Error('Send cancelled — current provider cannot handle ' + capList + ' attachments.');
|
|
2260
2346
|
}
|
|
2261
|
-
var msg = (json && json.error) || txt || ('HTTP ' + response.status);
|
|
2262
|
-
|
|
2347
|
+
var msg = (json && (json.message || json.error)) || txt || ('HTTP ' + response.status);
|
|
2348
|
+
var err = new Error(msg);
|
|
2349
|
+
if (json && json.providerError) {
|
|
2350
|
+
err.code = json.code || 'AI_PROVIDER_ERROR';
|
|
2351
|
+
err.providerError = json.providerError;
|
|
2352
|
+
}
|
|
2353
|
+
throw err;
|
|
2263
2354
|
});
|
|
2264
2355
|
}
|
|
2265
2356
|
var reader = response.body.getReader();
|
|
@@ -2287,7 +2378,7 @@ function _streamChat(message, pendingAttachments, sendId) {
|
|
|
2287
2378
|
reader.cancel();
|
|
2288
2379
|
return;
|
|
2289
2380
|
} else if (event.type === 'error') {
|
|
2290
|
-
|
|
2381
|
+
_finishStreamingError(event, sendId, { serverReceived: true });
|
|
2291
2382
|
reader.cancel();
|
|
2292
2383
|
return;
|
|
2293
2384
|
} else {
|
|
@@ -2301,7 +2392,7 @@ function _streamChat(message, pendingAttachments, sendId) {
|
|
|
2301
2392
|
readChunk();
|
|
2302
2393
|
}).catch(function(err) {
|
|
2303
2394
|
if (err.name !== 'AbortError') {
|
|
2304
|
-
|
|
2395
|
+
_finishStreamingError({ error: err.message || 'Stream read failed' }, sendId, { serverReceived: false });
|
|
2305
2396
|
}
|
|
2306
2397
|
});
|
|
2307
2398
|
}
|
|
@@ -2315,6 +2406,14 @@ function _streamChat(message, pendingAttachments, sendId) {
|
|
|
2315
2406
|
renderChatUI();
|
|
2316
2407
|
return;
|
|
2317
2408
|
}
|
|
2409
|
+
if (err.providerError) {
|
|
2410
|
+
_finishStreamingError({
|
|
2411
|
+
error: err.message || 'AI provider failed',
|
|
2412
|
+
code: err.code || 'AI_PROVIDER_ERROR',
|
|
2413
|
+
providerError: err.providerError,
|
|
2414
|
+
}, sendId, { serverReceived: true });
|
|
2415
|
+
return;
|
|
2416
|
+
}
|
|
2318
2417
|
// Mark this user message as unsynced so it's visibly flagged and survives
|
|
2319
2418
|
// reloads. The localStorage record stays (cleared only on confirmed success
|
|
2320
2419
|
// via _finishStreaming with a reply) — on next page load it's merged back
|
|
@@ -2334,24 +2433,44 @@ function _streamChat(message, pendingAttachments, sendId) {
|
|
|
2334
2433
|
});
|
|
2335
2434
|
}
|
|
2336
2435
|
|
|
2337
|
-
function
|
|
2436
|
+
/**
 * Flag the newest chat-history entry carrying `sendId` as unsynced and attach
 * an error message to it. Does nothing when `sendId` is empty or no entry
 * matches.
 *
 * @param {string} sendId - Client-side id assigned to the outgoing message.
 * @param {string} [message] - Error text; defaults to 'Failed to get response'.
 */
function _markSendUnsynced(sendId, message) {
  if (!sendId) return;
  // Walk newest to oldest so only the most recent matching entry is touched.
  let idx = chatHistory.length;
  while (idx-- > 0) {
    const entry = chatHistory[idx];
    if (entry._sendId !== sendId) continue;
    entry._unsynced = true;
    entry._error = message || 'Failed to get response';
    return;
  }
}
|
|
2446
|
+
|
|
2447
|
+
/**
 * Server confirmed the turn: clear the unsynced record and strip the
 * `_sendId` / `_unsynced` / `_error` markers from the most recent user entry
 * that still carries a send id.
 *
 * @param {string} [sendId] - When given, `_clearUnsynced(sendId)` is called up
 *   front and only the entry with that exact id is cleaned. When omitted, the
 *   newest user entry with any send id is cleaned.
 */
function _confirmLastUserSend(sendId) {
  if (sendId) _clearUnsynced(sendId);

  let idx = chatHistory.length - 1;
  while (idx >= 0) {
    const entry = chatHistory[idx];
    const isTrackedUserTurn = entry.role === 'user' && entry._sendId;
    if (isTrackedUserTurn && (!sendId || entry._sendId === sendId)) {
      _clearUnsynced(entry._sendId);
      delete entry._sendId;
      delete entry._unsynced;
      delete entry._error;
      return;
    }
    idx -= 1;
  }
}
|
|
2461
|
+
|
|
2462
|
+
/**
 * Return the chat streaming bookkeeping to idle: drop the active controller
 * reference, stop the thinking timer, and reset the thinking-state fields.
 */
function _resetStreamingState() {
  activeChatController = null;
  _stopThinkingTimer();
  // Fields are reset only after the timer has been stopped, in the same order
  // as the individual assignments they replace.
  Object.assign(chatThinkingState, {
    isProcessing: false,
    logs: [],
    startTime: null,
  });
}
|
|
2469
|
+
|
|
2470
|
+
function _finishStreaming(reply) {
|
|
2471
|
+
_resetStreamingState();
|
|
2343
2472
|
if (reply) {
|
|
2344
|
-
|
|
2345
|
-
// most recent user turn that had a sendId.
|
|
2346
|
-
for (var _fi = chatHistory.length - 1; _fi >= 0; _fi--) {
|
|
2347
|
-
if (chatHistory[_fi].role === 'user' && chatHistory[_fi]._sendId) {
|
|
2348
|
-
_clearUnsynced(chatHistory[_fi]._sendId);
|
|
2349
|
-
delete chatHistory[_fi]._sendId;
|
|
2350
|
-
delete chatHistory[_fi]._unsynced;
|
|
2351
|
-
delete chatHistory[_fi]._error;
|
|
2352
|
-
break;
|
|
2353
|
-
}
|
|
2354
|
-
}
|
|
2473
|
+
_confirmLastUserSend();
|
|
2355
2474
|
chatHistory.push({ role: 'assistant', text: reply });
|
|
2356
2475
|
// Update the active branch with the assistant reply
|
|
2357
2476
|
_updateActiveBranch();
|
|
@@ -2359,6 +2478,28 @@ function _finishStreaming(reply) {
|
|
|
2359
2478
|
WE.renderChat();
|
|
2360
2479
|
}
|
|
2361
2480
|
|
|
2481
|
+
/**
 * Finish a chat send that ended in an error.
 *
 * Resets the streaming state, records the event as the active provider issue
 * when it normalizes to one, then either confirms the user turn or marks it
 * unsynced, appends an assistant entry describing the failure, and re-renders.
 *
 * @param {Object|null} event - Error payload (`message` / `error`, optionally a
 *   provider issue).
 * @param {string} sendId - Client-side id of the user message being answered.
 * @param {Object} [opts]
 * @param {boolean} [opts.serverReceived] - Truthy when the caller treats the
 *   message as having reached the server; the turn is then confirmed rather
 *   than flagged unsynced.
 */
function _finishStreamingError(event, sendId, opts) {
  const options = opts || {};
  _resetStreamingState();

  const issue = _rememberProviderIssue(event);
  // A 'provider_unavailable_local' issue always takes the unsynced path,
  // whatever the caller passed.
  const serverReceived = issue && issue.type === 'provider_unavailable_local'
    ? false
    : options.serverReceived;
  const fallbackText = (event && (event.message || event.error)) || 'Failed to get response';

  if (serverReceived) {
    _confirmLastUserSend(sendId);
  } else {
    _markSendUnsynced(sendId, issue ? issue.message : fallbackText);
  }

  const entry = issue
    ? { role: 'assistant', text: issue.title + ': ' + issue.message, _providerError: issue }
    : { role: 'assistant', text: 'Error: ' + fallbackText };
  chatHistory.push(entry);

  _updateActiveBranch();
  WE.renderChat();
}
|
|
2502
|
+
|
|
2362
2503
|
// Keep active branch data in sync with chatHistory
|
|
2363
2504
|
function _updateActiveBranch() {
|
|
2364
2505
|
Object.keys(chatBranches).forEach(function(k) {
|
|
@@ -5844,6 +5985,21 @@ WE._toggleActionHistory = function() {
|
|
|
5844
5985
|
function checkProviderStatus() {
|
|
5845
5986
|
api('/provider/status').then(function(d) {
|
|
5846
5987
|
_providerStatus = d;
|
|
5988
|
+
if (d && d.configured && d.anyAvailable === false) {
|
|
5989
|
+
_rememberProviderIssue({
|
|
5990
|
+
type: 'provider_unavailable_local',
|
|
5991
|
+
title: 'No AI provider available',
|
|
5992
|
+
message: 'Providers are configured, but every provider is currently marked unhealthy. Check Setup or wait for health checks to recover.',
|
|
5993
|
+
rawMessage: (d.providers || []).map(function(p) {
|
|
5994
|
+
return (p.providerType || p.providerId || 'provider') + ': ' + (p.lastError || p.status || 'unhealthy');
|
|
5995
|
+
}).join('\n'),
|
|
5996
|
+
actionUrl: '/setup.html',
|
|
5997
|
+
});
|
|
5998
|
+
if (currentView === 'chat') renderChatUI();
|
|
5999
|
+
} else if (_activeProviderIssue && _activeProviderIssue.type === 'provider_unavailable_local') {
|
|
6000
|
+
_activeProviderIssue = null;
|
|
6001
|
+
if (currentView === 'chat') renderChatUI();
|
|
6002
|
+
}
|
|
5847
6003
|
renderProviderBanner();
|
|
5848
6004
|
// Adaptive polling: fast during setup, slow in steady state
|
|
5849
6005
|
var interval = (d.setupAction === 'ready') ? 60000 : 10000;
|
|
@@ -5865,7 +6021,10 @@ function renderProviderBanner() {
|
|
|
5865
6021
|
var d = _providerStatus;
|
|
5866
6022
|
var msg = 'No AI provider configured — Wall-E is in limited mode.';
|
|
5867
6023
|
var btnHtml = '';
|
|
5868
|
-
if (d.
|
|
6024
|
+
if (d.configured && d.anyAvailable === false) {
|
|
6025
|
+
msg = 'AI providers are configured, but none are available right now.';
|
|
6026
|
+
btnHtml = '<button class="walle-btn primary" onclick="navTo(\'setup\')">Open Setup</button>';
|
|
6027
|
+
} else if (d.setupAction === 'auto-register-detected') {
|
|
5869
6028
|
var types = (d.detected || []).map(function(x) { return x.type; }).join(', ');
|
|
5870
6029
|
msg = 'Found existing API keys (' + types + ') — activate to get started.';
|
|
5871
6030
|
btnHtml = '<button class="walle-btn" onclick="WE._registerDetectedProviders()">Use detected providers</button>';
|
|
@@ -5876,8 +6035,8 @@ function renderProviderBanner() {
|
|
|
5876
6035
|
} else {
|
|
5877
6036
|
btnHtml = '<button class="walle-btn" onclick="navTo(\'setup\')">Add API key</button>';
|
|
5878
6037
|
}
|
|
5879
|
-
var html = '<div id="walle-provider-banner"
|
|
5880
|
-
+ '<span>' + msg + '</span>' + btnHtml + '</div>';
|
|
6038
|
+
var html = '<div id="walle-provider-banner" class="we-provider-setup-banner">'
|
|
6039
|
+
+ '<span>' + esc(msg) + '</span><div class="we-provider-setup-actions">' + btnHtml + '</div></div>';
|
|
5881
6040
|
if (existing) {
|
|
5882
6041
|
existing.outerHTML = html;
|
|
5883
6042
|
} else {
|
|
@@ -5939,7 +6098,17 @@ var _alertsPollTimer = null;
|
|
|
5939
6098
|
|
|
5940
6099
|
function checkServiceAlerts() {
|
|
5941
6100
|
api('/alerts').then(function(d) {
|
|
5942
|
-
|
|
6101
|
+
var alerts = d.alerts || [];
|
|
6102
|
+
var providerIssue = null;
|
|
6103
|
+
for (var i = 0; i < alerts.length; i++) {
|
|
6104
|
+
var issue = _normalizeProviderIssue(alerts[i]);
|
|
6105
|
+
if (issue) providerIssue = issue;
|
|
6106
|
+
}
|
|
6107
|
+
if (providerIssue) {
|
|
6108
|
+
_activeProviderIssue = providerIssue;
|
|
6109
|
+
if (currentView === 'chat') renderChatUI();
|
|
6110
|
+
}
|
|
6111
|
+
renderServiceAlerts(alerts);
|
|
5943
6112
|
clearTimeout(_alertsPollTimer);
|
|
5944
6113
|
_alertsPollTimer = setTimeout(checkServiceAlerts, 60000);
|
|
5945
6114
|
}).catch(function() {
|
|
@@ -5955,22 +6124,23 @@ function renderServiceAlerts(alerts) {
|
|
|
5955
6124
|
return;
|
|
5956
6125
|
}
|
|
5957
6126
|
var items = alerts.map(function(a) {
|
|
5958
|
-
var
|
|
5959
|
-
var
|
|
6127
|
+
var providerIssue = _normalizeProviderIssue(a);
|
|
6128
|
+
var kind = providerIssue ? 'provider' : (a.type === 'auth_expired' ? 'error' : a.type === 'update_available' ? 'info' : 'warning');
|
|
6129
|
+
var icon = kind === 'provider' || kind === 'error' ? '!' : kind === 'info' ? '↑' : '●';
|
|
5960
6130
|
var safeId = esc(a.id).replace(/'/g, '&#39;');
|
|
5961
|
-
var dismissBtn = '<button onclick="WE._dismissAlert(\'' + safeId + '\')"
|
|
6131
|
+
var dismissBtn = '<button class="we-service-alert-dismiss" onclick="WE._dismissAlert(\'' + safeId + '\')" title="Dismiss">×</button>';
|
|
5962
6132
|
var actionBtn = '';
|
|
5963
6133
|
if (a.action_url && /^(\/|https?:\/\/)/.test(a.action_url)) {
|
|
5964
|
-
actionBtn = ' <a href="' + esc(a.action_url) + '"
|
|
6134
|
+
actionBtn = ' <a href="' + esc(a.action_url) + '" class="we-service-alert-action">Fix</a>';
|
|
5965
6135
|
}
|
|
5966
|
-
return '<div
|
|
5967
|
-
+ '<span
|
|
5968
|
-
+ '<span
|
|
6136
|
+
return '<div class="we-service-alert-item ' + kind + '">'
|
|
6137
|
+
+ '<span class="we-service-alert-icon">' + icon + '</span>'
|
|
6138
|
+
+ '<span class="we-service-alert-text">' + esc(providerIssue ? providerIssue.message : a.message) + actionBtn + '</span>'
|
|
5969
6139
|
+ dismissBtn + '</div>';
|
|
5970
6140
|
}).join('');
|
|
5971
6141
|
|
|
5972
|
-
var html = '<div id="walle-service-alerts"
|
|
5973
|
-
+ '<div
|
|
6142
|
+
var html = '<div id="walle-service-alerts" class="we-service-alerts">'
|
|
6143
|
+
+ '<div class="we-service-alerts-title">Service Alerts</div>' + items + '</div>';
|
|
5974
6144
|
|
|
5975
6145
|
if (existing) {
|
|
5976
6146
|
existing.outerHTML = html;
|
|
@@ -5984,7 +6154,13 @@ WE._dismissAlert = function(alertId) {
|
|
|
5984
6154
|
resolveWalleBase().then(function() {
|
|
5985
6155
|
var token = window._ctmState?.token || '';
|
|
5986
6156
|
fetch(WALLE_BASE + '/api/wall-e/alerts/' + encodeURIComponent(alertId) + '?token=' + token, { method: 'DELETE' })
|
|
5987
|
-
.then(function() {
|
|
6157
|
+
.then(function() {
|
|
6158
|
+
if (/^ai_provider:/.test(String(alertId || ''))) {
|
|
6159
|
+
_activeProviderIssue = null;
|
|
6160
|
+
if (currentView === 'chat') renderChatUI();
|
|
6161
|
+
}
|
|
6162
|
+
checkServiceAlerts();
|
|
6163
|
+
});
|
|
5988
6164
|
});
|
|
5989
6165
|
};
|
|
5990
6166
|
|
|
@@ -6102,7 +6278,7 @@ WE.renderEval = function() {
|
|
|
6102
6278
|
body.textContent = '';
|
|
6103
6279
|
|
|
6104
6280
|
Promise.all([
|
|
6105
|
-
api('/eval/dashboard').catch(function() { return { data: { providers: 0, totalEvals: 0, shadowEvals: 0, benchmarkEvals: 0, leaderboard: [] } }; }),
|
|
6281
|
+
api('/eval/dashboard').catch(function() { return { data: { providers: 0, modelGroups: 0, totalEvals: 0, shadowEvals: 0, benchmarkEvals: 0, trustedBenchmarkEvals: 0, trustedBenchmarkModels: 0, provisionalBenchmarkModels: 0, legacyBenchmarkModels: 0, leaderboard: [] } }; }),
|
|
6106
6282
|
api('/eval/runs').catch(function() { return { data: [] }; }),
|
|
6107
6283
|
api('/eval/benchmarks').catch(function() { return { data: [] }; }),
|
|
6108
6284
|
]).then(function(results) {
|
|
@@ -6120,6 +6296,35 @@ function _evalScoreColor(score) {
|
|
|
6120
6296
|
return '#f87171';
|
|
6121
6297
|
}
|
|
6122
6298
|
|
|
6299
|
+
/**
 * Pick the benchmark score to display for a leaderboard row.
 *
 * Uses `trusted_avg_score` when it is present (not null/undefined), otherwise
 * `avg_score`. The chosen value is coerced with Number(); anything that is not
 * a finite number becomes 0.
 *
 * @param {Object|null|undefined} row - Leaderboard row.
 * @returns {number} Finite score, or 0.
 */
function _evalBenchmarkScore(row) {
  let raw;
  if (row && row.trusted_avg_score != null) {
    raw = row.trusted_avg_score;
  } else {
    raw = row && row.avg_score;
  }
  const score = Number(raw);
  if (!isFinite(score)) {
    return 0;
  }
  return score;
}
|
|
6304
|
+
|
|
6305
|
+
/**
 * Resolve the benchmark trust status for a leaderboard row.
 *
 * An explicit `trust_status` on the row wins and is returned as a string.
 * Otherwise the status is derived from `trusted_evals` against
 * `min_trusted_evals` (default 10): at or above the threshold is 'trusted',
 * any positive count below it is 'provisional', and zero is 'legacy'.
 *
 * @param {Object|null|undefined} row - Leaderboard row.
 * @returns {string} 'trusted', 'provisional', 'legacy', or the row's own
 *   `trust_status`.
 */
function _evalTrustStatus(row) {
  if (row && row.trust_status) return String(row.trust_status);
  var DEFAULT_MIN_TRUSTED = 10;
  var trusted = Number((row && row.trusted_evals) || 0);
  var minTrusted = Number((row && row.min_trusted_evals) || DEFAULT_MIN_TRUSTED);
  // A non-numeric threshold coerces to NaN, and every comparison against NaN is
  // false, so no row could ever be 'trusted'. Use the default instead.
  if (!isFinite(minTrusted)) minTrusted = DEFAULT_MIN_TRUSTED;
  if (trusted >= minTrusted) return 'trusted';
  return trusted > 0 ? 'provisional' : 'legacy';
}
|
|
6312
|
+
|
|
6313
|
+
/**
 * Map a trust status to its display colour.
 *
 * @param {string} status - Trust status, e.g. from `_evalTrustStatus`.
 * @returns {string} Hex colour: one for 'trusted', one for 'provisional', and
 *   a third for every other value.
 */
function _evalTrustColor(status) {
  switch (status) {
    case 'trusted':
      return '#4ade80';
    case 'provisional':
      return '#facc15';
    default:
      return '#f87171';
  }
}
|
|
6318
|
+
|
|
6319
|
+
/**
 * Format a row's score confidence interval as "low-high" with two decimals.
 *
 * Each bound prefers the `trusted_score_confidence_*` field and falls back to
 * `score_confidence_*`. When either bound is missing or not a finite number the
 * placeholder '—' is returned.
 *
 * @param {Object|null|undefined} row - Leaderboard row.
 * @returns {string} e.g. '0.50-0.75', or '—' when no interval is available.
 */
function _evalCiLabel(row) {
  // Number(null) and Number('') are both 0, which would render an absent bound
  // as a real "0.00". Map missing values to NaN so they reach the placeholder.
  function readBound(trustedKey, fallbackKey) {
    if (!row) return NaN;
    var raw = row[trustedKey] != null ? row[trustedKey] : row[fallbackKey];
    if (raw == null || raw === '') return NaN;
    return Number(raw);
  }

  var low = readBound('trusted_score_confidence_low', 'score_confidence_low');
  var high = readBound('trusted_score_confidence_high', 'score_confidence_high');
  if (!isFinite(low) || !isFinite(high)) return '—';
  return low.toFixed(2) + '-' + high.toFixed(2);
}
|
|
6327
|
+
|
|
6123
6328
|
function _evalMakeCell(text, opts) {
|
|
6124
6329
|
var td = document.createElement('td');
|
|
6125
6330
|
td.style.cssText = 'padding:' + ((opts && opts.pad) || '6px 8px') + ';' + ((opts && opts.center) ? 'text-align:center;' : '');
|
|
@@ -6140,12 +6345,15 @@ function _renderEvalDashboard(body, dashboard, runs, benchmarks) {
|
|
|
6140
6345
|
|
|
6141
6346
|
// Stats grid
|
|
6142
6347
|
var stats = document.createElement('div');
|
|
6143
|
-
stats.style.cssText = 'display:grid;grid-template-columns:repeat(
|
|
6348
|
+
stats.style.cssText = 'display:grid;grid-template-columns:repeat(auto-fit,minmax(120px,1fr));gap:12px;margin-bottom:16px;';
|
|
6144
6349
|
var statItems = [
|
|
6145
6350
|
{ label: 'Providers', value: dashboard.providers || 0 },
|
|
6351
|
+
{ label: 'Models', value: dashboard.modelGroups || (dashboard.leaderboard || []).length || 0 },
|
|
6146
6352
|
{ label: 'Total Evals', value: dashboard.totalEvals || 0 },
|
|
6147
6353
|
{ label: 'Shadow Evals', value: dashboard.shadowEvals || 0 },
|
|
6148
6354
|
{ label: 'Benchmark Evals', value: dashboard.benchmarkEvals || 0 },
|
|
6355
|
+
{ label: 'Trusted Evals', value: dashboard.trustedBenchmarkEvals || 0 },
|
|
6356
|
+
{ label: 'Trusted Models', value: dashboard.trustedBenchmarkModels || 0 },
|
|
6149
6357
|
];
|
|
6150
6358
|
statItems.forEach(function(item) {
|
|
6151
6359
|
var card = document.createElement('div');
|
|
@@ -6162,6 +6370,15 @@ function _renderEvalDashboard(body, dashboard, runs, benchmarks) {
|
|
|
6162
6370
|
});
|
|
6163
6371
|
body.appendChild(stats);
|
|
6164
6372
|
|
|
6373
|
+
if ((dashboard.provisionalBenchmarkModels || 0) || (dashboard.legacyBenchmarkModels || 0)) {
|
|
6374
|
+
var trustNote = document.createElement('div');
|
|
6375
|
+
trustNote.style.cssText = 'font-size:11px;color:var(--text-dim);margin:-6px 0 14px;';
|
|
6376
|
+
trustNote.textContent = 'Benchmark trust: ' + (dashboard.trustedBenchmarkModels || 0) + ' trusted, ' +
|
|
6377
|
+
(dashboard.provisionalBenchmarkModels || 0) + ' provisional, ' +
|
|
6378
|
+
(dashboard.legacyBenchmarkModels || 0) + ' legacy model groups.';
|
|
6379
|
+
body.appendChild(trustNote);
|
|
6380
|
+
}
|
|
6381
|
+
|
|
6165
6382
|
// Sub-nav tabs
|
|
6166
6383
|
var nav = document.createElement('div');
|
|
6167
6384
|
nav.className = 'we-eval-subnav';
|
|
@@ -6228,6 +6445,7 @@ function _renderEvalLeaderboard(container, dashboard) {
|
|
|
6228
6445
|
|
|
6229
6446
|
lb.forEach(function(row) {
|
|
6230
6447
|
if (!row.categories) return;
|
|
6448
|
+
var compositeScore = _evalBenchmarkScore(row);
|
|
6231
6449
|
var card = document.createElement('div');
|
|
6232
6450
|
card.style.cssText = 'background:var(--bg-secondary,#1a1a2e);border:1px solid var(--border);border-radius:8px;padding:10px 12px;min-width:180px;flex:1;max-width:240px;';
|
|
6233
6451
|
|
|
@@ -6238,8 +6456,8 @@ function _renderEvalLeaderboard(container, dashboard) {
|
|
|
6238
6456
|
card.appendChild(modelName);
|
|
6239
6457
|
|
|
6240
6458
|
var compositeLabel = document.createElement('div');
|
|
6241
|
-
compositeLabel.style.cssText = 'font-size:20px;font-weight:700;color:' + _evalScoreColor(
|
|
6242
|
-
compositeLabel.textContent =
|
|
6459
|
+
compositeLabel.style.cssText = 'font-size:20px;font-weight:700;color:' + _evalScoreColor(compositeScore) + ';margin-bottom:6px;font-variant-numeric:tabular-nums;';
|
|
6460
|
+
compositeLabel.textContent = compositeScore.toFixed(3);
|
|
6243
6461
|
card.appendChild(compositeLabel);
|
|
6244
6462
|
|
|
6245
6463
|
// SVG radar chart (small multiple)
|
|
@@ -6290,9 +6508,9 @@ function _renderEvalLeaderboard(container, dashboard) {
|
|
|
6290
6508
|
});
|
|
6291
6509
|
var dataPoly = document.createElementNS(ns, 'polygon');
|
|
6292
6510
|
dataPoly.setAttribute('points', dataPts.join(' '));
|
|
6293
|
-
dataPoly.setAttribute('fill', _evalScoreColor(
|
|
6511
|
+
dataPoly.setAttribute('fill', _evalScoreColor(compositeScore));
|
|
6294
6512
|
dataPoly.setAttribute('fill-opacity', '0.2');
|
|
6295
|
-
dataPoly.setAttribute('stroke', _evalScoreColor(
|
|
6513
|
+
dataPoly.setAttribute('stroke', _evalScoreColor(compositeScore));
|
|
6296
6514
|
dataPoly.setAttribute('stroke-width', '1.5');
|
|
6297
6515
|
svg.appendChild(dataPoly);
|
|
6298
6516
|
|
|
@@ -6306,7 +6524,7 @@ function _renderEvalLeaderboard(container, dashboard) {
|
|
|
6306
6524
|
dot.setAttribute('cx', dx);
|
|
6307
6525
|
dot.setAttribute('cy', dy);
|
|
6308
6526
|
dot.setAttribute('r', '2.5');
|
|
6309
|
-
dot.setAttribute('fill', _evalScoreColor(
|
|
6527
|
+
dot.setAttribute('fill', _evalScoreColor(compositeScore));
|
|
6310
6528
|
svg.appendChild(dot);
|
|
6311
6529
|
|
|
6312
6530
|
// Label
|
|
@@ -6425,8 +6643,8 @@ function _renderEvalLeaderboard(container, dashboard) {
|
|
|
6425
6643
|
var table = document.createElement('table');
|
|
6426
6644
|
table.style.cssText = 'width:100%;border-collapse:collapse;font-size:12px;';
|
|
6427
6645
|
var hdrRow = document.createElement('tr');
|
|
6428
|
-
var headers = ['Provider','Model','Composite','Evals','Errors','Throughput','Latency','Cost'];
|
|
6429
|
-
if (hasDims) headers.splice(
|
|
6646
|
+
var headers = ['Provider','Model','Composite','Trust','CI','Evals','Errors','Throughput','Latency','Cost'];
|
|
6647
|
+
if (hasDims) headers.splice(5, 0, 'Code Gen', 'Tool Use', 'Planning', 'Efficiency');
|
|
6430
6648
|
headers.forEach(function(h) {
|
|
6431
6649
|
var th = document.createElement('th');
|
|
6432
6650
|
th.style.cssText = 'text-align:left;padding:8px;border-bottom:1px solid var(--border);font-size:11px;';
|
|
@@ -6437,11 +6655,27 @@ function _renderEvalLeaderboard(container, dashboard) {
|
|
|
6437
6655
|
table.appendChild(hdrRow);
|
|
6438
6656
|
var tbody = document.createElement('tbody');
|
|
6439
6657
|
lb.forEach(function(row, idx) {
|
|
6658
|
+
var compositeScore = _evalBenchmarkScore(row);
|
|
6659
|
+
var trustStatus = _evalTrustStatus(row);
|
|
6660
|
+
var trustedEvals = Number(row.trusted_evals || 0);
|
|
6661
|
+
var minTrusted = Number(row.min_trusted_evals || 10);
|
|
6440
6662
|
var tr = document.createElement('tr');
|
|
6441
6663
|
tr.style.cssText = idx % 2 === 0 ? '' : 'background:rgba(255,255,255,0.02);';
|
|
6442
6664
|
tr.appendChild(_evalMakeCell(row.provider));
|
|
6443
6665
|
tr.appendChild(_evalMakeCell(row.model));
|
|
6444
|
-
tr.appendChild(_evalMakeCell(
|
|
6666
|
+
tr.appendChild(_evalMakeCell(compositeScore.toFixed(3), { center: true, scoreColor: true }));
|
|
6667
|
+
var trustCell = document.createElement('td');
|
|
6668
|
+
trustCell.style.cssText = 'padding:6px 8px;text-align:center;';
|
|
6669
|
+
var trustBadge = document.createElement('span');
|
|
6670
|
+
trustBadge.style.cssText = 'font-size:9px;padding:1px 5px;border-radius:3px;background:' + _evalTrustColor(trustStatus) + '22;color:' + _evalTrustColor(trustStatus) + ';font-weight:600;text-transform:uppercase;';
|
|
6671
|
+
trustBadge.textContent = trustStatus;
|
|
6672
|
+
var trustMeta = document.createElement('div');
|
|
6673
|
+
trustMeta.style.cssText = 'font-size:9px;color:var(--text-dim);margin-top:2px;';
|
|
6674
|
+
trustMeta.textContent = trustedEvals + '/' + minTrusted;
|
|
6675
|
+
trustCell.appendChild(trustBadge);
|
|
6676
|
+
trustCell.appendChild(trustMeta);
|
|
6677
|
+
tr.appendChild(trustCell);
|
|
6678
|
+
tr.appendChild(_evalMakeCell(_evalCiLabel(row), { center: true }));
|
|
6445
6679
|
if (hasDims && row.categories) {
|
|
6446
6680
|
['codeGen', 'toolUse', 'planning', 'efficiency'].forEach(function(k) {
|
|
6447
6681
|
tr.appendChild(_evalMakeCell((row.categories[k] || 0).toFixed(2), { center: true, scoreColor: true }));
|
|
@@ -6544,13 +6778,31 @@ function _renderEvalBenchmarks(container, benchmarks) {
|
|
|
6544
6778
|
name.textContent = suite.name;
|
|
6545
6779
|
var count = document.createElement('span');
|
|
6546
6780
|
count.style.cssText = 'font-size:11px;color:var(--text-dim);';
|
|
6547
|
-
|
|
6781
|
+
if (suite.adapter) {
|
|
6782
|
+
count.textContent = suite.count ? suite.count + ' tasks' : 'adapter suite';
|
|
6783
|
+
} else {
|
|
6784
|
+
count.textContent = (suite.count || 0) + ' prompts';
|
|
6785
|
+
}
|
|
6548
6786
|
top.appendChild(name);
|
|
6549
6787
|
top.appendChild(count);
|
|
6550
6788
|
card.appendChild(top);
|
|
6551
6789
|
|
|
6552
6790
|
var tags = document.createElement('div');
|
|
6553
6791
|
tags.style.cssText = 'margin-bottom:8px;';
|
|
6792
|
+
if (suite.adapter) {
|
|
6793
|
+
var adapterTag = document.createElement('span');
|
|
6794
|
+
adapterTag.className = 'we-eval-diff-tag';
|
|
6795
|
+
adapterTag.style.cssText = 'background:#60a5fa;color:#06121f;margin-right:4px;';
|
|
6796
|
+
adapterTag.textContent = 'ADAPTER';
|
|
6797
|
+
tags.appendChild(adapterTag);
|
|
6798
|
+
}
|
|
6799
|
+
if (suite.datasetVersion) {
|
|
6800
|
+
var datasetTag = document.createElement('span');
|
|
6801
|
+
datasetTag.className = 'we-eval-diff-tag';
|
|
6802
|
+
datasetTag.style.cssText = 'background:#2dd4bf;color:#06121f;margin-right:4px;';
|
|
6803
|
+
datasetTag.textContent = String(suite.datasetVersion).toUpperCase();
|
|
6804
|
+
tags.appendChild(datasetTag);
|
|
6805
|
+
}
|
|
6554
6806
|
(suite.difficulties || []).forEach(function(d) {
|
|
6555
6807
|
var tag = document.createElement('span');
|
|
6556
6808
|
tag.className = 'we-eval-diff-tag';
|
|
@@ -7317,12 +7569,13 @@ function _renderEvalComparison(container, dashboard) {
|
|
|
7317
7569
|
chartWrap.style.cssText = 'margin-bottom:20px;';
|
|
7318
7570
|
var chartTitle = document.createElement('div');
|
|
7319
7571
|
chartTitle.style.cssText = 'font-size:12px;color:var(--text-dim);margin-bottom:8px;';
|
|
7320
|
-
chartTitle.textContent = 'Composite Score by Model';
|
|
7572
|
+
chartTitle.textContent = 'Composite Score by Model (trusted mean when available)';
|
|
7321
7573
|
chartWrap.appendChild(chartTitle);
|
|
7322
7574
|
|
|
7323
|
-
var maxScore = Math.max.apply(null, lb.map(function(r) { return r
|
|
7324
|
-
lb.sort(function(a, b) { return (b
|
|
7575
|
+
var maxScore = Math.max.apply(null, lb.map(function(r) { return _evalBenchmarkScore(r); }));
|
|
7576
|
+
lb.sort(function(a, b) { return _evalBenchmarkScore(b) - _evalBenchmarkScore(a); });
|
|
7325
7577
|
lb.forEach(function(row) {
|
|
7578
|
+
var rowScore = _evalBenchmarkScore(row);
|
|
7326
7579
|
var barRow = document.createElement('div');
|
|
7327
7580
|
barRow.style.cssText = 'display:flex;align-items:center;margin-bottom:4px;gap:8px;';
|
|
7328
7581
|
|
|
@@ -7334,13 +7587,13 @@ function _renderEvalComparison(container, dashboard) {
|
|
|
7334
7587
|
var barBg = document.createElement('div');
|
|
7335
7588
|
barBg.style.cssText = 'flex:1;height:18px;background:var(--border);border-radius:3px;overflow:hidden;position:relative;';
|
|
7336
7589
|
var barFill = document.createElement('div');
|
|
7337
|
-
var pct = maxScore > 0 ? (
|
|
7338
|
-
barFill.style.cssText = 'height:100%;background:' + _evalScoreColor(
|
|
7590
|
+
var pct = maxScore > 0 ? (rowScore / maxScore) * 100 : 0;
|
|
7591
|
+
barFill.style.cssText = 'height:100%;background:' + _evalScoreColor(rowScore) + ';width:' + pct + '%;border-radius:3px;transition:width 0.3s;';
|
|
7339
7592
|
barBg.appendChild(barFill);
|
|
7340
7593
|
|
|
7341
7594
|
var scoreLabel = document.createElement('div');
|
|
7342
|
-
scoreLabel.style.cssText = 'width:60px;font-size:11px;font-weight:600;color:' + _evalScoreColor(
|
|
7343
|
-
scoreLabel.textContent =
|
|
7595
|
+
scoreLabel.style.cssText = 'width:60px;font-size:11px;font-weight:600;color:' + _evalScoreColor(rowScore) + ';font-variant-numeric:tabular-nums;';
|
|
7596
|
+
scoreLabel.textContent = rowScore.toFixed(3);
|
|
7344
7597
|
|
|
7345
7598
|
barRow.appendChild(label);
|
|
7346
7599
|
barRow.appendChild(barBg);
|
|
@@ -7460,16 +7713,17 @@ function _renderEvalComparison(container, dashboard) {
|
|
|
7460
7713
|
costTable.appendChild(cHdr);
|
|
7461
7714
|
|
|
7462
7715
|
var ranked = lb.slice().sort(function(a, b) {
|
|
7463
|
-
var aEff = (a.total_cost || 0) > 0 ? (a
|
|
7464
|
-
var bEff = (b.total_cost || 0) > 0 ? (b
|
|
7716
|
+
var aEff = (a.total_cost || 0) > 0 ? _evalBenchmarkScore(a) / a.total_cost : 999999;
|
|
7717
|
+
var bEff = (b.total_cost || 0) > 0 ? _evalBenchmarkScore(b) / b.total_cost : 999999;
|
|
7465
7718
|
return bEff - aEff;
|
|
7466
7719
|
});
|
|
7467
7720
|
ranked.forEach(function(row, i) {
|
|
7721
|
+
var rowScore = _evalBenchmarkScore(row);
|
|
7468
7722
|
var tr = document.createElement('tr');
|
|
7469
|
-
var eff = (row.total_cost || 0) > 0 ? (
|
|
7723
|
+
var eff = (row.total_cost || 0) > 0 ? (rowScore / row.total_cost).toFixed(1) : 'free';
|
|
7470
7724
|
tr.appendChild(_evalMakeCell('#' + (i + 1)));
|
|
7471
7725
|
tr.appendChild(_evalMakeCell((row.provider || '') + '/' + (row.model || '')));
|
|
7472
|
-
tr.appendChild(_evalMakeCell(
|
|
7726
|
+
tr.appendChild(_evalMakeCell(rowScore.toFixed(3), { center: true, scoreColor: true }));
|
|
7473
7727
|
tr.appendChild(_evalMakeCell('$' + (row.total_cost || 0).toFixed(4), { center: true }));
|
|
7474
7728
|
tr.appendChild(_evalMakeCell(eff, { center: true }));
|
|
7475
7729
|
costTable.appendChild(tr);
|