create-walle 0.9.11 → 0.9.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. package/README.md +3 -3
  2. package/package.json +2 -2
  3. package/template/bin/dev.sh +7 -1
  4. package/template/bin/setup.js +53 -9
  5. package/template/bin/sync-images.js +53 -0
  6. package/template/builder-journal.md +17 -0
  7. package/template/claude-task-manager/api-prompts.js +98 -13
  8. package/template/claude-task-manager/api-reviews.js +82 -5
  9. package/template/claude-task-manager/db.js +32 -5
  10. package/template/claude-task-manager/docs/session-capture-foundation-design.md +1273 -0
  11. package/template/claude-task-manager/lib/claude-desktop-sessions.js +696 -0
  12. package/template/claude-task-manager/lib/coding-agent-models.js +49 -1
  13. package/template/claude-task-manager/lib/session-capture.js +421 -0
  14. package/template/claude-task-manager/lib/session-history.js +135 -15
  15. package/template/claude-task-manager/lib/session-jobs.js +10 -5
  16. package/template/claude-task-manager/lib/session-stream.js +87 -19
  17. package/template/claude-task-manager/lib/setup-provider-config.js +115 -0
  18. package/template/claude-task-manager/lib/walle-ctm-history.js +72 -0
  19. package/template/claude-task-manager/lib/walle-session-context.js +61 -0
  20. package/template/claude-task-manager/lib/walle-transcript.js +176 -0
  21. package/template/claude-task-manager/public/css/setup.css +35 -8
  22. package/template/claude-task-manager/public/css/walle-session.css +56 -0
  23. package/template/claude-task-manager/public/css/walle.css +120 -0
  24. package/template/claude-task-manager/public/index.html +814 -181
  25. package/template/claude-task-manager/public/js/message-renderer.js +148 -19
  26. package/template/claude-task-manager/public/js/reviews.js +120 -62
  27. package/template/claude-task-manager/public/js/setup.js +75 -31
  28. package/template/claude-task-manager/public/js/stream-view.js +115 -55
  29. package/template/claude-task-manager/public/js/walle-session.js +84 -2
  30. package/template/claude-task-manager/public/js/walle.js +308 -54
  31. package/template/claude-task-manager/server.js +1092 -146
  32. package/template/claude-task-manager/session-integrity.js +181 -54
  33. package/template/claude-task-manager/session-utils.js +123 -41
  34. package/template/claude-task-manager/workers/state-detectors/codex.js +5 -2
  35. package/template/package.json +1 -1
  36. package/template/wall-e/adapters/ctm.js +39 -18
  37. package/template/wall-e/agent-runners/contract.js +17 -0
  38. package/template/wall-e/agent-runners/index.js +22 -0
  39. package/template/wall-e/agent-runtime/harness.js +212 -0
  40. package/template/wall-e/agent-runtime/index.js +8 -0
  41. package/template/wall-e/agent-runtime/registry.js +67 -0
  42. package/template/wall-e/agent-runtime/session-store.js +179 -0
  43. package/template/wall-e/agent-runtime/spawn.js +208 -0
  44. package/template/wall-e/api-walle.js +174 -7
  45. package/template/wall-e/brain.js +266 -28
  46. package/template/wall-e/channels/policy.js +88 -0
  47. package/template/wall-e/channels/registry.js +15 -1
  48. package/template/wall-e/channels/reply-dispatcher.js +70 -0
  49. package/template/wall-e/channels/session-bindings.js +51 -0
  50. package/template/wall-e/chat/code-review-context.js +29 -0
  51. package/template/wall-e/chat.js +188 -42
  52. package/template/wall-e/coding/acp-adapter.js +188 -0
  53. package/template/wall-e/coding/agent-catalog.js +129 -0
  54. package/template/wall-e/coding/compaction-service.js +247 -0
  55. package/template/wall-e/coding/execution-trace.js +3 -0
  56. package/template/wall-e/coding/instruction-service.js +224 -0
  57. package/template/wall-e/coding/model-message.js +67 -0
  58. package/template/wall-e/coding/permission-rules-store.js +111 -0
  59. package/template/wall-e/coding/permission-service.js +266 -0
  60. package/template/wall-e/coding/prompt-bundle.js +67 -0
  61. package/template/wall-e/coding/prompt-runtime.js +243 -0
  62. package/template/wall-e/coding/provider-transform.js +188 -0
  63. package/template/wall-e/coding/runtime-mode.js +132 -0
  64. package/template/wall-e/coding/snapshot-service.js +155 -0
  65. package/template/wall-e/coding/stream-processor.js +268 -0
  66. package/template/wall-e/coding/task-tool.js +255 -0
  67. package/template/wall-e/coding/tool-registry.js +361 -0
  68. package/template/wall-e/coding/transcript-writer.js +143 -0
  69. package/template/wall-e/coding/workspace-replay.js +324 -0
  70. package/template/wall-e/coding-context.js +4 -22
  71. package/template/wall-e/coding-orchestrator.js +307 -18
  72. package/template/wall-e/coding-prompts.js +44 -3
  73. package/template/wall-e/context/context-builder.js +43 -1
  74. package/template/wall-e/context/topic-matcher.js +1 -1
  75. package/template/wall-e/eval/agent-runner.js +59 -13
  76. package/template/wall-e/eval/benchmarks/memory-retrieval.json +155 -57
  77. package/template/wall-e/eval/benchmarks.js +100 -16
  78. package/template/wall-e/eval/eval-orchestrator.js +218 -8
  79. package/template/wall-e/eval/harvester.js +62 -5
  80. package/template/wall-e/eval/head-to-head.js +23 -2
  81. package/template/wall-e/eval/humaneval-adapter.js +30 -5
  82. package/template/wall-e/eval/livecodebench-adapter.js +29 -5
  83. package/template/wall-e/eval/manifest.js +186 -0
  84. package/template/wall-e/eval/run-agent-benchmarks.js +66 -2
  85. package/template/wall-e/eval/session-retrieval-benchmark.js +150 -0
  86. package/template/wall-e/eval/session-transcripts.js +57 -4
  87. package/template/wall-e/eval/swebench-adapter.js +109 -3
  88. package/template/wall-e/evaluation/agent-router.js +53 -1
  89. package/template/wall-e/evaluation/coding-quorum.js +48 -1
  90. package/template/wall-e/evaluation/router.js +4 -2
  91. package/template/wall-e/evaluation/tier-selector.js +11 -1
  92. package/template/wall-e/extraction/contradiction.js +2 -2
  93. package/template/wall-e/extraction/indexer.js +2 -1
  94. package/template/wall-e/extraction/knowledge-extractor.js +2 -2
  95. package/template/wall-e/hooks/cli.js +92 -0
  96. package/template/wall-e/hooks/discovery.js +119 -0
  97. package/template/wall-e/hooks/index.js +7 -0
  98. package/template/wall-e/hooks/manifest.js +55 -0
  99. package/template/wall-e/hooks/runtime.js +84 -0
  100. package/template/wall-e/hooks/session-memory.js +225 -0
  101. package/template/wall-e/http/auth.js +6 -2
  102. package/template/wall-e/http/chat-api.js +54 -8
  103. package/template/wall-e/integrations/claude-plugin/hooks/hooks.json +27 -0
  104. package/template/wall-e/integrations/claude-plugin/hooks/walle-precompact-hook.sh +5 -0
  105. package/template/wall-e/integrations/claude-plugin/hooks/walle-stop-hook.sh +5 -0
  106. package/template/wall-e/integrations/codex-plugin/hooks/walle-hook.sh +7 -0
  107. package/template/wall-e/integrations/codex-plugin/hooks.json +37 -0
  108. package/template/wall-e/listening/calendar.js +3 -1
  109. package/template/wall-e/llm/client.js +64 -10
  110. package/template/wall-e/llm/google.js +39 -5
  111. package/template/wall-e/llm/ollama.js +1 -1
  112. package/template/wall-e/llm/ollama.plugin.json +1 -1
  113. package/template/wall-e/llm/provider-availability.js +10 -0
  114. package/template/wall-e/llm/provider-error.js +269 -0
  115. package/template/wall-e/llm/tool-adapter.js +48 -12
  116. package/template/wall-e/loops/boot.js +2 -1
  117. package/template/wall-e/loops/initiative.js +2 -2
  118. package/template/wall-e/loops/tasks.js +8 -47
  119. package/template/wall-e/loops/workspace-prompts.js +20 -0
  120. package/template/wall-e/mcp-server.js +442 -1
  121. package/template/wall-e/memory/session-ingest-service.js +159 -0
  122. package/template/wall-e/memory/source-indexer.js +289 -0
  123. package/template/wall-e/plugins/discovery.js +83 -0
  124. package/template/wall-e/plugins/manifest-loader.js +50 -10
  125. package/template/wall-e/plugins/manifest-schema.js +69 -0
  126. package/template/wall-e/plugins/model-catalog.js +55 -0
  127. package/template/wall-e/prompts/coding/base.txt +2 -0
  128. package/template/wall-e/prompts/coding/deepseek.txt +1 -0
  129. package/template/wall-e/prompts/coding/memory-protocol.md +9 -0
  130. package/template/wall-e/prompts/coding/plan.txt +1 -0
  131. package/template/wall-e/runtime/execution-trace.js +220 -0
  132. package/template/wall-e/security/audit.js +266 -0
  133. package/template/wall-e/security/ssrf.js +236 -0
  134. package/template/wall-e/session-files.js +303 -0
  135. package/template/wall-e/skills/_bundled/slack-backfill/SKILL.md +3 -0
  136. package/template/wall-e/skills/_bundled/slack-sync/SKILL.md +3 -0
  137. package/template/wall-e/skills/internal-skill-registry.js +2 -2
  138. package/template/wall-e/skills/script-skill-runner.js +143 -0
  139. package/template/wall-e/skills/skill-executor.js +5 -6
  140. package/template/wall-e/skills/skill-fallback.js +3 -1
  141. package/template/wall-e/skills/skill-harness-registry.js +7 -8
  142. package/template/wall-e/skills/skill-planner.js +52 -4
  143. package/template/wall-e/skills/slack-ingest.js +11 -3
  144. package/template/wall-e/sources/base.js +90 -0
  145. package/template/wall-e/sources/builtin.js +33 -0
  146. package/template/wall-e/sources/claude-code-jsonl.js +78 -0
  147. package/template/wall-e/sources/codex-jsonl.js +125 -0
  148. package/template/wall-e/sources/coding-session-utils.js +117 -0
  149. package/template/wall-e/sources/contract-suite.js +59 -0
  150. package/template/wall-e/sources/gemini-jsonl.js +85 -0
  151. package/template/wall-e/sources/index.js +9 -0
  152. package/template/wall-e/sources/jsonl-utils.js +181 -0
  153. package/template/wall-e/sources/record-types.js +252 -0
  154. package/template/wall-e/sources/registry.js +92 -0
  155. package/template/wall-e/sources/transforms.js +100 -0
  156. package/template/wall-e/sources/walle-jsonl.js +108 -0
  157. package/template/wall-e/tools/coding-middleware.js +31 -1
  158. package/template/wall-e/tools/file-tracker.js +25 -1
  159. package/template/wall-e/tools/local-tools.js +75 -47
  160. package/template/wall-e/tools/session-sharing.js +68 -1
  161. package/template/wall-e/tools/shell-analyzer.js +1 -1
  162. package/template/wall-e/tools/shell-policy.js +47 -0
  163. package/template/wall-e/tools/snapshot.js +42 -0
  164. package/template/wall-e/training/harvester.js +62 -5
  165. package/template/wall-e/utils/repair.js +253 -1
  166. package/template/website/index.html +3 -3
  167. package/template/wall-e/skills/_bundled/slack-mentions/.watched-threads.json +0 -18
@@ -7,6 +7,7 @@ let timelineOffset = 0;
7
7
  let _providerStatus = null;
8
8
  let _providerPollTimer = null;
9
9
  let _cachedActiveModel = null; // { model, provider } from /api/setup/status
10
+ let _activeProviderIssue = null;
10
11
 
11
12
  const WalleCore = window.WalleCore || {};
12
13
  const MODEL_LABELS = WalleCore.MODEL_LABELS || {};
@@ -31,6 +32,80 @@ const resolveWalleBase = WalleCore.resolveWalleBase || (() => Promise.resolve(''
31
32
  // URL ('') resolves to the same place from every browser context.
32
33
  const WALLE_BASE = '';
33
34
 
35
/**
 * Flatten a provider-failure payload into the issue shape consumed by the chat UI.
 *
 * `source` may be a wrapper that carries the failure under `providerError` or
 * `provider_error`, or it may be the issue object itself. Fields are looked up on
 * the issue first and on the wrapper second. Where the function accepts both a
 * camelCase and a snake_case spelling of a field (raw message, action URL,
 * created-at), both spellings are honoured on both objects.
 *
 * @param {object|null|undefined} source Payload to inspect.
 * @returns {object|null} Normalized issue, or null when `source` is not an object
 *   or does not look like a provider failure.
 */
function _normalizeProviderIssue(source) {
  if (!source || typeof source !== 'object') return null;
  var issue = source.providerError || source.provider_error || source;
  if (!issue || typeof issue !== 'object') return null;
  var title = issue.title || (source.code === 'AI_PROVIDER_ERROR' ? 'AI provider failed' : '');
  // The raw message is mapped from either spelling below, so the gate must
  // recognise either spelling too; otherwise snake_case-only payloads are rejected.
  var issueRawMessage = issue.rawMessage || issue.raw_message;
  // A payload only counts as a provider issue when it carries a title, a raw or
  // user-facing message, or a wrapper type that starts with "ai_provider".
  if (!title && !issueRawMessage && !issue.userMessage && !/^ai_provider/.test(String(source.type || ''))) return null;
  return {
    code: issue.code || source.code || 'AI_PROVIDER_ERROR',
    type: issue.type || source.type || 'provider_error',
    severity: issue.severity || source.severity || 'error',
    title: title || 'AI provider failed',
    message: issue.userMessage || issue.message || source.message || source.error || 'Wall-E could not get a response from the configured AI provider.',
    rawMessage: issueRawMessage || source.rawMessage || source.raw_message || '',
    provider: issue.provider || source.provider || '',
    model: issue.model || source.model || '',
    status: issue.status || source.status || '',
    retryAfter: issue.retryAfter || source.retryAfter || '',
    actionLabel: issue.actionLabel || source.actionLabel || 'Open Setup',
    actionUrl: issue.actionUrl || issue.action_url || source.actionUrl || source.action_url || '/setup.html',
    createdAt: issue.createdAt || issue.created_at || source.createdAt || source.created_at || '',
  };
}
57
+
58
/**
 * Normalize `source` and, when it is a provider issue, store it as the active one.
 *
 * @param {object|null|undefined} source Payload handed to `_normalizeProviderIssue`.
 * @returns {object|null} The normalized issue, or null when nothing was stored.
 */
function _rememberProviderIssue(source) {
  const normalized = _normalizeProviderIssue(source);
  if (normalized) {
    _activeProviderIssue = normalized;
  }
  return normalized;
}
63
+
64
/**
 * Build the multi-line "details" text for a provider issue.
 *
 * Each populated field becomes one labelled line; the raw provider message, when
 * present, follows after a blank separator line. The result is trimmed.
 *
 * @param {object} issue Issue with optional status/provider/model/retryAfter/rawMessage.
 * @returns {string} Details text, or '' when no field is populated.
 */
function _providerIssueDetails(issue) {
  const labelled = [
    ['HTTP/status: ', issue.status],
    ['Provider: ', issue.provider],
    ['Model: ', issue.model],
    ['Retry after: ', issue.retryAfter],
  ];
  const out = labelled
    .filter(function(pair) { return pair[1]; })
    .map(function(pair) { return pair[0] + pair[1]; });
  if (issue.rawMessage) {
    out.push('', issue.rawMessage);
  }
  return out.join('\n').trim();
}
73
+
74
/**
 * Render a provider issue as an HTML alert block.
 *
 * @param {object} issue Raw or already-normalized issue; it is passed through
 *   `_normalizeProviderIssue` first.
 * @param {object} [opts]
 * @param {boolean} [opts.dismissible] Adds a dismiss button wired to `WE._dismissProviderIssue()`.
 * @param {boolean} [opts.inline] Adds the `inline` CSS class to the wrapper.
 * @returns {string} HTML markup, or '' when `issue` does not normalize to a provider issue.
 */
function _renderProviderIssueBlock(issue, opts) {
  const normalized = _normalizeProviderIssue(issue);
  if (!normalized) return '';
  const options = opts || {};

  // Anything other than an explicit "warning" is styled as an error.
  const severity = normalized.severity === 'warning' ? 'warning' : 'error';
  const details = _providerIssueDetails(normalized);

  // Setup URLs become an in-app navigation button; other root-relative or
  // http(s) URLs become a plain link. Any other URL gets no action at all.
  const url = normalized.actionUrl;
  let action = '';
  if (url && /^\/setup/.test(url)) {
    action = '<button class="walle-btn primary" onclick="navTo(\'setup\')">' + esc(normalized.actionLabel || 'Open Setup') + '</button>';
  } else if (url && /^(\/|https?:\/\/)/.test(url)) {
    action = '<a class="walle-btn primary" href="' + esc(url) + '">' + esc(normalized.actionLabel || 'Open') + '</a>';
  }

  let dismiss = '';
  if (options.dismissible) {
    dismiss = '<button class="we-provider-issue-dismiss" onclick="WE._dismissProviderIssue()" title="Dismiss">&times;</button>';
  }

  const parts = [
    '<div class="we-provider-issue ' + severity + (options.inline ? ' inline' : '') + '" role="alert">',
    '<div class="we-provider-issue-main">',
    '<div class="we-provider-issue-title">' + esc(normalized.title) + '</div>',
    '<div class="we-provider-issue-body">' + esc(normalized.message) + '</div>',
    details
      ? '<details class="we-provider-issue-details"><summary>Provider details</summary><pre>' + esc(details) + '</pre></details>'
      : '',
    '</div>',
    (action || dismiss) ? '<div class="we-provider-issue-actions">' + action + dismiss + '</div>' : '',
    '</div>',
  ];
  return parts.join('');
}
103
+
104
/**
 * Click handler for the dismiss button on the provider-issue block: forgets the
 * active provider issue and re-renders the chat UI.
 */
WE._dismissProviderIssue = function dismissProviderIssue() {
  _activeProviderIssue = null;
  renderChatUI();
};
108
+
34
109
  // Render an assistant message that may begin with a `<think>...</think>`
35
110
  // chain-of-thought block (DeepSeek V4 deep-thinking, Anthropic extended
36
111
  // thinking, gemma4 native thinking). The thoughts render as a collapsible
@@ -1793,6 +1868,9 @@ function renderChatUI() {
1793
1868
  if (!body) return;
1794
1869
 
1795
1870
  var html = '<div class="walle-chat-container">';
1871
+ if (_activeProviderIssue) {
1872
+ html += _renderProviderIssueBlock(_activeProviderIssue, { dismissible: true });
1873
+ }
1796
1874
  // Search bar + select mode toggle
1797
1875
  html += '<div class="we-chat-search-bar">';
1798
1876
  html += '<input class="we-chat-search-input" id="we-chat-search" placeholder="Search chat history..." value="' + esc(chatSearchQuery || '') + '" oninput="WE._onChatSearch(this.value)">';
@@ -1903,7 +1981,11 @@ function renderChatUI() {
1903
1981
  i++;
1904
1982
  html += '<div class="walle-chat-msg assistant">';
1905
1983
  html += '<div class="walle-chat-msg-role assistant"><img src="/walle-icon.svg" width="16" height="16" class="walle-avatar"> WALL-E</div>';
1906
- html += '<div class="walle-chat-msg-text we-markdown">' + renderAssistantWithThoughts(chatHistory[i].text) + '</div>';
1984
+ if (chatHistory[i]._providerError) {
1985
+ html += _renderProviderIssueBlock(chatHistory[i]._providerError, { inline: true });
1986
+ } else {
1987
+ html += '<div class="walle-chat-msg-text we-markdown">' + renderAssistantWithThoughts(chatHistory[i].text) + '</div>';
1988
+ }
1907
1989
  html += '</div>';
1908
1990
  }
1909
1991
  html += '</div>'; // close turn group
@@ -1917,7 +1999,11 @@ function renderChatUI() {
1917
1999
  }
1918
2000
  html += '<div class="walle-chat-msg assistant">';
1919
2001
  html += '<div class="walle-chat-msg-role assistant"><img src="/walle-icon.svg" width="16" height="16" class="walle-avatar"> WALL-E</div>';
1920
- html += '<div class="walle-chat-msg-text we-markdown">' + renderAssistantWithThoughts(msg.text) + '</div>';
2002
+ if (msg._providerError) {
2003
+ html += _renderProviderIssueBlock(msg._providerError, { inline: true });
2004
+ } else {
2005
+ html += '<div class="walle-chat-msg-text we-markdown">' + renderAssistantWithThoughts(msg.text) + '</div>';
2006
+ }
1921
2007
  html += '</div>';
1922
2008
  html += '</div>';
1923
2009
  turnIdx++;
@@ -2258,8 +2344,13 @@ function _streamChat(message, pendingAttachments, sendId) {
2258
2344
  }
2259
2345
  throw new Error('Send cancelled — current provider cannot handle ' + capList + ' attachments.');
2260
2346
  }
2261
- var msg = (json && json.error) || txt || ('HTTP ' + response.status);
2262
- throw new Error(msg);
2347
+ var msg = (json && (json.message || json.error)) || txt || ('HTTP ' + response.status);
2348
+ var err = new Error(msg);
2349
+ if (json && json.providerError) {
2350
+ err.code = json.code || 'AI_PROVIDER_ERROR';
2351
+ err.providerError = json.providerError;
2352
+ }
2353
+ throw err;
2263
2354
  });
2264
2355
  }
2265
2356
  var reader = response.body.getReader();
@@ -2287,7 +2378,7 @@ function _streamChat(message, pendingAttachments, sendId) {
2287
2378
  reader.cancel();
2288
2379
  return;
2289
2380
  } else if (event.type === 'error') {
2290
- _finishStreaming('Error: ' + (event.error || 'Unknown error'));
2381
+ _finishStreamingError(event, sendId, { serverReceived: true });
2291
2382
  reader.cancel();
2292
2383
  return;
2293
2384
  } else {
@@ -2301,7 +2392,7 @@ function _streamChat(message, pendingAttachments, sendId) {
2301
2392
  readChunk();
2302
2393
  }).catch(function(err) {
2303
2394
  if (err.name !== 'AbortError') {
2304
- _finishStreaming('Error: ' + (err.message || 'Stream read failed'));
2395
+ _finishStreamingError({ error: err.message || 'Stream read failed' }, sendId, { serverReceived: false });
2305
2396
  }
2306
2397
  });
2307
2398
  }
@@ -2315,6 +2406,14 @@ function _streamChat(message, pendingAttachments, sendId) {
2315
2406
  renderChatUI();
2316
2407
  return;
2317
2408
  }
2409
+ if (err.providerError) {
2410
+ _finishStreamingError({
2411
+ error: err.message || 'AI provider failed',
2412
+ code: err.code || 'AI_PROVIDER_ERROR',
2413
+ providerError: err.providerError,
2414
+ }, sendId, { serverReceived: true });
2415
+ return;
2416
+ }
2318
2417
  // Mark this user message as unsynced so it's visibly flagged and survives
2319
2418
  // reloads. The localStorage record stays (cleared only on confirmed success
2320
2419
  // via _finishStreaming with a reply) — on next page load it's merged back
@@ -2334,24 +2433,44 @@ function _streamChat(message, pendingAttachments, sendId) {
2334
2433
  });
2335
2434
  }
2336
2435
 
2337
- function _finishStreaming(reply) {
2436
/**
 * Flag the most recent chat-history entry with the given send id as unsynced.
 *
 * @param {string} sendId Send id to look for; a falsy id is a no-op.
 * @param {string} [message] Error text to record; defaults to 'Failed to get response'.
 */
function _markSendUnsynced(sendId, message) {
  if (!sendId) return;
  // Scan newest-first and stop at the first entry carrying this send id.
  let idx = chatHistory.length;
  while (idx-- > 0) {
    const entry = chatHistory[idx];
    if (entry._sendId !== sendId) continue;
    entry._unsynced = true;
    entry._error = message || 'Failed to get response';
    return;
  }
}
2446
+
2447
/**
 * Mark a user turn as confirmed by the server: clear its unsynced record and
 * strip the `_sendId` / `_unsynced` / `_error` markers from the history entry.
 *
 * @param {string} [sendId] When given, only the user turn with this id is cleared
 *   (and `_clearUnsynced(sendId)` is called even if no entry matches). When
 *   omitted, the most recent user turn that still has a send id is cleared.
 */
function _confirmLastUserSend(sendId) {
  if (sendId) {
    _clearUnsynced(sendId);
  }
  for (let pos = chatHistory.length - 1; pos >= 0; pos -= 1) {
    const turn = chatHistory[pos];
    const isTrackedUserTurn = turn.role === 'user' && turn._sendId;
    if (!isTrackedUserTurn) continue;
    if (sendId && turn._sendId !== sendId) continue;
    _clearUnsynced(turn._sendId);
    delete turn._sendId;
    delete turn._unsynced;
    delete turn._error;
    break;
  }
}
2461
+
2462
/**
 * Return the chat to its idle state: drop the active request controller, stop
 * the thinking timer, and clear the thinking-indicator state.
 */
function _resetStreamingState() {
  activeChatController = null;
  _stopThinkingTimer();
  Object.assign(chatThinkingState, {
    isProcessing: false,
    logs: [],
    startTime: null,
  });
}
2469
+
2470
+ function _finishStreaming(reply) {
2471
+ _resetStreamingState();
2343
2472
  if (reply) {
2344
- // Server confirmed the turn — clear the unsynced record + marker for the
2345
- // most recent user turn that had a sendId.
2346
- for (var _fi = chatHistory.length - 1; _fi >= 0; _fi--) {
2347
- if (chatHistory[_fi].role === 'user' && chatHistory[_fi]._sendId) {
2348
- _clearUnsynced(chatHistory[_fi]._sendId);
2349
- delete chatHistory[_fi]._sendId;
2350
- delete chatHistory[_fi]._unsynced;
2351
- delete chatHistory[_fi]._error;
2352
- break;
2353
- }
2354
- }
2473
+ _confirmLastUserSend();
2355
2474
  chatHistory.push({ role: 'assistant', text: reply });
2356
2475
  // Update the active branch with the assistant reply
2357
2476
  _updateActiveBranch();
@@ -2359,6 +2478,28 @@ function _finishStreaming(reply) {
2359
2478
  WE.renderChat();
2360
2479
  }
2361
2480
 
2481
/**
 * Finish a chat send that ended in an error.
 *
 * Resets streaming state, remembers the provider issue (if the event is one),
 * confirms the user turn or marks it unsynced, then appends an assistant entry
 * describing the failure and re-renders.
 *
 * @param {object} event Error event or error-like object (`message` / `error`,
 *   optionally `providerError`).
 * @param {string} sendId Send id of the user turn this error belongs to.
 * @param {object} [opts]
 * @param {boolean} [opts.serverReceived] Whether the server received the turn.
 *   Treated as false when the issue type is 'provider_unavailable_local'.
 */
function _finishStreamingError(event, sendId, opts) {
  const options = opts || {};
  _resetStreamingState();

  const issue = _rememberProviderIssue(event);
  const fallbackText = (event && (event.message || event.error)) || 'Failed to get response';

  const locallyUnavailable = Boolean(issue) && issue.type === 'provider_unavailable_local';
  const serverReceived = locallyUnavailable ? false : options.serverReceived;

  if (serverReceived) {
    _confirmLastUserSend(sendId);
  } else {
    _markSendUnsynced(sendId, issue ? issue.message : fallbackText);
  }

  const reply = issue
    ? { role: 'assistant', text: issue.title + ': ' + issue.message, _providerError: issue }
    : { role: 'assistant', text: 'Error: ' + fallbackText };
  chatHistory.push(reply);

  _updateActiveBranch();
  WE.renderChat();
}
2502
+
2362
2503
  // Keep active branch data in sync with chatHistory
2363
2504
  function _updateActiveBranch() {
2364
2505
  Object.keys(chatBranches).forEach(function(k) {
@@ -5844,6 +5985,21 @@ WE._toggleActionHistory = function() {
5844
5985
  function checkProviderStatus() {
5845
5986
  api('/provider/status').then(function(d) {
5846
5987
  _providerStatus = d;
5988
+ if (d && d.configured && d.anyAvailable === false) {
5989
+ _rememberProviderIssue({
5990
+ type: 'provider_unavailable_local',
5991
+ title: 'No AI provider available',
5992
+ message: 'Providers are configured, but every provider is currently marked unhealthy. Check Setup or wait for health checks to recover.',
5993
+ rawMessage: (d.providers || []).map(function(p) {
5994
+ return (p.providerType || p.providerId || 'provider') + ': ' + (p.lastError || p.status || 'unhealthy');
5995
+ }).join('\n'),
5996
+ actionUrl: '/setup.html',
5997
+ });
5998
+ if (currentView === 'chat') renderChatUI();
5999
+ } else if (_activeProviderIssue && _activeProviderIssue.type === 'provider_unavailable_local') {
6000
+ _activeProviderIssue = null;
6001
+ if (currentView === 'chat') renderChatUI();
6002
+ }
5847
6003
  renderProviderBanner();
5848
6004
  // Adaptive polling: fast during setup, slow in steady state
5849
6005
  var interval = (d.setupAction === 'ready') ? 60000 : 10000;
@@ -5865,7 +6021,10 @@ function renderProviderBanner() {
5865
6021
  var d = _providerStatus;
5866
6022
  var msg = 'No AI provider configured — Wall-E is in limited mode.';
5867
6023
  var btnHtml = '';
5868
- if (d.setupAction === 'auto-register-detected') {
6024
+ if (d.configured && d.anyAvailable === false) {
6025
+ msg = 'AI providers are configured, but none are available right now.';
6026
+ btnHtml = '<button class="walle-btn primary" onclick="navTo(\'setup\')">Open Setup</button>';
6027
+ } else if (d.setupAction === 'auto-register-detected') {
5869
6028
  var types = (d.detected || []).map(function(x) { return x.type; }).join(', ');
5870
6029
  msg = 'Found existing API keys (' + types + ') — activate to get started.';
5871
6030
  btnHtml = '<button class="walle-btn" onclick="WE._registerDetectedProviders()">Use detected providers</button>';
@@ -5876,8 +6035,8 @@ function renderProviderBanner() {
5876
6035
  } else {
5877
6036
  btnHtml = '<button class="walle-btn" onclick="navTo(\'setup\')">Add API key</button>';
5878
6037
  }
5879
- var html = '<div id="walle-provider-banner" style="background:#fef3cd;border:1px solid #ffc107;padding:10px 16px;margin:0 0 8px 0;border-radius:8px;display:flex;align-items:center;justify-content:space-between;gap:12px;font-size:13px;">'
5880
- + '<span>' + msg + '</span>' + btnHtml + '</div>';
6038
+ var html = '<div id="walle-provider-banner" class="we-provider-setup-banner">'
6039
+ + '<span>' + esc(msg) + '</span><div class="we-provider-setup-actions">' + btnHtml + '</div></div>';
5881
6040
  if (existing) {
5882
6041
  existing.outerHTML = html;
5883
6042
  } else {
@@ -5939,7 +6098,17 @@ var _alertsPollTimer = null;
5939
6098
 
5940
6099
  function checkServiceAlerts() {
5941
6100
  api('/alerts').then(function(d) {
5942
- renderServiceAlerts(d.alerts || []);
6101
+ var alerts = d.alerts || [];
6102
+ var providerIssue = null;
6103
+ for (var i = 0; i < alerts.length; i++) {
6104
+ var issue = _normalizeProviderIssue(alerts[i]);
6105
+ if (issue) providerIssue = issue;
6106
+ }
6107
+ if (providerIssue) {
6108
+ _activeProviderIssue = providerIssue;
6109
+ if (currentView === 'chat') renderChatUI();
6110
+ }
6111
+ renderServiceAlerts(alerts);
5943
6112
  clearTimeout(_alertsPollTimer);
5944
6113
  _alertsPollTimer = setTimeout(checkServiceAlerts, 60000);
5945
6114
  }).catch(function() {
@@ -5955,22 +6124,23 @@ function renderServiceAlerts(alerts) {
5955
6124
  return;
5956
6125
  }
5957
6126
  var items = alerts.map(function(a) {
5958
- var icon = a.type === 'auth_expired' ? '&#9888;' : a.type === 'update_available' ? '&#8593;' : '&#9679;';
5959
- var color = a.type === 'auth_expired' ? '#e03131' : a.type === 'update_available' ? '#228be6' : '#fab005';
6127
+ var providerIssue = _normalizeProviderIssue(a);
6128
+ var kind = providerIssue ? 'provider' : (a.type === 'auth_expired' ? 'error' : a.type === 'update_available' ? 'info' : 'warning');
6129
+ var icon = kind === 'provider' || kind === 'error' ? '!' : kind === 'info' ? '&#8593;' : '&#9679;';
5960
6130
  var safeId = esc(a.id).replace(/'/g, '&#39;');
5961
- var dismissBtn = '<button onclick="WE._dismissAlert(\'' + safeId + '\')" style="background:none;border:none;cursor:pointer;font-size:16px;color:#999;padding:0 4px;" title="Dismiss">&times;</button>';
6131
+ var dismissBtn = '<button class="we-service-alert-dismiss" onclick="WE._dismissAlert(\'' + safeId + '\')" title="Dismiss">&times;</button>';
5962
6132
  var actionBtn = '';
5963
6133
  if (a.action_url && /^(\/|https?:\/\/)/.test(a.action_url)) {
5964
- actionBtn = ' <a href="' + esc(a.action_url) + '" style="color:' + color + ';font-weight:600;text-decoration:underline;">Fix</a>';
6134
+ actionBtn = ' <a href="' + esc(a.action_url) + '" class="we-service-alert-action">Fix</a>';
5965
6135
  }
5966
- return '<div style="display:flex;align-items:center;gap:8px;padding:6px 0;">'
5967
- + '<span style="color:' + color + ';">' + icon + '</span>'
5968
- + '<span style="flex:1;">' + esc(a.message) + actionBtn + '</span>'
6136
+ return '<div class="we-service-alert-item ' + kind + '">'
6137
+ + '<span class="we-service-alert-icon">' + icon + '</span>'
6138
+ + '<span class="we-service-alert-text">' + esc(providerIssue ? providerIssue.message : a.message) + actionBtn + '</span>'
5969
6139
  + dismissBtn + '</div>';
5970
6140
  }).join('');
5971
6141
 
5972
- var html = '<div id="walle-service-alerts" style="background:#fff3f3;border:1px solid #ffa8a8;padding:8px 14px;margin:0 0 8px 0;border-radius:8px;font-size:13px;">'
5973
- + '<div style="font-weight:600;margin-bottom:4px;">Service Alerts</div>' + items + '</div>';
6142
+ var html = '<div id="walle-service-alerts" class="we-service-alerts">'
6143
+ + '<div class="we-service-alerts-title">Service Alerts</div>' + items + '</div>';
5974
6144
 
5975
6145
  if (existing) {
5976
6146
  existing.outerHTML = html;
@@ -5984,7 +6154,13 @@ WE._dismissAlert = function(alertId) {
5984
6154
  resolveWalleBase().then(function() {
5985
6155
  var token = window._ctmState?.token || '';
5986
6156
  fetch(WALLE_BASE + '/api/wall-e/alerts/' + encodeURIComponent(alertId) + '?token=' + token, { method: 'DELETE' })
5987
- .then(function() { checkServiceAlerts(); });
6157
+ .then(function() {
6158
+ if (/^ai_provider:/.test(String(alertId || ''))) {
6159
+ _activeProviderIssue = null;
6160
+ if (currentView === 'chat') renderChatUI();
6161
+ }
6162
+ checkServiceAlerts();
6163
+ });
5988
6164
  });
5989
6165
  };
5990
6166
 
@@ -6102,7 +6278,7 @@ WE.renderEval = function() {
6102
6278
  body.textContent = '';
6103
6279
 
6104
6280
  Promise.all([
6105
- api('/eval/dashboard').catch(function() { return { data: { providers: 0, totalEvals: 0, shadowEvals: 0, benchmarkEvals: 0, leaderboard: [] } }; }),
6281
+ api('/eval/dashboard').catch(function() { return { data: { providers: 0, modelGroups: 0, totalEvals: 0, shadowEvals: 0, benchmarkEvals: 0, trustedBenchmarkEvals: 0, trustedBenchmarkModels: 0, provisionalBenchmarkModels: 0, legacyBenchmarkModels: 0, leaderboard: [] } }; }),
6106
6282
  api('/eval/runs').catch(function() { return { data: [] }; }),
6107
6283
  api('/eval/benchmarks').catch(function() { return { data: [] }; }),
6108
6284
  ]).then(function(results) {
@@ -6120,6 +6296,35 @@ function _evalScoreColor(score) {
6120
6296
  return '#f87171';
6121
6297
  }
6122
6298
 
6299
/**
 * Pick the benchmark score to display for a leaderboard row.
 *
 * Uses `trusted_avg_score` when it is neither null nor undefined, otherwise
 * `avg_score`.
 *
 * @param {object|null|undefined} row Leaderboard row.
 * @returns {number} The score as a finite number, or 0 when it is missing or not finite.
 */
function _evalBenchmarkScore(row) {
  let raw;
  if (row && row.trusted_avg_score != null) {
    raw = row.trusted_avg_score;
  } else {
    raw = row && row.avg_score;
  }
  const score = Number(raw);
  return Number.isFinite(score) ? score : 0;
}
6304
+
6305
/**
 * Resolve the trust status of a leaderboard row.
 *
 * An explicit, truthy `trust_status` is returned as a string. Otherwise the
 * status is derived from `trusted_evals` against `min_trusted_evals`, where a
 * missing or zero minimum falls back to 10.
 *
 * @param {object|null|undefined} row Leaderboard row.
 * @returns {string} The explicit status, or 'trusted' / 'provisional' / 'legacy'.
 */
function _evalTrustStatus(row) {
  const { trust_status, trusted_evals, min_trusted_evals } = row || {};
  if (trust_status) return String(trust_status);

  const trustedCount = Number(trusted_evals || 0);
  const threshold = Number(min_trusted_evals || 10);
  if (trustedCount >= threshold) return 'trusted';
  if (trustedCount > 0) return 'provisional';
  return 'legacy';
}
6312
+
6313
/**
 * Map a trust status to its display colour.
 *
 * @param {string} status Trust status.
 * @returns {string} Hex colour: green for 'trusted', yellow for 'provisional',
 *   red for anything else.
 */
function _evalTrustColor(status) {
  switch (status) {
    case 'trusted':
      return '#4ade80';
    case 'provisional':
      return '#facc15';
    default:
      return '#f87171';
  }
}
6318
+
6319
/**
 * Format the score confidence interval of a leaderboard row as "low-high".
 *
 * Each bound comes from the `trusted_score_confidence_*` field when that is
 * neither null nor undefined, otherwise from `score_confidence_*`.
 *
 * @param {object|null|undefined} row Leaderboard row.
 * @returns {string} Interval with two decimals per bound, or '—' when either
 *   bound is missing or not a finite number.
 */
function _evalCiLabel(row) {
  // Number(null) and Number('') are both 0, so a missing bound must be detected
  // before coercion; otherwise it would render as "0.00" instead of the placeholder.
  function toBound(value) {
    if (value == null) return NaN;
    if (typeof value === 'string' && value.trim() === '') return NaN;
    return Number(value);
  }

  const low = toBound(row?.trusted_score_confidence_low ?? row?.score_confidence_low);
  const high = toBound(row?.trusted_score_confidence_high ?? row?.score_confidence_high);
  if (!Number.isFinite(low) || !Number.isFinite(high)) return '—';
  return low.toFixed(2) + '-' + high.toFixed(2);
}
6327
+
6123
6328
  function _evalMakeCell(text, opts) {
6124
6329
  var td = document.createElement('td');
6125
6330
  td.style.cssText = 'padding:' + ((opts && opts.pad) || '6px 8px') + ';' + ((opts && opts.center) ? 'text-align:center;' : '');
@@ -6140,12 +6345,15 @@ function _renderEvalDashboard(body, dashboard, runs, benchmarks) {
6140
6345
 
6141
6346
  // Stats grid
6142
6347
  var stats = document.createElement('div');
6143
- stats.style.cssText = 'display:grid;grid-template-columns:repeat(4,1fr);gap:12px;margin-bottom:16px;';
6348
+ stats.style.cssText = 'display:grid;grid-template-columns:repeat(auto-fit,minmax(120px,1fr));gap:12px;margin-bottom:16px;';
6144
6349
  var statItems = [
6145
6350
  { label: 'Providers', value: dashboard.providers || 0 },
6351
+ { label: 'Models', value: dashboard.modelGroups || (dashboard.leaderboard || []).length || 0 },
6146
6352
  { label: 'Total Evals', value: dashboard.totalEvals || 0 },
6147
6353
  { label: 'Shadow Evals', value: dashboard.shadowEvals || 0 },
6148
6354
  { label: 'Benchmark Evals', value: dashboard.benchmarkEvals || 0 },
6355
+ { label: 'Trusted Evals', value: dashboard.trustedBenchmarkEvals || 0 },
6356
+ { label: 'Trusted Models', value: dashboard.trustedBenchmarkModels || 0 },
6149
6357
  ];
6150
6358
  statItems.forEach(function(item) {
6151
6359
  var card = document.createElement('div');
@@ -6162,6 +6370,15 @@ function _renderEvalDashboard(body, dashboard, runs, benchmarks) {
6162
6370
  });
6163
6371
  body.appendChild(stats);
6164
6372
 
6373
+ if ((dashboard.provisionalBenchmarkModels || 0) || (dashboard.legacyBenchmarkModels || 0)) {
6374
+ var trustNote = document.createElement('div');
6375
+ trustNote.style.cssText = 'font-size:11px;color:var(--text-dim);margin:-6px 0 14px;';
6376
+ trustNote.textContent = 'Benchmark trust: ' + (dashboard.trustedBenchmarkModels || 0) + ' trusted, ' +
6377
+ (dashboard.provisionalBenchmarkModels || 0) + ' provisional, ' +
6378
+ (dashboard.legacyBenchmarkModels || 0) + ' legacy model groups.';
6379
+ body.appendChild(trustNote);
6380
+ }
6381
+
6165
6382
  // Sub-nav tabs
6166
6383
  var nav = document.createElement('div');
6167
6384
  nav.className = 'we-eval-subnav';
@@ -6228,6 +6445,7 @@ function _renderEvalLeaderboard(container, dashboard) {
6228
6445
 
6229
6446
  lb.forEach(function(row) {
6230
6447
  if (!row.categories) return;
6448
+ var compositeScore = _evalBenchmarkScore(row);
6231
6449
  var card = document.createElement('div');
6232
6450
  card.style.cssText = 'background:var(--bg-secondary,#1a1a2e);border:1px solid var(--border);border-radius:8px;padding:10px 12px;min-width:180px;flex:1;max-width:240px;';
6233
6451
 
@@ -6238,8 +6456,8 @@ function _renderEvalLeaderboard(container, dashboard) {
6238
6456
  card.appendChild(modelName);
6239
6457
 
6240
6458
  var compositeLabel = document.createElement('div');
6241
- compositeLabel.style.cssText = 'font-size:20px;font-weight:700;color:' + _evalScoreColor(row.avg_score || 0) + ';margin-bottom:6px;font-variant-numeric:tabular-nums;';
6242
- compositeLabel.textContent = (row.avg_score || 0).toFixed(3);
6459
+ compositeLabel.style.cssText = 'font-size:20px;font-weight:700;color:' + _evalScoreColor(compositeScore) + ';margin-bottom:6px;font-variant-numeric:tabular-nums;';
6460
+ compositeLabel.textContent = compositeScore.toFixed(3);
6243
6461
  card.appendChild(compositeLabel);
6244
6462
 
6245
6463
  // SVG radar chart (small multiple)
@@ -6290,9 +6508,9 @@ function _renderEvalLeaderboard(container, dashboard) {
6290
6508
  });
6291
6509
  var dataPoly = document.createElementNS(ns, 'polygon');
6292
6510
  dataPoly.setAttribute('points', dataPts.join(' '));
6293
- dataPoly.setAttribute('fill', _evalScoreColor(row.avg_score || 0));
6511
+ dataPoly.setAttribute('fill', _evalScoreColor(compositeScore));
6294
6512
  dataPoly.setAttribute('fill-opacity', '0.2');
6295
- dataPoly.setAttribute('stroke', _evalScoreColor(row.avg_score || 0));
6513
+ dataPoly.setAttribute('stroke', _evalScoreColor(compositeScore));
6296
6514
  dataPoly.setAttribute('stroke-width', '1.5');
6297
6515
  svg.appendChild(dataPoly);
6298
6516
 
@@ -6306,7 +6524,7 @@ function _renderEvalLeaderboard(container, dashboard) {
6306
6524
  dot.setAttribute('cx', dx);
6307
6525
  dot.setAttribute('cy', dy);
6308
6526
  dot.setAttribute('r', '2.5');
6309
- dot.setAttribute('fill', _evalScoreColor(row.avg_score || 0));
6527
+ dot.setAttribute('fill', _evalScoreColor(compositeScore));
6310
6528
  svg.appendChild(dot);
6311
6529
 
6312
6530
  // Label
@@ -6425,8 +6643,8 @@ function _renderEvalLeaderboard(container, dashboard) {
6425
6643
  var table = document.createElement('table');
6426
6644
  table.style.cssText = 'width:100%;border-collapse:collapse;font-size:12px;';
6427
6645
  var hdrRow = document.createElement('tr');
6428
- var headers = ['Provider','Model','Composite','Evals','Errors','Throughput','Latency','Cost'];
6429
- if (hasDims) headers.splice(3, 0, 'Code Gen', 'Tool Use', 'Planning', 'Efficiency');
6646
+ var headers = ['Provider','Model','Composite','Trust','CI','Evals','Errors','Throughput','Latency','Cost'];
6647
+ if (hasDims) headers.splice(5, 0, 'Code Gen', 'Tool Use', 'Planning', 'Efficiency');
6430
6648
  headers.forEach(function(h) {
6431
6649
  var th = document.createElement('th');
6432
6650
  th.style.cssText = 'text-align:left;padding:8px;border-bottom:1px solid var(--border);font-size:11px;';
@@ -6437,11 +6655,27 @@ function _renderEvalLeaderboard(container, dashboard) {
6437
6655
  table.appendChild(hdrRow);
6438
6656
  var tbody = document.createElement('tbody');
6439
6657
  lb.forEach(function(row, idx) {
6658
+ var compositeScore = _evalBenchmarkScore(row);
6659
+ var trustStatus = _evalTrustStatus(row);
6660
+ var trustedEvals = Number(row.trusted_evals || 0);
6661
+ var minTrusted = Number(row.min_trusted_evals || 10);
6440
6662
  var tr = document.createElement('tr');
6441
6663
  tr.style.cssText = idx % 2 === 0 ? '' : 'background:rgba(255,255,255,0.02);';
6442
6664
  tr.appendChild(_evalMakeCell(row.provider));
6443
6665
  tr.appendChild(_evalMakeCell(row.model));
6444
- tr.appendChild(_evalMakeCell((row.avg_score || 0).toFixed(3), { center: true, scoreColor: true }));
6666
+ tr.appendChild(_evalMakeCell(compositeScore.toFixed(3), { center: true, scoreColor: true }));
6667
+ var trustCell = document.createElement('td');
6668
+ trustCell.style.cssText = 'padding:6px 8px;text-align:center;';
6669
+ var trustBadge = document.createElement('span');
6670
+ trustBadge.style.cssText = 'font-size:9px;padding:1px 5px;border-radius:3px;background:' + _evalTrustColor(trustStatus) + '22;color:' + _evalTrustColor(trustStatus) + ';font-weight:600;text-transform:uppercase;';
6671
+ trustBadge.textContent = trustStatus;
6672
+ var trustMeta = document.createElement('div');
6673
+ trustMeta.style.cssText = 'font-size:9px;color:var(--text-dim);margin-top:2px;';
6674
+ trustMeta.textContent = trustedEvals + '/' + minTrusted;
6675
+ trustCell.appendChild(trustBadge);
6676
+ trustCell.appendChild(trustMeta);
6677
+ tr.appendChild(trustCell);
6678
+ tr.appendChild(_evalMakeCell(_evalCiLabel(row), { center: true }));
6445
6679
  if (hasDims && row.categories) {
6446
6680
  ['codeGen', 'toolUse', 'planning', 'efficiency'].forEach(function(k) {
6447
6681
  tr.appendChild(_evalMakeCell((row.categories[k] || 0).toFixed(2), { center: true, scoreColor: true }));
@@ -6544,13 +6778,31 @@ function _renderEvalBenchmarks(container, benchmarks) {
6544
6778
  name.textContent = suite.name;
6545
6779
  var count = document.createElement('span');
6546
6780
  count.style.cssText = 'font-size:11px;color:var(--text-dim);';
6547
- count.textContent = suite.count + ' prompts';
6781
+ if (suite.adapter) {
6782
+ count.textContent = suite.count ? suite.count + ' tasks' : 'adapter suite';
6783
+ } else {
6784
+ count.textContent = (suite.count || 0) + ' prompts';
6785
+ }
6548
6786
  top.appendChild(name);
6549
6787
  top.appendChild(count);
6550
6788
  card.appendChild(top);
6551
6789
 
6552
6790
  var tags = document.createElement('div');
6553
6791
  tags.style.cssText = 'margin-bottom:8px;';
6792
+ if (suite.adapter) {
6793
+ var adapterTag = document.createElement('span');
6794
+ adapterTag.className = 'we-eval-diff-tag';
6795
+ adapterTag.style.cssText = 'background:#60a5fa;color:#06121f;margin-right:4px;';
6796
+ adapterTag.textContent = 'ADAPTER';
6797
+ tags.appendChild(adapterTag);
6798
+ }
6799
+ if (suite.datasetVersion) {
6800
+ var datasetTag = document.createElement('span');
6801
+ datasetTag.className = 'we-eval-diff-tag';
6802
+ datasetTag.style.cssText = 'background:#2dd4bf;color:#06121f;margin-right:4px;';
6803
+ datasetTag.textContent = String(suite.datasetVersion).toUpperCase();
6804
+ tags.appendChild(datasetTag);
6805
+ }
6554
6806
  (suite.difficulties || []).forEach(function(d) {
6555
6807
  var tag = document.createElement('span');
6556
6808
  tag.className = 'we-eval-diff-tag';
@@ -7317,12 +7569,13 @@ function _renderEvalComparison(container, dashboard) {
7317
7569
  chartWrap.style.cssText = 'margin-bottom:20px;';
7318
7570
  var chartTitle = document.createElement('div');
7319
7571
  chartTitle.style.cssText = 'font-size:12px;color:var(--text-dim);margin-bottom:8px;';
7320
- chartTitle.textContent = 'Composite Score by Model';
7572
+ chartTitle.textContent = 'Composite Score by Model (trusted mean when available)';
7321
7573
  chartWrap.appendChild(chartTitle);
7322
7574
 
7323
- var maxScore = Math.max.apply(null, lb.map(function(r) { return r.avg_score || 0; }));
7324
- lb.sort(function(a, b) { return (b.avg_score || 0) - (a.avg_score || 0); });
7575
+ var maxScore = Math.max.apply(null, lb.map(function(r) { return _evalBenchmarkScore(r); }));
7576
+ lb.sort(function(a, b) { return _evalBenchmarkScore(b) - _evalBenchmarkScore(a); });
7325
7577
  lb.forEach(function(row) {
7578
+ var rowScore = _evalBenchmarkScore(row);
7326
7579
  var barRow = document.createElement('div');
7327
7580
  barRow.style.cssText = 'display:flex;align-items:center;margin-bottom:4px;gap:8px;';
7328
7581
 
@@ -7334,13 +7587,13 @@ function _renderEvalComparison(container, dashboard) {
7334
7587
  var barBg = document.createElement('div');
7335
7588
  barBg.style.cssText = 'flex:1;height:18px;background:var(--border);border-radius:3px;overflow:hidden;position:relative;';
7336
7589
  var barFill = document.createElement('div');
7337
- var pct = maxScore > 0 ? ((row.avg_score || 0) / maxScore) * 100 : 0;
7338
- barFill.style.cssText = 'height:100%;background:' + _evalScoreColor(row.avg_score || 0) + ';width:' + pct + '%;border-radius:3px;transition:width 0.3s;';
7590
+ var pct = maxScore > 0 ? (rowScore / maxScore) * 100 : 0;
7591
+ barFill.style.cssText = 'height:100%;background:' + _evalScoreColor(rowScore) + ';width:' + pct + '%;border-radius:3px;transition:width 0.3s;';
7339
7592
  barBg.appendChild(barFill);
7340
7593
 
7341
7594
  var scoreLabel = document.createElement('div');
7342
- scoreLabel.style.cssText = 'width:60px;font-size:11px;font-weight:600;color:' + _evalScoreColor(row.avg_score || 0) + ';font-variant-numeric:tabular-nums;';
7343
- scoreLabel.textContent = (row.avg_score || 0).toFixed(3);
7595
+ scoreLabel.style.cssText = 'width:60px;font-size:11px;font-weight:600;color:' + _evalScoreColor(rowScore) + ';font-variant-numeric:tabular-nums;';
7596
+ scoreLabel.textContent = rowScore.toFixed(3);
7344
7597
 
7345
7598
  barRow.appendChild(label);
7346
7599
  barRow.appendChild(barBg);
@@ -7460,16 +7713,17 @@ function _renderEvalComparison(container, dashboard) {
7460
7713
  costTable.appendChild(cHdr);
7461
7714
 
7462
7715
  var ranked = lb.slice().sort(function(a, b) {
7463
- var aEff = (a.total_cost || 0) > 0 ? (a.avg_score || 0) / a.total_cost : 999999;
7464
- var bEff = (b.total_cost || 0) > 0 ? (b.avg_score || 0) / b.total_cost : 999999;
7716
+ var aEff = (a.total_cost || 0) > 0 ? _evalBenchmarkScore(a) / a.total_cost : 999999;
7717
+ var bEff = (b.total_cost || 0) > 0 ? _evalBenchmarkScore(b) / b.total_cost : 999999;
7465
7718
  return bEff - aEff;
7466
7719
  });
7467
7720
  ranked.forEach(function(row, i) {
7721
+ var rowScore = _evalBenchmarkScore(row);
7468
7722
  var tr = document.createElement('tr');
7469
- var eff = (row.total_cost || 0) > 0 ? ((row.avg_score || 0) / row.total_cost).toFixed(1) : 'free';
7723
+ var eff = (row.total_cost || 0) > 0 ? (rowScore / row.total_cost).toFixed(1) : 'free';
7470
7724
  tr.appendChild(_evalMakeCell('#' + (i + 1)));
7471
7725
  tr.appendChild(_evalMakeCell((row.provider || '') + '/' + (row.model || '')));
7472
- tr.appendChild(_evalMakeCell((row.avg_score || 0).toFixed(3), { center: true, scoreColor: true }));
7726
+ tr.appendChild(_evalMakeCell(rowScore.toFixed(3), { center: true, scoreColor: true }));
7473
7727
  tr.appendChild(_evalMakeCell('$' + (row.total_cost || 0).toFixed(4), { center: true }));
7474
7728
  tr.appendChild(_evalMakeCell(eff, { center: true }));
7475
7729
  costTable.appendChild(tr);