@darkrishabh/bench-ai 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/README.md +333 -0
  2. package/dist/cli/app.d.ts +11 -0
  3. package/dist/cli/app.d.ts.map +1 -0
  4. package/dist/cli/app.js +48 -0
  5. package/dist/cli/app.js.map +1 -0
  6. package/dist/cli/components/DiffView.d.ts +5 -0
  7. package/dist/cli/components/DiffView.d.ts.map +1 -0
  8. package/dist/cli/components/DiffView.js +14 -0
  9. package/dist/cli/components/DiffView.js.map +1 -0
  10. package/dist/cli/components/EvalView.d.ts +6 -0
  11. package/dist/cli/components/EvalView.d.ts.map +1 -0
  12. package/dist/cli/components/EvalView.js +82 -0
  13. package/dist/cli/components/EvalView.js.map +1 -0
  14. package/dist/cli/components/Spinner.d.ts +4 -0
  15. package/dist/cli/components/Spinner.d.ts.map +1 -0
  16. package/dist/cli/components/Spinner.js +15 -0
  17. package/dist/cli/components/Spinner.js.map +1 -0
  18. package/dist/cli/index.d.ts +3 -0
  19. package/dist/cli/index.d.ts.map +1 -0
  20. package/dist/cli/index.js +117 -0
  21. package/dist/cli/index.js.map +1 -0
  22. package/dist/cli/run-command.d.ts +11 -0
  23. package/dist/cli/run-command.d.ts.map +1 -0
  24. package/dist/cli/run-command.js +119 -0
  25. package/dist/cli/run-command.js.map +1 -0
  26. package/dist/engine/cost.d.ts +3 -0
  27. package/dist/engine/cost.d.ts.map +1 -0
  28. package/dist/engine/cost.js +52 -0
  29. package/dist/engine/cost.js.map +1 -0
  30. package/dist/engine/diff.d.ts +6 -0
  31. package/dist/engine/diff.d.ts.map +1 -0
  32. package/dist/engine/diff.js +43 -0
  33. package/dist/engine/diff.js.map +1 -0
  34. package/dist/engine/eval.d.ts +14 -0
  35. package/dist/engine/eval.d.ts.map +1 -0
  36. package/dist/engine/eval.js +194 -0
  37. package/dist/engine/eval.js.map +1 -0
  38. package/dist/engine/index.d.ts +15 -0
  39. package/dist/engine/index.d.ts.map +1 -0
  40. package/dist/engine/index.js +10 -0
  41. package/dist/engine/index.js.map +1 -0
  42. package/dist/engine/providers/base.d.ts +7 -0
  43. package/dist/engine/providers/base.d.ts.map +1 -0
  44. package/dist/engine/providers/base.js +2 -0
  45. package/dist/engine/providers/base.js.map +1 -0
  46. package/dist/engine/providers/claude.d.ts +15 -0
  47. package/dist/engine/providers/claude.d.ts.map +1 -0
  48. package/dist/engine/providers/claude.js +53 -0
  49. package/dist/engine/providers/claude.js.map +1 -0
  50. package/dist/engine/providers/minimax.d.ts +16 -0
  51. package/dist/engine/providers/minimax.d.ts.map +1 -0
  52. package/dist/engine/providers/minimax.js +67 -0
  53. package/dist/engine/providers/minimax.js.map +1 -0
  54. package/dist/engine/providers/ollama.d.ts +14 -0
  55. package/dist/engine/providers/ollama.d.ts.map +1 -0
  56. package/dist/engine/providers/ollama.js +60 -0
  57. package/dist/engine/providers/ollama.js.map +1 -0
  58. package/dist/engine/providers/openai-compatible.d.ts +19 -0
  59. package/dist/engine/providers/openai-compatible.d.ts.map +1 -0
  60. package/dist/engine/providers/openai-compatible.js +109 -0
  61. package/dist/engine/providers/openai-compatible.js.map +1 -0
  62. package/dist/engine/providers/subprocess.d.ts +55 -0
  63. package/dist/engine/providers/subprocess.d.ts.map +1 -0
  64. package/dist/engine/providers/subprocess.js +111 -0
  65. package/dist/engine/providers/subprocess.js.map +1 -0
  66. package/dist/engine/suite-loader.d.ts +11 -0
  67. package/dist/engine/suite-loader.d.ts.map +1 -0
  68. package/dist/engine/suite-loader.js +75 -0
  69. package/dist/engine/suite-loader.js.map +1 -0
  70. package/dist/engine/types.d.ts +104 -0
  71. package/dist/engine/types.d.ts.map +1 -0
  72. package/dist/engine/types.js +2 -0
  73. package/dist/engine/types.js.map +1 -0
  74. package/next-env.d.ts +6 -0
  75. package/next.config.ts +26 -0
  76. package/package.json +72 -0
  77. package/public/icon.svg +14 -0
  78. package/src/app/api/diff/route.ts +135 -0
  79. package/src/app/api/models/route.ts +96 -0
  80. package/src/app/api/suite/route.ts +314 -0
  81. package/src/app/globals.css +215 -0
  82. package/src/app/icon.svg +14 -0
  83. package/src/app/layout.tsx +44 -0
  84. package/src/app/opengraph-image.tsx +73 -0
  85. package/src/app/page.tsx +952 -0
  86. package/src/app/suite/layout.tsx +12 -0
  87. package/src/app/suite/page.tsx +206 -0
  88. package/src/app/twitter-image.tsx +1 -0
  89. package/src/components/BenchAiLogo.tsx +38 -0
  90. package/src/components/ComparePanel.tsx +643 -0
  91. package/src/components/ConfigPanel.tsx +809 -0
  92. package/src/components/MarkdownOutput.tsx +16 -0
  93. package/src/components/ModelResponseCard.tsx +313 -0
  94. package/src/components/QuickComparisonBar.tsx +184 -0
  95. package/src/components/ResponsesLineDiff.tsx +149 -0
  96. package/src/components/SettingsPanel.tsx +591 -0
  97. package/src/components/SuitePanel.tsx +875 -0
  98. package/src/lib/brand.ts +4 -0
  99. package/src/lib/config-yaml.ts +70 -0
  100. package/src/lib/consume-suite-sse.ts +70 -0
  101. package/src/lib/describe-judge.ts +23 -0
  102. package/src/lib/model-chip-palette.ts +9 -0
  103. package/src/lib/openai-model-list.ts +33 -0
  104. package/src/lib/provider-ui.ts +30 -0
  105. package/src/lib/resolve-credentials.ts +80 -0
  106. package/src/lib/run-history.ts +66 -0
  107. package/src/lib/simple-line-diff.ts +50 -0
  108. package/src/lib/storage.ts +100 -0
  109. package/src/lib/suite-judge-meta.ts +13 -0
  110. package/src/lib/suite-run-history.ts +81 -0
  111. package/src/types.ts +170 -0
  112. package/vercel.json +5 -0
@@ -0,0 +1,952 @@
1
+ "use client";
2
+
3
+ import Link from "next/link";
4
+ import { useState, useEffect, useRef, useMemo } from "react";
5
+ import type { JudgeSettings, LLMInstance, SecretsMap, WebDiffResult, WebProviderResult } from "../types";
6
+ import { DEFAULT_JUDGE_SETTINGS } from "../types";
7
+ import {
8
+ loadInstances,
9
+ saveInstances,
10
+ loadSecrets,
11
+ saveSecrets,
12
+ loadJudgeSettings,
13
+ saveJudgeSettings,
14
+ } from "../lib/storage";
15
+ import { BRAND_NAME, BRAND_TAGLINE } from "../lib/brand";
16
+ import { BenchAiLogo } from "../components/BenchAiLogo";
17
+ import { MODEL_CHIP_PALETTE } from "../lib/model-chip-palette";
18
+ import { resolveInstancesForApi } from "../lib/resolve-credentials";
19
+ import { SettingsPanel } from "../components/SettingsPanel";
20
+ import { ComparePanel } from "../components/ComparePanel";
21
+ import { ModelResponseCard } from "../components/ModelResponseCard";
22
+ import { QuickComparisonBar } from "../components/QuickComparisonBar";
23
+ import { ResponsesLineDiff } from "../components/ResponsesLineDiff";
24
+ import { appendRunHistory, loadRunHistory } from "../lib/run-history";
25
+ import type { RunHistoryEntry } from "../lib/run-history";
26
+
27
// sessionStorage key under which the per-response star ratings are stored as JSON.
+ const RATINGS_KEY = "bench-ai:response-ratings";
28
// Older storage keys; Home reads these as fallbacks on mount and copies the
// first one found over to RATINGS_KEY.
+ const LEGACY_PROMPT_DIFF_RATINGS = "prompt-diff:response-ratings";
29
+ const LEGACY_LLM_DIFF_RATINGS = "llm-diff:response-ratings";
30
+
31
// Ids of the top-level tabs rendered by TabBar.
+ type MainTab = "responses" | "compare" | "history";
32
// Layout modes of the Responses tab, selected through ViewToggle.
+ type ResponsesView = "grid" | "sideBySide" | "diff";
33
+
34
/**
 * Builds the lookup key for one response's star rating.
 *
 * @param ranAt - Timestamp string identifying the run.
 * @param instanceId - Id of the model instance that produced the response.
 * @returns The two parts joined as `ranAt|instanceId`.
 */
function ratingKey(ranAt: string, instanceId: string) {
  const separator = "|";
  return ranAt + separator + instanceId;
}
37
+
38
type BadgeVariant = "good" | "warn" | "muted";

/**
 * Derives the highlight badge and latency tone shown on each response card.
 *
 * Only "valid" results (no error and non-empty output) take part in the
 * comparison. Each valid result gets at most one badge, chosen by priority:
 * "best rated" > "fastest" > "cheapest" > "slowest".
 *
 * Comparative badges ("fastest", "cheapest", "slowest") are only awarded when
 * there is something to compare against: more than one valid result and a
 * real spread between the minimum and maximum value. Without that guard a
 * lone result, or every result in an all-equal-cost run, would be labelled
 * "cheapest" and would also mask the lower-priority "slowest" badge.
 *
 * @param results - All provider results of the run, including failed ones.
 * @param ratings - Star ratings keyed by `ratingKey(runAt, instanceId)`.
 * @param runAt - Run timestamp string used to build the rating keys.
 * @returns `badgeById`, a map from instance id to its badge (instances without
 *   a badge are absent), and `latencyTone`, a classifier that returns "fast"
 *   or "slow" for the extremes of a run with a latency spread and "neutral"
 *   otherwise.
 */
function computeCardHighlights(
  results: WebProviderResult[],
  ratings: Record<string, number>,
  runAt: string
): {
  badgeById: Map<string, { label: string; variant: BadgeVariant }>;
  latencyTone: (r: WebProviderResult) => "fast" | "slow" | "neutral";
} {
  const valid = results.filter((r) => !r.error && r.output.length > 0);
  const rk = (id: string) => ratingKey(runAt, id);
  const comparable = valid.length > 1;

  const latency = valid.map((r) => r.latencyMs);
  const maxL = latency.length ? Math.max(...latency) : 0;
  const minL = latency.length ? Math.min(...latency) : 0;
  const hasLatencySpread = comparable && minL !== maxL;

  const costs = valid.map((r) => r.costUsd);
  const minC = costs.length ? Math.min(...costs) : 0;
  const maxC = costs.length ? Math.max(...costs) : 0;
  const hasCostSpread = comparable && minC !== maxC;

  // Highest rating given to any valid result in this run (0 = nothing rated).
  let bestStar = 0;
  for (const r of valid) {
    bestStar = Math.max(bestStar, ratings[rk(r.instanceId)] ?? 0);
  }

  const badgeById = new Map<string, { label: string; variant: BadgeVariant }>();

  for (const r of valid) {
    const key = rk(r.instanceId);
    const opts: { label: string; variant: BadgeVariant; priority: number }[] = [];
    if (bestStar > 0 && (ratings[key] ?? 0) === bestStar) {
      opts.push({ label: "best rated", variant: "muted", priority: 4 });
    }
    if (hasLatencySpread && r.latencyMs === minL) {
      opts.push({ label: "fastest", variant: "good", priority: 3 });
    }
    if (hasCostSpread && r.costUsd === minC) {
      opts.push({ label: "cheapest", variant: "muted", priority: 2 });
    }
    if (hasLatencySpread && r.latencyMs === maxL) {
      opts.push({ label: "slowest", variant: "warn", priority: 1 });
    }
    // Keep only the highest-priority candidate for this card.
    opts.sort((a, b) => b.priority - a.priority);
    if (opts[0]) badgeById.set(r.instanceId, { label: opts[0].label, variant: opts[0].variant });
  }

  const latencyTone = (r: WebProviderResult): "fast" | "slow" | "neutral" => {
    if (r.error || valid.length < 2) return "neutral";
    if (r.latencyMs === minL && minL !== maxL) return "fast";
    if (r.latencyMs === maxL && minL !== maxL) return "slow";
    return "neutral";
  };

  return { badgeById, latencyTone };
}
92
+
93
/**
 * Top-level tab strip of the page.
 *
 * Renders one button per main tab inside a `<nav aria-label="Main">`. The
 * button whose id equals `active` is drawn with the highlighted colour, weight
 * and underline; clicking any button calls `onChange` with that tab's id.
 */
function TabBar({
  active,
  onChange,
}: {
  active: MainTab;
  onChange: (t: MainTab) => void;
}) {
  const entries: { id: MainTab; label: string }[] = [
    { id: "responses", label: "Responses" },
    { id: "compare", label: "Compare & evaluate" },
    { id: "history", label: "History" },
  ];

  // Builds the button for a single tab entry.
  const renderTab = ({ id, label }: { id: MainTab; label: string }) => {
    const selected = id === active;
    const textColor = selected ? "var(--text-1)" : "var(--text-3)";
    const underline = selected ? "2px solid var(--text-1)" : "2px solid transparent";
    return (
      <button
        key={id}
        type="button"
        onClick={() => onChange(id)}
        style={{
          padding: "0.55rem 0.2rem",
          marginRight: "1.35rem",
          marginBottom: -1,
          // `border` must stay ahead of `borderBottom` so the underline wins.
          border: "none",
          borderBottom: underline,
          background: "transparent",
          color: textColor,
          fontWeight: selected ? 600 : 500,
          fontSize: "0.875rem",
          cursor: "pointer",
          transition: "color 0.15s, border-color 0.15s",
          fontFamily: "inherit",
          whiteSpace: "nowrap",
        }}
      >
        {label}
      </button>
    );
  };

  return (
    <nav
      style={{
        display: "flex",
        gap: "0.15rem",
        borderBottom: "1px solid var(--border)",
        paddingBottom: 0,
        width: "100%",
        maxWidth: "100%",
        overflowX: "auto",
      }}
      aria-label="Main"
    >
      {entries.map((entry) => renderTab(entry))}
    </nav>
  );
}
149
+
150
/**
 * Segmented control for choosing the layout of the Responses tab.
 *
 * Renders one button per layout inside a `role="group"` container. The button
 * matching `value` is styled as selected and exposes that state to assistive
 * technology through `aria-pressed`, so the current layout is not conveyed by
 * colour alone. Clicking a button calls `onChange` with that layout's id.
 */
function ViewToggle({
  value,
  onChange,
}: {
  value: ResponsesView;
  onChange: (v: ResponsesView) => void;
}) {
  const opts: { id: ResponsesView; label: string }[] = [
    { id: "grid", label: "Grid" },
    { id: "sideBySide", label: "Side-by-side" },
    { id: "diff", label: "Diff" },
  ];
  return (
    <div
      role="group"
      aria-label="Response layout"
      style={{
        display: "inline-flex",
        padding: 4,
        borderRadius: 12,
        background: "var(--surface-muted)",
        border: "1px solid var(--border)",
        gap: 3,
      }}
    >
      {opts.map((o) => {
        const on = value === o.id;
        return (
          <button
            key={o.id}
            type="button"
            aria-pressed={on}
            onClick={() => onChange(o.id)}
            style={{
              padding: "0.4rem 0.85rem",
              borderRadius: 9,
              border: "none",
              background: on ? "var(--surface)" : "transparent",
              color: on ? "var(--text-1)" : "var(--text-3)",
              fontWeight: on ? 600 : 500,
              fontSize: "0.78rem",
              cursor: "pointer",
              fontFamily: "inherit",
              boxShadow: on ? "var(--shadow-xs)" : "none",
            }}
          >
            {o.label}
          </button>
        );
      })}
    </div>
  );
}
202
+
203
/**
 * Main page: prompt editor, run controls, and the Responses / Compare /
 * History tabs.
 *
 * State overview:
 * - `instances`, `secrets`, `judge` are loaded through the storage helpers on
 *   mount and written back through them whenever the settings panel changes
 *   them.
 * - `ratings` maps `ratingKey(ranAt, instanceId)` to a star count and is
 *   mirrored to sessionStorage under `RATINGS_KEY`.
 * - `result` is the latest (or restored) run; `error` is the message of the
 *   last failed request.
 *
 * Running a prompt POSTs `{ prompt, instances }` to `/api/diff` and stores the
 * JSON response as `result`. Each new result is appended to the run history
 * once, tracked by its `ranAt` value.
 */
export default function Home() {
  const [prompt, setPrompt] = useState("");
  const [instances, setInstances] = useState<LLMInstance[]>([]);
  const [secrets, setSecrets] = useState<SecretsMap>({});
  const [judge, setJudge] = useState<JudgeSettings>(DEFAULT_JUDGE_SETTINGS);
  const [configOpen, setConfigOpen] = useState(false);
  const [loading, setLoading] = useState(false);
  const [result, setResult] = useState<WebDiffResult | null>(null);
  const [error, setError] = useState<string | null>(null);
  const [tab, setTab] = useState<MainTab>("responses");
  const [responsesView, setResponsesView] = useState<ResponsesView>("grid");
  const [ratings, setRatings] = useState<Record<string, number>>({});
  const [diffLeftId, setDiffLeftId] = useState("");
  const [diffRightId, setDiffRightId] = useState("");
  const [historyVersion, setHistoryVersion] = useState(0);
  const textareaRef = useRef<HTMLTextAreaElement>(null);
  // `ranAt` of the run most recently written to (or restored from) history,
  // so the same run is never appended twice.
  const appendedHistoryForRun = useRef<string | null>(null);

  // On mount: hydrate settings from storage and load saved star ratings,
  // migrating them from a legacy key when the current key is empty.
  useEffect(() => {
    setInstances(loadInstances());
    setSecrets(loadSecrets());
    setJudge(loadJudgeSettings());
    try {
      let raw = sessionStorage.getItem(RATINGS_KEY);
      if (!raw) {
        for (const lk of [LEGACY_PROMPT_DIFF_RATINGS, LEGACY_LLM_DIFF_RATINGS]) {
          raw = sessionStorage.getItem(lk);
          if (raw) {
            sessionStorage.setItem(RATINGS_KEY, raw);
            break;
          }
        }
      }
      if (raw) {
        const parsed = JSON.parse(raw) as Record<string, number>;
        if (parsed && typeof parsed === "object") setRatings(parsed);
      }
    } catch {
      /* ignore: ratings are best-effort; unavailable storage or bad JSON just means no saved ratings */
    }
  }, []);

  // When a different run is shown, point the diff view at its first two
  // usable responses (or the same one twice when only one exists). If the run
  // has no usable response, clear the selection so ids from a previous run do
  // not leak into this one. Keyed on `ranAt` only, so manual selections
  // survive re-renders of the same run.
  useEffect(() => {
    if (!result?.results.length) return;
    const ok = result.results.filter((r) => !r.error && r.output.trim());
    const left = ok[0];
    const right = ok[1] ?? ok[0];
    setDiffLeftId(left ? left.instanceId : "");
    setDiffRightId(right ? right.instanceId : "");
  }, [result?.ranAt]);

  // Append each new result to the run history exactly once.
  useEffect(() => {
    if (!result) return;
    if (appendedHistoryForRun.current === result.ranAt) return;
    appendedHistoryForRun.current = result.ranAt;
    appendRunHistory(result);
    setHistoryVersion((v) => v + 1);
  }, [result]);

  // Re-read the history whenever `historyVersion` is bumped after an append.
  const historyEntries = useMemo(() => loadRunHistory(), [historyVersion]);

  const persistRating = (next: Record<string, number>) => {
    try {
      sessionStorage.setItem(RATINGS_KEY, JSON.stringify(next));
    } catch {
      /* ignore: failing to persist must not break rating in the UI */
    }
  };

  // A rating of 0 or less removes the entry instead of storing it.
  const setStarRating = (runAt: string, instanceId: string, stars: number) => {
    const k = ratingKey(runAt, instanceId);
    setRatings((prev) => {
      const out = { ...prev };
      if (stars <= 0) delete out[k];
      else out[k] = stars;
      persistRating(out);
      return out;
    });
  };

  const updateInstances = (next: LLMInstance[]) => {
    setInstances(next);
    saveInstances(next);
  };

  const updateSecrets = (next: SecretsMap) => {
    setSecrets(next);
    saveSecrets(next);
  };

  const updateJudge = (next: JudgeSettings) => {
    setJudge(next);
    saveJudgeSettings(next);
  };

  const enabled = instances.filter((i) => i.enabled);

  // Single source of truth for "the Run action is currently allowed".
  const canRun = !loading && prompt.trim().length > 0 && enabled.length > 0;

  const newRun = () => {
    setResult(null);
    setError(null);
    setTab("responses");
    setResponsesView("grid");
  };

  const restoreHistoryEntry = (entry: RunHistoryEntry) => {
    setResult(entry.result);
    setPrompt(entry.result.prompt);
    // Drop any error left over from an earlier failed run; otherwise its
    // banner would sit above the restored result.
    setError(null);
    setTab("responses");
    setResponsesView("grid");
    // Mark as already recorded so restoring does not re-append the run.
    appendedHistoryForRun.current = entry.result.ranAt;
  };

  const run = async () => {
    if (!canRun) return;
    setLoading(true);
    setError(null);
    setResult(null);
    setTab("responses");
    setResponsesView("grid");
    try {
      const res = await fetch("/api/diff", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          prompt,
          instances: resolveInstancesForApi(instances, secrets),
        }),
      });
      if (!res.ok) {
        // An empty body would yield an empty (falsy) message and the failure
        // would be invisible, so fall back to the HTTP status.
        const detail = (await res.text()).trim();
        throw new Error(detail || `Request failed with status ${res.status}`);
      }
      setResult(await res.json());
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      setError(message || "Request failed");
    } finally {
      setLoading(false);
    }
  };

  const highlights = result
    ? computeCardHighlights(result.results, ratings, result.ranAt)
    : null;

  // Shown as the run's "total" time: the largest latency among results
  // without an error, in seconds (0 when there is none).
  const wallClockSec = useMemo(() => {
    if (!result) return 0;
    const ms = result.results.filter((r) => !r.error).map((r) => r.latencyMs);
    if (!ms.length) return 0;
    return Math.max(...ms) / 1000;
  }, [result]);

  const showQuickBar = Boolean(result && tab === "responses");
  // Larger bottom padding while the quick comparison bar is rendered.
  const mainPadBottom = showQuickBar ? "5.5rem" : "3rem";

  return (
    <div style={{ minHeight: "100vh", background: "var(--bg-gradient)" }}>
      <header
        style={{
          background: "var(--surface)",
          borderBottom: "1px solid var(--border)",
          position: "sticky",
          top: 0,
          zIndex: 30,
          boxShadow: "var(--shadow-xs)",
        }}
      >
        <div
          style={{
            maxWidth: 1120,
            margin: "0 auto",
            padding: "0 1.5rem",
            minHeight: 58,
            display: "flex",
            alignItems: "center",
            justifyContent: "space-between",
            gap: "1rem",
          }}
        >
          <div style={{ display: "flex", alignItems: "center", gap: "0.65rem", flexWrap: "wrap", minWidth: 0 }}>
            <BenchAiLogo size={30} />
            <span
              style={{
                fontWeight: 700,
                fontSize: "1.05rem",
                letterSpacing: "-0.03em",
                color: "var(--text-1)",
              }}
            >
              {BRAND_NAME}
            </span>
            <span
              style={{
                fontSize: "0.65rem",
                fontWeight: 600,
                color: "var(--text-3)",
                letterSpacing: "0.04em",
              }}
            >
              v0.1
            </span>
          </div>

          <div style={{ display: "flex", alignItems: "center", gap: "0.55rem", flexShrink: 0, flexWrap: "wrap", justifyContent: "flex-end" }}>
            <Link
              href="/suite"
              style={{
                fontSize: "0.8125rem",
                fontWeight: 600,
                color: "var(--text-2)",
                textDecoration: "none",
                padding: "0.45rem 0.85rem",
                borderRadius: "var(--r-md)",
                border: "1px solid var(--border)",
                background: "var(--surface)",
                boxShadow: "var(--shadow-xs)",
                transition: "background 0.15s, border-color 0.15s",
                whiteSpace: "nowrap",
              }}
            >
              Test suites
            </Link>
            <button
              type="button"
              onClick={() => setConfigOpen(true)}
              style={{
                padding: "0.45rem 1rem",
                borderRadius: "var(--r-md)",
                border: "1px solid var(--border)",
                background: "var(--surface)",
                color: "var(--text-1)",
                cursor: "pointer",
                fontSize: "0.8125rem",
                fontWeight: 600,
                display: "flex",
                alignItems: "center",
                gap: "0.5rem",
                fontFamily: "inherit",
                transition: "background 0.15s, border-color 0.15s, box-shadow 0.15s",
                boxShadow: "var(--shadow-xs)",
                flexShrink: 0,
              }}
              onMouseEnter={(e) => {
                e.currentTarget.style.background = "var(--surface-hover)";
                e.currentTarget.style.borderColor = "var(--border-strong)";
              }}
              onMouseLeave={(e) => {
                e.currentTarget.style.background = "var(--surface)";
                e.currentTarget.style.borderColor = "var(--border)";
              }}
            >
              <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round" aria-hidden>
                <path d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.39a2 2 0 0 0-.73-2.73l-.15-.08a2 2 0 0 1-1-1.74v-.5a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z" />
                <circle cx="12" cy="12" r="3" />
              </svg>
              Settings
              {enabled.length > 0 && (
                <span
                  style={{
                    background: "var(--accent)",
                    color: "#fff",
                    borderRadius: 999,
                    minWidth: 22,
                    height: 22,
                    padding: "0 6px",
                    fontSize: "0.7rem",
                    fontWeight: 700,
                    display: "inline-flex",
                    alignItems: "center",
                    justifyContent: "center",
                  }}
                >
                  {enabled.length}
                </span>
              )}
            </button>
            <button
              type="button"
              onClick={newRun}
              style={{
                padding: "0.45rem 1rem",
                borderRadius: "var(--r-md)",
                border: "none",
                background: "var(--accent)",
                color: "#fff",
                cursor: "pointer",
                fontSize: "0.8125rem",
                fontWeight: 600,
                fontFamily: "inherit",
                boxShadow: "0 1px 3px rgba(37, 99, 235, 0.35)",
                whiteSpace: "nowrap",
              }}
            >
              + New run
            </button>
          </div>
        </div>
      </header>

      <main style={{ maxWidth: 1120, margin: "0 auto", padding: `1.75rem 1.5rem ${mainPadBottom}` }}>
        <div
          style={{
            background: "var(--surface)",
            border: "1px solid var(--border)",
            borderRadius: "var(--r-2xl)",
            boxShadow: "var(--shadow-sm)",
            overflow: "hidden",
            marginBottom: "1.5rem",
          }}
        >
          <div style={{ padding: "0.7rem 1.25rem 0.35rem", background: "var(--surface)" }}>
            <span style={{ fontSize: "0.68rem", fontWeight: 700, color: "var(--text-3)", letterSpacing: "0.08em", textTransform: "uppercase" }}>
              Prompt
            </span>
          </div>
          <textarea
            ref={textareaRef}
            value={prompt}
            onChange={(e) => setPrompt(e.target.value)}
            onKeyDown={(e) => {
              // Cmd/Ctrl+Enter runs the prompt; `run` applies its own guards.
              if (e.key === "Enter" && (e.metaKey || e.ctrlKey)) run();
            }}
            placeholder="Ask a question, paste code to review, or describe a task…"
            rows={5}
            style={{
              display: "block",
              width: "100%",
              background: "var(--surface)",
              border: "none",
              color: "var(--text-1)",
              fontSize: "0.97rem",
              lineHeight: 1.65,
              padding: "0.35rem 1.25rem 1.15rem",
              minHeight: 140,
              resize: "vertical",
              outline: "none",
              fontFamily: "inherit",
            }}
          />

          <div
            style={{
              padding: "0.7rem 1.15rem",
              borderTop: "1px solid var(--border)",
              background: "var(--surface-muted)",
              display: "flex",
              alignItems: "center",
              gap: "0.65rem",
              flexWrap: "wrap",
            }}
          >
            <div style={{ display: "flex", gap: "0.35rem", flex: 1, flexWrap: "wrap", alignItems: "center" }}>
              {enabled.length === 0 ? (
                <button
                  type="button"
                  onClick={() => setConfigOpen(true)}
                  style={{
                    background: "var(--surface)",
                    border: "1px dashed var(--border-strong)",
                    color: "var(--text-2)",
                    borderRadius: 8,
                    padding: "0.35rem 0.85rem",
                    fontSize: "0.78rem",
                    fontWeight: 600,
                    cursor: "pointer",
                    fontFamily: "inherit",
                    transition: "border-color 0.15s, color 0.15s",
                  }}
                >
                  + Add models in Settings
                </button>
              ) : (
                enabled.map((i, chipIdx) => {
                  // Chip dot colours cycle through the shared palette.
                  const dot = MODEL_CHIP_PALETTE[chipIdx % MODEL_CHIP_PALETTE.length];
                  return (
                    <span
                      key={i.id}
                      title={`${i.provider} · ${i.model}`}
                      style={{
                        display: "inline-flex",
                        alignItems: "center",
                        gap: "0.4rem",
                        padding: "0.32rem 0.7rem",
                        borderRadius: 999,
                        fontSize: "0.78rem",
                        fontWeight: 600,
                        background: "var(--surface)",
                        border: "1px solid var(--border)",
                        color: "var(--text-1)",
                        whiteSpace: "nowrap",
                        boxShadow: "var(--shadow-xs)",
                      }}
                    >
                      <span
                        style={{
                          width: 7,
                          height: 7,
                          borderRadius: "50%",
                          background: dot,
                          flexShrink: 0,
                        }}
                        aria-hidden
                      />
                      <span style={{ fontFamily: "var(--font-mono)", fontWeight: 600, fontSize: "0.72rem" }}>{i.model}</span>
                    </span>
                  );
                })
              )}
              <button
                type="button"
                onClick={() => setConfigOpen(true)}
                style={{
                  background: "var(--surface)",
                  border: "1px dashed var(--border-strong)",
                  color: "var(--text-2)",
                  borderRadius: 8,
                  padding: "0.28rem 0.65rem",
                  fontSize: "0.72rem",
                  fontWeight: 600,
                  cursor: "pointer",
                  fontFamily: "inherit",
                }}
              >
                + add model
              </button>
            </div>

            <button
              type="button"
              onClick={run}
              disabled={!canRun}
              style={{
                padding: "0.55rem 1.35rem",
                borderRadius: "var(--r-md)",
                border: "none",
                background: canRun ? "var(--accent)" : "var(--surface-hover)",
                color: canRun ? "#fff" : "var(--text-3)",
                fontWeight: 600,
                fontSize: "0.875rem",
                cursor: canRun ? "pointer" : "not-allowed",
                fontFamily: "inherit",
                transition: "background 0.15s, box-shadow 0.15s",
                display: "flex",
                alignItems: "center",
                gap: "0.45rem",
                whiteSpace: "nowrap",
                boxShadow: canRun ? "0 2px 10px rgba(37, 99, 235, 0.28)" : "none",
              }}
              onMouseEnter={(e) => {
                if (canRun) {
                  e.currentTarget.style.background = "var(--accent-hover)";
                }
              }}
              onMouseLeave={(e) => {
                if (canRun) {
                  e.currentTarget.style.background = "var(--accent)";
                }
              }}
            >
              {loading ? (
                <>
                  <svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2.5" strokeLinecap="round" style={{ animation: "spin 1s linear infinite" }}>
                    <path d="M12 2v4M12 18v4M4.93 4.93l2.83 2.83M16.24 16.24l2.83 2.83M2 12h4M18 12h4M4.93 19.07l2.83-2.83M16.24 7.76l2.83-2.83" />
                  </svg>
                  Running…
                </>
              ) : (
                <>Run <kbd style={{ fontSize: "0.68rem", opacity: 0.8, fontFamily: "inherit" }}>⌘↵</kbd></>
              )}
            </button>
          </div>
        </div>

        <div style={{ marginBottom: "1.25rem" }}>
          <TabBar active={tab} onChange={setTab} />
        </div>

        {error && (
          <div
            style={{
              background: "var(--red-subtle)",
              border: "1px solid rgba(185, 28, 28, 0.2)",
              color: "var(--red)",
              borderRadius: "var(--r-lg)",
              padding: "0.85rem 1.1rem",
              fontSize: "0.875rem",
              marginBottom: "1.35rem",
              lineHeight: 1.55,
              fontWeight: 500,
            }}
          >
            {error}
          </div>
        )}

        {tab === "history" && (
          <div
            style={{
              background: "var(--surface)",
              border: "1px solid var(--border)",
              borderRadius: "var(--r-xl)",
              boxShadow: "var(--shadow-md)",
              overflow: "hidden",
            }}
          >
            <div style={{ padding: "0.85rem 1.15rem", borderBottom: "1px solid var(--border)", background: "var(--surface-subtle)" }}>
              <span style={{ fontSize: "0.72rem", fontWeight: 600, color: "var(--text-3)", textTransform: "uppercase", letterSpacing: "0.06em" }}>
                Recent runs
              </span>
            </div>
            {historyEntries.length === 0 ? (
              <div style={{ padding: "2.5rem 1.25rem", textAlign: "center", color: "var(--text-3)", fontSize: "0.875rem" }}>
                No runs yet. Execute a prompt to build history (stored in this browser).
              </div>
            ) : (
              <ul style={{ listStyle: "none", margin: 0, padding: 0 }}>
                {historyEntries.map((entry) => (
                  <li key={entry.id} style={{ borderBottom: "1px solid var(--border)" }}>
                    <button
                      type="button"
                      onClick={() => restoreHistoryEntry(entry)}
                      style={{
                        width: "100%",
                        textAlign: "left",
                        padding: "0.85rem 1.15rem",
                        border: "none",
                        background: "transparent",
                        cursor: "pointer",
                        fontFamily: "inherit",
                        display: "block",
                      }}
                    >
                      <div style={{ fontSize: "0.78rem", color: "var(--text-3)", marginBottom: "0.25rem" }}>
                        {new Date(entry.ranAt).toLocaleString()} · {entry.result.results.length} model
                        {entry.result.results.length !== 1 ? "s" : ""}
                      </div>
                      <div style={{ fontSize: "0.9rem", color: "var(--text-1)", fontWeight: 500, lineHeight: 1.45 }}>
                        {entry.promptPreview}
                      </div>
                    </button>
                  </li>
                ))}
              </ul>
            )}
          </div>
        )}

        {tab === "compare" && !result && (
          <div style={{ textAlign: "center", padding: "2.5rem", color: "var(--text-3)", fontSize: "0.875rem" }}>
            Run a prompt to open the full compare &amp; evaluate panel.
          </div>
        )}

        {result && tab === "compare" && <ComparePanel results={result.results} />}

        {result && tab === "responses" && (
          <div>
            <div
              style={{
                display: "flex",
                flexWrap: "wrap",
                alignItems: "center",
                justifyContent: "space-between",
                gap: "0.75rem",
                marginBottom: "1.1rem",
              }}
            >
              <span
                style={{
                  fontSize: "0.75rem",
                  color: "var(--text-2)",
                  fontWeight: 500,
                  padding: "0.35rem 0.75rem",
                  background: "var(--surface)",
                  border: "1px solid var(--border)",
                  borderRadius: 999,
                  boxShadow: "var(--shadow-xs)",
                }}
              >
                {result.results.filter((r) => !r.error).length}/{result.results.length} succeeded
                <span style={{ color: "var(--text-3)", margin: "0 0.5rem" }}>·</span>
                {new Date(result.ranAt).toLocaleTimeString()}
                {wallClockSec > 0 && (
                  <>
                    <span style={{ color: "var(--text-3)", margin: "0 0.5rem" }}>·</span>
                    {wallClockSec < 10 ? wallClockSec.toFixed(1) : Math.round(wallClockSec)}
                    s total
                  </>
                )}
              </span>
              <ViewToggle value={responsesView} onChange={setResponsesView} />
            </div>

            {responsesView === "grid" && highlights && (
              <div
                style={{
                  display: "grid",
                  gridTemplateColumns: "repeat(auto-fill, minmax(min(100%, 500px), 1fr))",
                  gap: "1.15rem",
                  alignItems: "stretch",
                }}
              >
                {result.results.map((r) => (
                  <ModelResponseCard
                    key={r.instanceId}
                    r={r}
                    badge={highlights.badgeById.get(r.instanceId) ?? null}
                    latencyTone={highlights.latencyTone(r)}
                    rating={ratings[ratingKey(result.ranAt, r.instanceId)] ?? 0}
                    onRate={(n) => setStarRating(result.ranAt, r.instanceId, n)}
                  />
                ))}
              </div>
            )}

            {responsesView === "sideBySide" && highlights && (
              <div
                style={{
                  display: "flex",
                  gap: "1rem",
                  overflowX: "auto",
                  paddingBottom: "0.35rem",
                  WebkitOverflowScrolling: "touch",
                }}
              >
                {result.results.map((r) => (
                  <div
                    key={r.instanceId}
                    style={{
                      flex: "0 0 min(420px, 85vw)",
                      minWidth: "min(420px, 85vw)",
                      maxWidth: "100%",
                    }}
                  >
                    <ModelResponseCard
                      r={r}
                      badge={highlights.badgeById.get(r.instanceId) ?? null}
                      latencyTone={highlights.latencyTone(r)}
                      rating={ratings[ratingKey(result.ranAt, r.instanceId)] ?? 0}
                      onRate={(n) => setStarRating(result.ranAt, r.instanceId, n)}
                    />
                  </div>
                ))}
              </div>
            )}

            {responsesView === "diff" && (
              <div
                style={{
                  background: "var(--surface)",
                  border: "1px solid var(--border)",
                  borderRadius: "var(--r-xl)",
                  padding: "1.15rem 1.25rem",
                  boxShadow: "var(--shadow-md)",
                }}
              >
                <ResponsesLineDiff
                  results={result.results}
                  leftId={diffLeftId}
                  rightId={diffRightId}
                  onLeftId={setDiffLeftId}
                  onRightId={setDiffRightId}
                />
              </div>
            )}
          </div>
        )}

        {!result && !error && !loading && tab !== "history" && (
          <div
            style={{
              textAlign: "center",
              padding: "3.5rem 1.5rem 4rem",
              color: "var(--text-3)",
            }}
          >
            <div
              style={{
                width: 56,
                height: 56,
                margin: "0 auto 1.25rem",
                borderRadius: "var(--r-lg)",
                background: "var(--accent-subtle)",
                border: "1px solid rgba(30, 64, 175, 0.12)",
                display: "flex",
                alignItems: "center",
                justifyContent: "center",
              }}
              aria-hidden
            >
              <svg width="26" height="26" viewBox="0 0 24 24" fill="none" stroke="var(--accent)" strokeWidth="1.75" strokeLinecap="round" strokeLinejoin="round">
                <path d="M12 2v4M12 18v4M4.93 4.93l2.83 2.83M16.24 16.24l2.83 2.83M2 12h4M18 12h4" />
                <circle cx="12" cy="12" r="3" />
              </svg>
            </div>
            <p style={{ fontSize: "1rem", fontWeight: 600, color: "var(--text-1)", marginBottom: "0.35rem", letterSpacing: "-0.02em" }}>
              Ready when you are
            </p>
            <p style={{ fontSize: "0.8rem", color: "var(--text-3)", maxWidth: 420, margin: "0 auto 0.75rem", lineHeight: 1.5 }}>
              {BRAND_TAGLINE}
            </p>
            <p style={{ fontSize: "0.875rem", color: "var(--text-2)", maxWidth: 400, margin: "0 auto", lineHeight: 1.6 }}>
              Add models under Settings, enter a prompt, then run. Use Grid, Side-by-side, or Diff on the Responses tab, and open History to reload past runs.
            </p>
          </div>
        )}
      </main>

      {showQuickBar && result && (
        <QuickComparisonBar results={result.results} onFullCompare={() => setTab("compare")} />
      )}

      {configOpen && (
        <SettingsPanel
          open={configOpen}
          onClose={() => setConfigOpen(false)}
          instances={instances}
          onUpdateInstances={updateInstances}
          secrets={secrets}
          onUpdateSecrets={updateSecrets}
          judge={judge}
          onUpdateJudge={updateJudge}
        />
      )}

      <style>{`
        @keyframes spin {
          from { transform: rotate(0deg); }
          to { transform: rotate(360deg); }
        }
        ::-webkit-scrollbar { width: 6px; height: 6px; }
        ::-webkit-scrollbar-track { background: transparent; }
        ::-webkit-scrollbar-thumb { background: var(--border-strong); border-radius: 3px; }
        ::-webkit-scrollbar-thumb:hover { background: var(--text-3); }
        textarea::placeholder { color: var(--text-3); }
      `}</style>
    </div>
  );
}