@darkrishabh/bench-ai 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +333 -0
- package/dist/cli/app.d.ts +11 -0
- package/dist/cli/app.d.ts.map +1 -0
- package/dist/cli/app.js +48 -0
- package/dist/cli/app.js.map +1 -0
- package/dist/cli/components/DiffView.d.ts +5 -0
- package/dist/cli/components/DiffView.d.ts.map +1 -0
- package/dist/cli/components/DiffView.js +14 -0
- package/dist/cli/components/DiffView.js.map +1 -0
- package/dist/cli/components/EvalView.d.ts +6 -0
- package/dist/cli/components/EvalView.d.ts.map +1 -0
- package/dist/cli/components/EvalView.js +82 -0
- package/dist/cli/components/EvalView.js.map +1 -0
- package/dist/cli/components/Spinner.d.ts +4 -0
- package/dist/cli/components/Spinner.d.ts.map +1 -0
- package/dist/cli/components/Spinner.js +15 -0
- package/dist/cli/components/Spinner.js.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +117 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/run-command.d.ts +11 -0
- package/dist/cli/run-command.d.ts.map +1 -0
- package/dist/cli/run-command.js +119 -0
- package/dist/cli/run-command.js.map +1 -0
- package/dist/engine/cost.d.ts +3 -0
- package/dist/engine/cost.d.ts.map +1 -0
- package/dist/engine/cost.js +52 -0
- package/dist/engine/cost.js.map +1 -0
- package/dist/engine/diff.d.ts +6 -0
- package/dist/engine/diff.d.ts.map +1 -0
- package/dist/engine/diff.js +43 -0
- package/dist/engine/diff.js.map +1 -0
- package/dist/engine/eval.d.ts +14 -0
- package/dist/engine/eval.d.ts.map +1 -0
- package/dist/engine/eval.js +194 -0
- package/dist/engine/eval.js.map +1 -0
- package/dist/engine/index.d.ts +15 -0
- package/dist/engine/index.d.ts.map +1 -0
- package/dist/engine/index.js +10 -0
- package/dist/engine/index.js.map +1 -0
- package/dist/engine/providers/base.d.ts +7 -0
- package/dist/engine/providers/base.d.ts.map +1 -0
- package/dist/engine/providers/base.js +2 -0
- package/dist/engine/providers/base.js.map +1 -0
- package/dist/engine/providers/claude.d.ts +15 -0
- package/dist/engine/providers/claude.d.ts.map +1 -0
- package/dist/engine/providers/claude.js +53 -0
- package/dist/engine/providers/claude.js.map +1 -0
- package/dist/engine/providers/minimax.d.ts +16 -0
- package/dist/engine/providers/minimax.d.ts.map +1 -0
- package/dist/engine/providers/minimax.js +67 -0
- package/dist/engine/providers/minimax.js.map +1 -0
- package/dist/engine/providers/ollama.d.ts +14 -0
- package/dist/engine/providers/ollama.d.ts.map +1 -0
- package/dist/engine/providers/ollama.js +60 -0
- package/dist/engine/providers/ollama.js.map +1 -0
- package/dist/engine/providers/openai-compatible.d.ts +19 -0
- package/dist/engine/providers/openai-compatible.d.ts.map +1 -0
- package/dist/engine/providers/openai-compatible.js +109 -0
- package/dist/engine/providers/openai-compatible.js.map +1 -0
- package/dist/engine/providers/subprocess.d.ts +55 -0
- package/dist/engine/providers/subprocess.d.ts.map +1 -0
- package/dist/engine/providers/subprocess.js +111 -0
- package/dist/engine/providers/subprocess.js.map +1 -0
- package/dist/engine/suite-loader.d.ts +11 -0
- package/dist/engine/suite-loader.d.ts.map +1 -0
- package/dist/engine/suite-loader.js +75 -0
- package/dist/engine/suite-loader.js.map +1 -0
- package/dist/engine/types.d.ts +104 -0
- package/dist/engine/types.d.ts.map +1 -0
- package/dist/engine/types.js +2 -0
- package/dist/engine/types.js.map +1 -0
- package/next-env.d.ts +6 -0
- package/next.config.ts +26 -0
- package/package.json +72 -0
- package/public/icon.svg +14 -0
- package/src/app/api/diff/route.ts +135 -0
- package/src/app/api/models/route.ts +96 -0
- package/src/app/api/suite/route.ts +314 -0
- package/src/app/globals.css +215 -0
- package/src/app/icon.svg +14 -0
- package/src/app/layout.tsx +44 -0
- package/src/app/opengraph-image.tsx +73 -0
- package/src/app/page.tsx +952 -0
- package/src/app/suite/layout.tsx +12 -0
- package/src/app/suite/page.tsx +206 -0
- package/src/app/twitter-image.tsx +1 -0
- package/src/components/BenchAiLogo.tsx +38 -0
- package/src/components/ComparePanel.tsx +643 -0
- package/src/components/ConfigPanel.tsx +809 -0
- package/src/components/MarkdownOutput.tsx +16 -0
- package/src/components/ModelResponseCard.tsx +313 -0
- package/src/components/QuickComparisonBar.tsx +184 -0
- package/src/components/ResponsesLineDiff.tsx +149 -0
- package/src/components/SettingsPanel.tsx +591 -0
- package/src/components/SuitePanel.tsx +875 -0
- package/src/lib/brand.ts +4 -0
- package/src/lib/config-yaml.ts +70 -0
- package/src/lib/consume-suite-sse.ts +70 -0
- package/src/lib/describe-judge.ts +23 -0
- package/src/lib/model-chip-palette.ts +9 -0
- package/src/lib/openai-model-list.ts +33 -0
- package/src/lib/provider-ui.ts +30 -0
- package/src/lib/resolve-credentials.ts +80 -0
- package/src/lib/run-history.ts +66 -0
- package/src/lib/simple-line-diff.ts +50 -0
- package/src/lib/storage.ts +100 -0
- package/src/lib/suite-judge-meta.ts +13 -0
- package/src/lib/suite-run-history.ts +81 -0
- package/src/types.ts +170 -0
- package/vercel.json +5 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { Metadata } from "next";
|
|
2
|
+
import type { ReactNode } from "react";
|
|
3
|
+
import { BRAND_NAME, BRAND_SUITE_SUBTITLE } from "../../lib/brand";
|
|
4
|
+
|
|
5
|
+
/** Browser-tab title for the suite section: fixed label followed by the brand name. */
const SUITE_PAGE_TITLE = `Test suites · ${BRAND_NAME}`;

/** Description text for the suite section: brand subtitle plus a short feature blurb. */
const SUITE_PAGE_DESCRIPTION = `${BRAND_SUITE_SUBTITLE} — judge-backed rubrics and assertions.`;

/**
 * Static metadata exported for this route segment.
 *
 * Both fields are derived from the shared brand constants so that the
 * wording stays in sync with `lib/brand`.
 */
export const metadata: Metadata = {
  title: SUITE_PAGE_TITLE,
  description: SUITE_PAGE_DESCRIPTION,
};
|
|
9
|
+
|
|
10
|
+
/**
 * Pass-through layout for the suite route segment.
 *
 * It adds no wrapper markup of its own: whatever is handed in as
 * `children` is returned unchanged.
 *
 * @param props.children - The nested content to render.
 * @returns The same `children` value that was passed in.
 */
export default function SuiteLayout(props: { children: ReactNode }) {
  const { children } = props;
  return children;
}
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"use client";
|
|
2
|
+
|
|
3
|
+
import Link from "next/link";
|
|
4
|
+
import { useState, useEffect } from "react";
|
|
5
|
+
import type { JudgeSettings, LLMInstance, SecretsMap } from "../../types";
|
|
6
|
+
import { DEFAULT_JUDGE_SETTINGS } from "../../types";
|
|
7
|
+
import {
|
|
8
|
+
loadInstances,
|
|
9
|
+
saveInstances,
|
|
10
|
+
loadSecrets,
|
|
11
|
+
saveSecrets,
|
|
12
|
+
loadJudgeSettings,
|
|
13
|
+
saveJudgeSettings,
|
|
14
|
+
} from "../../lib/storage";
|
|
15
|
+
import { SettingsPanel } from "../../components/SettingsPanel";
|
|
16
|
+
import { BRAND_NAME, BRAND_SUITE_SUBTITLE } from "../../lib/brand";
|
|
17
|
+
import { BenchAiLogo } from "../../components/BenchAiLogo";
|
|
18
|
+
import { SuitePanel } from "../../components/SuitePanel";
|
|
19
|
+
|
|
20
|
+
/**
 * Inline style objects used by `SuitePage`, grouped by the element they
 * decorate. Declared `as const` so literal values such as `"sticky"` and
 * `"column"` keep their narrow types when passed to `style` props.
 */
const suitePageStyles = {
  page: { minHeight: "100vh", background: "var(--bg-gradient)" },
  header: {
    background: "var(--surface)",
    borderBottom: "1px solid var(--border)",
    position: "sticky",
    top: 0,
    zIndex: 30,
    boxShadow: "var(--shadow-xs)",
  },
  headerInner: {
    maxWidth: 1120,
    margin: "0 auto",
    padding: "0 1.5rem",
    minHeight: 58,
    display: "flex",
    alignItems: "center",
    justifyContent: "space-between",
    gap: "1rem",
  },
  brandColumn: { display: "flex", flexDirection: "column", gap: "0.1rem", minWidth: 0 },
  brandRow: { display: "flex", alignItems: "center", gap: "0.65rem", flexWrap: "wrap" },
  brandLink: {
    display: "flex",
    alignItems: "center",
    gap: "0.65rem",
    textDecoration: "none",
    color: "var(--text-1)",
  },
  brandName: {
    fontWeight: 700,
    fontSize: "1.05rem",
    letterSpacing: "-0.03em",
  },
  sectionLabel: {
    fontSize: "0.65rem",
    fontWeight: 600,
    color: "var(--text-3)",
    letterSpacing: "0.04em",
  },
  subtitle: { fontSize: "0.75rem", color: "var(--text-3)", fontWeight: 500 },
  actions: { display: "flex", alignItems: "center", gap: "0.65rem", flexShrink: 0 },
  compareLink: {
    fontSize: "0.8125rem",
    fontWeight: 600,
    color: "var(--text-2)",
    textDecoration: "none",
    padding: "0.45rem 0.85rem",
    borderRadius: "var(--r-md)",
    border: "1px solid var(--border)",
    background: "var(--surface)",
    boxShadow: "var(--shadow-xs)",
    transition: "background 0.15s, border-color 0.15s",
  },
  settingsButton: {
    padding: "0.45rem 1rem",
    borderRadius: "var(--r-md)",
    border: "1px solid var(--border)",
    background: "var(--surface)",
    color: "var(--text-1)",
    cursor: "pointer",
    fontSize: "0.8125rem",
    fontWeight: 600,
    display: "flex",
    alignItems: "center",
    gap: "0.5rem",
    fontFamily: "inherit",
    transition: "background 0.15s, border-color 0.15s, box-shadow 0.15s",
    boxShadow: "var(--shadow-xs)",
    flexShrink: 0,
  },
  enabledBadge: {
    background: "var(--accent)",
    color: "#fff",
    borderRadius: 999,
    minWidth: 22,
    height: 22,
    padding: "0 6px",
    fontSize: "0.7rem",
    fontWeight: 700,
    display: "inline-flex",
    alignItems: "center",
    justifyContent: "center",
  },
  main: { maxWidth: 1120, margin: "0 auto", padding: "1.75rem 1.5rem 3rem" },
} as const;

/**
 * CSS injected through a `<style>` element at the bottom of the page; it
 * declares a `spin` keyframe animation (a full 360° rotation).
 * NOTE(review): nothing in this file references `spin` — presumably child
 * components rely on it; confirm before removing.
 */
const SPIN_KEYFRAMES_CSS = `
@keyframes spin {
from { transform: rotate(0deg); }
to { transform: rotate(360deg); }
}
`;

/**
 * Test-suite page.
 *
 * Holds the model instances, secrets and judge settings in component state,
 * renders the sticky header (brand link, "← Compare" link, Settings button
 * with a badge counting enabled instances), the `SuitePanel`, and — while
 * open — the `SettingsPanel`.
 *
 * Every update coming back from the settings panel is written both to
 * component state and to the storage helpers from `lib/storage`.
 */
export default function SuitePage() {
  const [instances, setInstances] = useState<LLMInstance[]>([]);
  const [secrets, setSecrets] = useState<SecretsMap>({});
  const [judge, setJudge] = useState<JudgeSettings>(DEFAULT_JUDGE_SETTINGS);
  const [configOpen, setConfigOpen] = useState(false);

  // Populate state from the storage helpers once, after the first render.
  // NOTE(review): presumably the helpers read browser-only storage, which is
  // why this happens in an effect rather than in the initial state — confirm.
  useEffect(() => {
    setInstances(loadInstances());
    setSecrets(loadSecrets());
    setJudge(loadJudgeSettings());
  }, []);

  /** Replace the instance list in state, then persist it. */
  function updateInstances(updated: LLMInstance[]) {
    setInstances(updated);
    saveInstances(updated);
  }

  /** Replace the secrets map in state, then persist it. */
  function updateSecrets(updated: SecretsMap) {
    setSecrets(updated);
    saveSecrets(updated);
  }

  /** Replace the judge settings in state, then persist them. */
  function updateJudge(updated: JudgeSettings) {
    setJudge(updated);
    saveJudgeSettings(updated);
  }

  const openSettings = () => setConfigOpen(true);
  const closeSettings = () => setConfigOpen(false);

  // Number of instances currently switched on; drives the header badge.
  const enabledCount = instances.filter((instance) => instance.enabled).length;

  return (
    <div style={suitePageStyles.page}>
      <header style={suitePageStyles.header}>
        <div style={suitePageStyles.headerInner}>
          <div style={suitePageStyles.brandColumn}>
            <div style={suitePageStyles.brandRow}>
              <Link href="/" style={suitePageStyles.brandLink}>
                <BenchAiLogo size={30} />
                <span style={suitePageStyles.brandName}>{BRAND_NAME}</span>
              </Link>
              <span style={suitePageStyles.sectionLabel}>Test suites</span>
            </div>
            <span style={suitePageStyles.subtitle}>{BRAND_SUITE_SUBTITLE}</span>
          </div>

          <div style={suitePageStyles.actions}>
            <Link href="/" style={suitePageStyles.compareLink}>
              ← Compare
            </Link>
            <button type="button" onClick={openSettings} style={suitePageStyles.settingsButton}>
              Settings
              {enabledCount > 0 ? (
                <span style={suitePageStyles.enabledBadge}>{enabledCount}</span>
              ) : null}
            </button>
          </div>
        </div>
      </header>

      <main style={suitePageStyles.main}>
        <SuitePanel
          instances={instances}
          secrets={secrets}
          judge={judge}
          onOpenSettings={openSettings}
        />
      </main>

      {configOpen ? (
        <SettingsPanel
          open={configOpen}
          onClose={closeSettings}
          instances={instances}
          onUpdateInstances={updateInstances}
          secrets={secrets}
          onUpdateSecrets={updateSecrets}
          judge={judge}
          onUpdateJudge={updateJudge}
        />
      ) : null}

      <style>{SPIN_KEYFRAMES_CSS}</style>
    </div>
  );
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Re-exports the default export together with `alt`, `size` and `contentType`
// from `./opengraph-image`, so this module exposes exactly the same image
// definition instead of duplicating it.
// NOTE(review): per the package file list this appears to be
// `src/app/twitter-image.tsx` — confirm.
export { default, alt, size, contentType } from "./opengraph-image";
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"use client";
|
|
2
|
+
|
|
3
|
+
import { useId } from "react";
|
|
4
|
+
|
|
5
|
+
/**
 * Application mark: two side-by-side panels with a faint divider between
 * them (compare / diff), drawn on a rounded gradient square. Mirrors
 * `src/app/icon.svg`.
 *
 * The SVG is decorative: it is hidden from assistive technology and cannot
 * receive focus.
 *
 * @param size - Rendered width and height of the SVG; defaults to 32.
 */
export function BenchAiLogo({ size = 32 }: { size?: number }) {
  // `useId` gives each rendered logo its own gradient id; colons are removed
  // from the generated id before it is used in the `url(#…)` reference.
  const gradientId = `bench-ai-logo-gradient-${useId().split(":").join("")}`;
  const gradientFill = `url(#${gradientId})`;

  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      viewBox="0 0 32 32"
      width={size}
      height={size}
      aria-hidden={true}
      focusable="false"
    >
      <defs>
        <linearGradient
          id={gradientId}
          x1="4"
          y1="2"
          x2="28"
          y2="30"
          gradientUnits="userSpaceOnUse"
        >
          <stop stopColor="#1e40af" />
          <stop offset="1" stopColor="#172554" />
        </linearGradient>
      </defs>
      {/* Rounded background tile filled with the gradient. */}
      <rect width="32" height="32" rx="8" fill={gradientFill} />
      {/* Left panel, right panel, then the thin divider between them. */}
      <rect x="7" y="9" width="7" height="14" rx="2" fill="#ffffff" fillOpacity="0.95" />
      <rect x="18" y="9" width="7" height="14" rx="2" fill="#ffffff" fillOpacity="0.78" />
      <rect x="15" y="8" width="2" height="16" rx="1" fill="#ffffff" fillOpacity="0.35" />
    </svg>
  );
}
|