@vercel/agent-eval-playground 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -0
- package/app/compare/page.tsx +40 -0
- package/app/evals/[name]/page.tsx +22 -0
- package/app/evals/page.tsx +18 -0
- package/app/experiments/[name]/[timestamp]/page.tsx +23 -0
- package/app/experiments/page.tsx +28 -0
- package/app/globals.css +126 -0
- package/app/layout.tsx +102 -0
- package/app/page.tsx +179 -0
- package/app/transcript/[experiment]/[timestamp]/[evalName]/[run]/page.tsx +43 -0
- package/bin.mjs +86 -0
- package/components/ComparePage.tsx +312 -0
- package/components/EvalDetail.tsx +114 -0
- package/components/EvalsPage.tsx +80 -0
- package/components/ExperimentDetail.tsx +162 -0
- package/components/ExperimentList.tsx +103 -0
- package/components/O11ySummary.tsx +114 -0
- package/components/RunResultCard.tsx +72 -0
- package/components/ShowMore.tsx +60 -0
- package/components/TranscriptPage.tsx +46 -0
- package/components/TranscriptViewer.tsx +201 -0
- package/components/ui/alert-dialog.tsx +184 -0
- package/components/ui/badge.tsx +45 -0
- package/components/ui/button.tsx +60 -0
- package/components/ui/card.tsx +94 -0
- package/components/ui/collapsible.tsx +34 -0
- package/components/ui/combobox.tsx +297 -0
- package/components/ui/dropdown-menu.tsx +269 -0
- package/components/ui/field.tsx +227 -0
- package/components/ui/input-group.tsx +147 -0
- package/components/ui/input.tsx +19 -0
- package/components/ui/label.tsx +24 -0
- package/components/ui/progress.tsx +31 -0
- package/components/ui/scroll-area.tsx +58 -0
- package/components/ui/select.tsx +191 -0
- package/components/ui/separator.tsx +28 -0
- package/components/ui/table.tsx +116 -0
- package/components/ui/tabs.tsx +91 -0
- package/components/ui/textarea.tsx +18 -0
- package/components/ui/tooltip.tsx +57 -0
- package/components.json +25 -0
- package/lib/data.ts +297 -0
- package/lib/types.ts +113 -0
- package/lib/utils.ts +6 -0
- package/next.config.ts +5 -0
- package/package.json +51 -0
- package/postcss.config.mjs +7 -0
- package/public/vercel.svg +1 -0
- package/tsconfig.json +42 -0
package/bin.mjs
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { spawn } from "child_process";
|
|
4
|
+
import { resolve, dirname } from "path";
|
|
5
|
+
import { fileURLToPath } from "url";
|
|
6
|
+
import { createRequire } from "module";
|
|
7
|
+
|
|
8
|
+
// Directory containing this script; ES modules have no built-in __dirname.
const scriptPath = fileURLToPath(import.meta.url);
const __dirname = dirname(scriptPath);

// CommonJS-style `require`, anchored at this file so module lookups start here.
const require = createRequire(import.meta.url);
|
|
10
|
+
|
|
11
|
+
// Parse CLI arguments

/** Help text printed for --help / -h. */
const USAGE = `
Usage: agent-eval-playground [options]

Options:
--results-dir <dir> Path to results directory (default: ./results)
--evals-dir <dir> Path to evals directory (default: ./evals)
--port, -p <port> HTTP server port (default: 3000)
--watch Enable live mode — watch results for changes
--help, -h Show this help message
`;

/**
 * Parse command-line options into the settings used by the launcher.
 *
 * Unrecognised arguments are ignored. Parsing stops at the first
 * --help / -h, which is reported through the `help` field.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{ resultsDir: string, evalsDir: string, port: string, watch: boolean, help: boolean }}
 * @throws {Error} If an option that takes a value is last on the command
 *   line, or if the port is not an integer between 0 and 65535.
 */
function parseCliArgs(argv) {
  const options = {
    resultsDir: "./results",
    evalsDir: "./evals",
    port: "3000",
    watch: false,
    help: false,
  };

  // Fetch the value that follows the flag at `index`, failing loudly when it
  // is absent instead of letting `undefined` reach resolve() or spawn().
  const valueAfter = (index) => {
    const value = argv[index + 1];
    if (value === undefined) {
      throw new Error(`Missing value for ${argv[index]}`);
    }
    return value;
  };

  for (let i = 0; i < argv.length; i++) {
    switch (argv[i]) {
      case "--results-dir":
        options.resultsDir = valueAfter(i);
        i++;
        break;
      case "--evals-dir":
        options.evalsDir = valueAfter(i);
        i++;
        break;
      case "--port":
      case "-p": {
        const value = valueAfter(i);
        if (!/^\d+$/.test(value) || Number(value) > 65535) {
          throw new Error(
            `Invalid port "${value}": expected an integer between 0 and 65535`
          );
        }
        // Kept as a string because it is passed straight through as a CLI argument.
        options.port = value;
        i++;
        break;
      }
      case "--watch":
        options.watch = true;
        break;
      case "--help":
      case "-h":
        options.help = true;
        return options;
    }
  }

  return options;
}

const args = process.argv.slice(2);
let resultsDir;
let evalsDir;
let port;
let watch;

try {
  const parsed = parseCliArgs(args);
  if (parsed.help) {
    console.log(USAGE);
    process.exit(0);
  }
  ({ resultsDir, evalsDir, port, watch } = parsed);
} catch (err) {
  console.error(`Error: ${err.message}`);
  console.error("Run with --help for usage.");
  process.exit(1);
}
|
|
48
|
+
|
|
49
|
+
// Set environment variables for the Next.js app.
// Paths are made absolute against the current working directory; WATCH is
// only set when watch mode was requested.
Object.assign(
  process.env,
  {
    RESULTS_DIR: resolve(resultsDir),
    EVALS_DIR: resolve(evalsDir),
  },
  watch ? { WATCH: "true" } : null
);
|
|
55
|
+
|
|
56
|
+
// Find the next binary from this package's dependencies

/**
 * Resolve the absolute path of the `next` CLI script.
 *
 * The path is derived from wherever "next/package.json" resolves to. If the
 * package cannot be resolved, an error is printed and the process exits
 * with status 1.
 *
 * @returns {string} Absolute path to next's dist/bin/next script.
 */
function locateNextBin() {
  try {
    const manifestPath = require.resolve("next/package.json");
    const packageRoot = dirname(manifestPath);
    return resolve(packageRoot, "dist", "bin", "next");
  } catch {
    console.error(
      'Error: "next" package not found. Make sure dependencies are installed.'
    );
    process.exit(1);
  }
}

const nextBin = locateNextBin();
|
|
67
|
+
|
|
68
|
+
// Startup banner: one line per setting, followed by a blank line.
const bannerLines = [
  `Agent Eval Playground`,
  ` Results: ${process.env.RESULTS_DIR}`,
  ` Evals: ${process.env.EVALS_DIR}`,
  ` Port: ${port}`,
];
if (watch) {
  bannerLines.push(` Watch: enabled`);
}
for (const line of bannerLines) {
  console.log(line);
}
console.log();
|
|
74
|
+
|
|
75
|
+
// Run next dev from the package directory
const child = spawn(process.execPath, [nextBin, "dev", "-p", port], {
  cwd: __dirname,
  stdio: "inherit",
  env: process.env,
});

// A failure to start the child is reported through "error", not "exit".
// Without a listener it would surface as an uncaught exception.
child.on("error", (err) => {
  console.error(`Error: failed to start the Next.js dev server: ${err.message}`);
  process.exit(1);
});

// Mirror the child's exit status. `code` is null when the child was terminated
// by a signal; report that as a failure rather than as a clean exit.
child.on("exit", (code, signal) => {
  if (code === null) {
    process.exit(signal ? 1 : 0);
  }
  process.exit(code);
});

// Forward signals
process.on("SIGINT", () => child.kill("SIGINT"));
process.on("SIGTERM", () => child.kill("SIGTERM"));
|
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
"use client";
|
|
2
|
+
|
|
3
|
+
import { useState } from "react";
|
|
4
|
+
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
|
5
|
+
import { Badge } from "@/components/ui/badge";
|
|
6
|
+
import {
|
|
7
|
+
Select,
|
|
8
|
+
SelectContent,
|
|
9
|
+
SelectItem,
|
|
10
|
+
SelectTrigger,
|
|
11
|
+
SelectValue,
|
|
12
|
+
} from "@/components/ui/select";
|
|
13
|
+
import {
|
|
14
|
+
Table,
|
|
15
|
+
TableBody,
|
|
16
|
+
TableCell,
|
|
17
|
+
TableHead,
|
|
18
|
+
TableHeader,
|
|
19
|
+
TableRow,
|
|
20
|
+
} from "@/components/ui/table";
|
|
21
|
+
import { Separator } from "@/components/ui/separator";
|
|
22
|
+
|
|
23
|
+
/** Aggregated results of one eval within a single experiment run. */
interface EvalDetail {
  /** Eval name; used to match the same eval across the two compared runs. */
  name: string;
  /** Number of runs recorded for this eval. */
  totalRuns: number;
  /** Number of those runs that passed. */
  passedRuns: number;
  /** Pass rate as a percentage (rendered with "%" and compared against 100). */
  passRate: number;
  /** Mean run duration; rendered with an "s" suffix. */
  meanDuration: number;
}
|
|
30
|
+
|
|
31
|
+
/** Details of one experiment run, as looked up from the pre-loaded details map. */
interface ExperimentDetailData {
  /** Experiment name. */
  name: string;
  /** Timestamp string for this run. */
  timestamp: string;
  /** Per-eval aggregates for this run. */
  evals: Array<EvalDetail>;
}
|
|
36
|
+
|
|
37
|
+
/** One entry in the run pickers. */
interface SelectOption {
  /** Item value; also the key used to look the run up in the details map. */
  value: string;
  /** Text shown for the item. */
  label: string;
}
|
|
41
|
+
|
|
42
|
+
/** Props accepted by the ComparePage component. */
interface ComparePageProps {
  /** Selectable runs, in display order. */
  options: Array<SelectOption>;
  /** Run details keyed by option value; a null entry means no data for that run. */
  detailsMap: { [optionValue: string]: ExperimentDetailData | null };
}
|
|
46
|
+
|
|
47
|
+
/**
 * Side-by-side comparison of two experiment runs.
 *
 * Renders a run picker for each side, three summary stats and a per-eval
 * table. All data is read from `detailsMap`; nothing is fetched here.
 *
 * @param options    Selectable runs, in display order. The first two are pre-selected.
 * @param detailsMap Run details keyed by option value. A missing or null entry
 *                   is treated as "nothing selected" for that side.
 */
export function ComparePage({ options, detailsMap }: ComparePageProps) {
  // Pre-select the first two options when available (presumably the most
  // recent runs — the ordering is decided by the caller).
  const [leftValue, setLeftValue] = useState(options[0]?.value ?? "");
  const [rightValue, setRightValue] = useState(options[1]?.value ?? "");

  // Look up detail data directly from the pre-loaded map
  const leftData = leftValue ? (detailsMap[leftValue] ?? null) : null;
  const rightData = rightValue ? (detailsMap[rightValue] ?? null) : null;

  // Union of eval names from both sides, sorted for a stable row order
  const allEvalNames = new Set<string>();
  leftData?.evals.forEach((e) => allEvalNames.add(e.name));
  rightData?.evals.forEach((e) => allEvalNames.add(e.name));
  const evalNames = Array.from(allEvalNames).sort();

  const leftMap = new Map(leftData?.evals.map((e) => [e.name, e]) ?? []);
  const rightMap = new Map(rightData?.evals.map((e) => [e.name, e]) ?? []);

  // Both pickers share this markup; only the title and the bound state differ.
  const renderRunPicker = (
    title: string,
    value: string,
    onValueChange: typeof setLeftValue,
  ) => (
    <Card>
      <CardHeader className="pb-3">
        <CardTitle className="text-sm text-muted-foreground">{title}</CardTitle>
      </CardHeader>
      <CardContent>
        <Select value={value || undefined} onValueChange={onValueChange}>
          <SelectTrigger>
            <SelectValue placeholder="Select experiment run..." />
          </SelectTrigger>
          <SelectContent>
            {options.map((opt) => (
              <SelectItem key={opt.value} value={opt.value}>
                {opt.label}
              </SelectItem>
            ))}
          </SelectContent>
        </Select>
      </CardContent>
    </Card>
  );

  return (
    <div className="space-y-6">
      <div>
        <h1 className="text-2xl font-bold tracking-tight">Compare</h1>
        <p className="text-muted-foreground mt-1">
          Compare two experiment runs side-by-side.
        </p>
      </div>

      {/* Selection */}
      <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
        {renderRunPicker("Left", leftValue, setLeftValue)}
        {renderRunPicker("Right", rightValue, setRightValue)}
      </div>

      {/* Comparison table */}
      {leftData && rightData && evalNames.length > 0 && (
        <>
          <Separator />

          {/* Summary comparison */}
          <div className="grid grid-cols-1 md:grid-cols-3 gap-4">
            <ComparisonStat
              label="Overall Pass Rate"
              left={avgPassRate(leftData.evals)}
              right={avgPassRate(rightData.evals)}
              format={(v) => `${v.toFixed(0)}%`}
              higherIsBetter
            />
            <ComparisonStat
              label="Avg Duration"
              left={avgDuration(leftData.evals)}
              right={avgDuration(rightData.evals)}
              format={(v) => `${v.toFixed(1)}s`}
              higherIsBetter={false}
            />
            <ComparisonStat
              label="Evals Passed"
              left={leftData.evals.filter((e) => e.passedRuns === e.totalRuns).length}
              right={rightData.evals.filter((e) => e.passedRuns === e.totalRuns).length}
              format={(v) => `${v}`}
              higherIsBetter
            />
          </div>

          {/* Per-eval comparison table */}
          <Card>
            <CardHeader>
              <CardTitle className="text-base">Per-Eval Comparison</CardTitle>
            </CardHeader>
            <CardContent>
              <Table>
                <TableHeader>
                  <TableRow>
                    <TableHead>Eval</TableHead>
                    <TableHead className="text-center">Left Pass Rate</TableHead>
                    <TableHead className="text-center">Right Pass Rate</TableHead>
                    <TableHead className="text-center">Delta</TableHead>
                    <TableHead className="text-center">Left Duration</TableHead>
                    <TableHead className="text-center">Right Duration</TableHead>
                  </TableRow>
                </TableHeader>
                <TableBody>
                  {evalNames.map((name) => {
                    const left = leftMap.get(name);
                    const right = rightMap.get(name);
                    // Only meaningful (and only rendered) when both sides have the eval
                    const delta = (right?.passRate ?? 0) - (left?.passRate ?? 0);
                    const deltaClassName =
                      delta > 0
                        ? "text-green-500"
                        : delta < 0
                          ? "text-red-500"
                          : "text-muted-foreground";

                    return (
                      <TableRow key={name}>
                        <TableCell className="font-medium">{name}</TableCell>
                        <TableCell className="text-center">
                          <PassRateBadge evalDetail={left} />
                        </TableCell>
                        <TableCell className="text-center">
                          <PassRateBadge evalDetail={right} />
                        </TableCell>
                        <TableCell className="text-center">
                          {left && right ? (
                            <span className={deltaClassName}>
                              {delta > 0 ? "+" : ""}
                              {delta.toFixed(0)}%
                            </span>
                          ) : (
                            <span className="text-muted-foreground">--</span>
                          )}
                        </TableCell>
                        <TableCell className="text-center text-sm text-muted-foreground">
                          {left ? `${left.meanDuration.toFixed(1)}s` : "--"}
                        </TableCell>
                        <TableCell className="text-center text-sm text-muted-foreground">
                          {right ? `${right.meanDuration.toFixed(1)}s` : "--"}
                        </TableCell>
                      </TableRow>
                    );
                  })}
                </TableBody>
              </Table>
            </CardContent>
          </Card>
        </>
      )}

      {/* Both runs selected but neither contains any evals: say so instead of rendering nothing */}
      {leftData && rightData && evalNames.length === 0 && (
        <Card>
          <CardContent className="py-12 text-center">
            <p className="text-muted-foreground">
              The selected runs contain no evals to compare.
            </p>
          </CardContent>
        </Card>
      )}

      {/* Prompt to select both */}
      {(!leftData || !rightData) && (
        <Card>
          <CardContent className="py-12 text-center">
            <p className="text-muted-foreground">
              Select two experiment runs above to compare them.
            </p>
          </CardContent>
        </Card>
      )}
    </div>
  );
}

/**
 * Pass-rate badge for one side of a comparison row, or a "--" placeholder
 * when that run does not contain the eval.
 */
function PassRateBadge({ evalDetail }: { evalDetail: EvalDetail | undefined }) {
  if (!evalDetail) {
    return <span className="text-muted-foreground">--</span>;
  }
  return (
    <Badge variant={evalDetail.passRate === 100 ? "default" : "destructive"}>
      {evalDetail.passRate.toFixed(0)}%
    </Badge>
  );
}
|
|
262
|
+
|
|
263
|
+
/**
 * Summary card showing a metric for both runs as "left → right", plus a
 * signed delta badge when the two values differ.
 *
 * @param label          Caption shown above the values.
 * @param left           Metric value for the left run.
 * @param right          Metric value for the right run.
 * @param format         Turns a metric value (or a delta magnitude) into display text.
 * @param higherIsBetter Whether an increase from left to right counts as an improvement.
 */
function ComparisonStat({
  label,
  left,
  right,
  format,
  higherIsBetter,
}: {
  label: string;
  left: number;
  right: number;
  format: (v: number) => string;
  higherIsBetter: boolean;
}) {
  const delta = right - left;
  const improved = higherIsBetter ? delta > 0 : delta < 0;
  const regressed = higherIsBetter ? delta < 0 : delta > 0;

  // format() only receives the magnitude, so the sign is rendered explicitly
  // for both directions; a decrease must not appear as an unsigned number.
  const sign = delta > 0 ? "+" : delta < 0 ? "-" : "";

  return (
    <Card>
      <CardContent className="py-4 px-4">
        <div className="text-xs text-muted-foreground">{label}</div>
        <div className="flex items-baseline gap-4 mt-1">
          <span className="text-lg font-medium">{format(left)}</span>
          <span className="text-muted-foreground">→</span>
          <span className="text-lg font-medium">{format(right)}</span>
          {delta !== 0 && (
            <Badge
              variant={improved ? "default" : regressed ? "destructive" : "secondary"}
              className="text-xs"
            >
              {sign}
              {format(Math.abs(delta))}
            </Badge>
          )}
        </div>
      </CardContent>
    </Card>
  );
}
|
|
302
|
+
|
|
303
|
+
/**
 * Unweighted mean of the per-eval pass rates.
 *
 * @param evals Per-eval aggregates for one run.
 * @returns The mean of `passRate` across `evals`, or 0 for an empty list.
 */
function avgPassRate(evals: EvalDetail[]): number {
  const count = evals.length;
  if (count === 0) {
    return 0;
  }

  let total = 0;
  for (const { passRate } of evals) {
    total += passRate;
  }
  return total / count;
}
|
|
307
|
+
|
|
308
|
+
/**
 * Unweighted mean of the per-eval mean durations.
 *
 * @param evals Per-eval aggregates for one run.
 * @returns The mean of `meanDuration` across `evals`, or 0 for an empty list.
 */
function avgDuration(evals: EvalDetail[]): number {
  const count = evals.length;
  if (count === 0) {
    return 0;
  }

  let total = 0;
  for (const { meanDuration } of evals) {
    total += meanDuration;
  }
  return total / count;
}
|
|
312
|
+
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import Link from "next/link";
|
|
2
|
+
import { Badge } from "@/components/ui/badge";
|
|
3
|
+
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
|
|
4
|
+
import { ScrollArea } from "@/components/ui/scroll-area";
|
|
5
|
+
|
|
6
|
+
/** Data shown on the detail page of a single eval. */
interface EvalDetailData {
  /** Eval name; shown in the breadcrumb and page heading. */
  name: string;
  /** Prompt text shown in the PROMPT.md tab; an empty string shows a "not found" message. */
  prompt: string;
  /** File names listed in the Files tab. */
  files: Array<string>;
  /** Optional file contents keyed by file name; "EVAL.ts", "EVAL.tsx" and "package.json" are read. */
  fileContents?: { [fileName: string]: string };
}
|
|
12
|
+
|
|
13
|
+
/** Props accepted by the EvalDetail component. */
interface EvalDetailProps {
  /** The eval to display. */
  data: EvalDetailData;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Detail view for one eval: breadcrumb, heading, and tabs for the prompt,
 * the eval source, package.json and the file list.
 *
 * The eval-source and package.json tabs are only rendered when
 * `data.fileContents` has non-empty content for them.
 */
export function EvalDetail({ data }: EvalDetailProps) {
  // EVAL.ts takes precedence; EVAL.tsx is the fallback when it is missing or empty.
  const evalFile = data.fileContents?.["EVAL.ts"] || data.fileContents?.["EVAL.tsx"];
  const evalFileName = data.fileContents?.["EVAL.ts"] ? "EVAL.ts" : "EVAL.tsx";
  const packageJson = data.fileContents?.["package.json"];
  const fileCount = data.files.length;

  // All three text tabs share the same scrollable <pre> layout.
  const renderTextTab = (value: string, text: string, preClassName: string) => (
    <TabsContent value={value} className="mt-4">
      <ScrollArea className="h-[calc(100vh-300px)]">
        <pre className={preClassName}>{text}</pre>
      </ScrollArea>
    </TabsContent>
  );

  return (
    <div className="space-y-6">
      {/* Breadcrumb */}
      <div className="flex items-center gap-2 text-sm text-muted-foreground">
        <Link href="/evals" className="cursor-pointer hover:underline underline-offset-4">
          Evals
        </Link>
        <span>/</span>
        <span>{data.name}</span>
      </div>

      <div>
        <h1 className="text-2xl font-bold tracking-tight">{data.name}</h1>
      </div>

      <Tabs defaultValue="prompt" className="w-full">
        <TabsList>
          <TabsTrigger value="prompt">PROMPT.md</TabsTrigger>
          {evalFile && <TabsTrigger value="eval">{evalFileName}</TabsTrigger>}
          {packageJson && <TabsTrigger value="package">package.json</TabsTrigger>}
          <TabsTrigger value="files">Files</TabsTrigger>
        </TabsList>

        {renderTextTab(
          "prompt",
          data.prompt || "No PROMPT.md found.",
          "text-sm font-mono whitespace-pre-wrap rounded-lg bg-muted p-4",
        )}

        {evalFile &&
          renderTextTab(
            "eval",
            evalFile,
            "text-sm font-mono whitespace-pre-wrap rounded-lg bg-muted p-4 overflow-x-auto",
          )}

        {packageJson &&
          renderTextTab(
            "package",
            packageJson,
            "text-sm font-mono whitespace-pre-wrap rounded-lg bg-muted p-4 overflow-x-auto",
          )}

        <TabsContent value="files" className="mt-4">
          <div className="text-sm text-muted-foreground mb-3">
            {/* Singular for exactly one file, so the count never reads "1 files" */}
            {fileCount} {fileCount === 1 ? "file" : "files"}
          </div>
          <div className="space-y-1">
            {data.files.map((file) => (
              <div
                key={file}
                className="flex items-center gap-2 text-sm font-mono bg-muted rounded-lg px-3 py-1.5"
              >
                <FileIcon filename={file} />
                <span>{file}</span>
              </div>
            ))}
          </div>
        </TabsContent>
      </Tabs>
    </div>
  );
}
|
|
93
|
+
|
|
94
|
+
// Badge labels keyed by lowercase file extension. A Map is used instead of a
// plain object so that names such as "constructor" or "__proto__" cannot match
// members inherited from Object.prototype. Built once, not on every render.
const FILE_ICON_LABELS = new Map<string, string>([
  ["ts", "TS"],
  ["tsx", "TX"],
  ["js", "JS"],
  ["json", "{}"],
  ["md", "MD"],
  ["css", "CS"],
  ["html", "HT"],
]);

/**
 * Small badge with a two-character label derived from the file extension.
 *
 * The extension is the text after the last "." (the whole name when there is
 * no dot), compared case-insensitively. Unknown extensions get "F".
 */
function FileIcon({ filename }: { filename: string }) {
  const ext = filename.split(".").pop()?.toLowerCase() ?? "";
  const label = FILE_ICON_LABELS.get(ext) ?? "F";

  return (
    <Badge variant="outline" className="text-[10px] font-mono px-1 py-0 h-5 w-6 flex items-center justify-center">
      {label}
    </Badge>
  );
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import Link from "next/link";
|
|
2
|
+
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
|
3
|
+
import { Badge } from "@/components/ui/badge";
|
|
4
|
+
import { ShowMore } from "@/components/ShowMore";
|
|
5
|
+
|
|
6
|
+
/** Summary of one eval as shown on a card in the evals list. */
interface EvalInfo {
  /** Eval name; used as the card title and, URL-encoded, in the detail link. */
  name: string;
  /** Prompt text; the first 200 characters are shown as a preview. */
  prompt: string;
  /** File names; the first five are shown as badges. */
  files: Array<string>;
}
|
|
11
|
+
|
|
12
|
+
/** Props accepted by the EvalsPage component. */
interface EvalsPageProps {
  /** Evals to render as cards. */
  evals: Array<EvalInfo>;
  /** Total count, passed through to ShowMore. */
  total: number;
  /** When true, no "show all" link target is passed to ShowMore. */
  showAll: boolean;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Evals listing page: a heading followed by either an empty-state card or a
 * grid of eval cards wrapped in ShowMore.
 *
 * Each card links to the eval's detail page and shows the eval name, a prompt
 * preview, and up to five file badges.
 */
export function EvalsPage({ evals, total, showAll }: EvalsPageProps) {
  const hasEvals = evals.length !== 0;
  const showAllHref = showAll ? undefined : "/evals?all";

  // Shown when there is nothing to list
  const emptyState = (
    <Card>
      <CardContent className="py-12 text-center">
        <p className="text-muted-foreground text-lg">No evals found</p>
        <p className="text-muted-foreground text-sm mt-2">
          Create evals in your{" "}
          <code className="text-foreground bg-muted px-1.5 py-0.5 rounded text-xs">
            evals/
          </code>{" "}
          directory.
        </p>
      </CardContent>
    </Card>
  );

  // One linked card per eval
  const renderEvalCard = ({ name, prompt, files }: EvalInfo) => {
    const promptPreview = prompt.slice(0, 200);
    const promptEllipsis = prompt.length > 200 ? "..." : "";
    const visibleFiles = files.slice(0, 5);
    const hiddenFileCount = files.length - 5;

    return (
      <Link key={name} href={`/evals/${encodeURIComponent(name)}`} className="block cursor-pointer">
        <Card className="transition-colors hover:bg-muted">
          <CardHeader>
            <CardTitle className="text-base">{name}</CardTitle>
          </CardHeader>
          <CardContent>
            {prompt && (
              <p className="text-sm text-muted-foreground line-clamp-3 mb-3">
                {promptPreview}
                {promptEllipsis}
              </p>
            )}
            <div className="flex flex-wrap gap-1">
              {visibleFiles.map((file) => (
                <Badge key={file} variant="secondary" className="text-xs font-mono">
                  {file}
                </Badge>
              ))}
              {hiddenFileCount > 0 && (
                <Badge variant="outline" className="text-xs">
                  +{hiddenFileCount} more
                </Badge>
              )}
            </div>
          </CardContent>
        </Card>
      </Link>
    );
  };

  return (
    <div className="space-y-6">
      <div>
        <h1 className="text-2xl font-bold tracking-tight">Evals</h1>
        <p className="text-muted-foreground mt-1">
          Browse all eval fixtures in your project.
        </p>
      </div>

      {hasEvals ? (
        <ShowMore total={total} showAllHref={showAllHref} className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
          {evals.map((evalInfo) => renderEvalCard(evalInfo))}
        </ShowMore>
      ) : (
        emptyState
      )}
    </div>
  );
}
|