@vercel/agent-eval-playground 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +50 -0
  2. package/app/compare/page.tsx +40 -0
  3. package/app/evals/[name]/page.tsx +22 -0
  4. package/app/evals/page.tsx +18 -0
  5. package/app/experiments/[name]/[timestamp]/page.tsx +23 -0
  6. package/app/experiments/page.tsx +28 -0
  7. package/app/globals.css +126 -0
  8. package/app/layout.tsx +102 -0
  9. package/app/page.tsx +179 -0
  10. package/app/transcript/[experiment]/[timestamp]/[evalName]/[run]/page.tsx +43 -0
  11. package/bin.mjs +86 -0
  12. package/components/ComparePage.tsx +312 -0
  13. package/components/EvalDetail.tsx +114 -0
  14. package/components/EvalsPage.tsx +80 -0
  15. package/components/ExperimentDetail.tsx +162 -0
  16. package/components/ExperimentList.tsx +103 -0
  17. package/components/O11ySummary.tsx +114 -0
  18. package/components/RunResultCard.tsx +72 -0
  19. package/components/ShowMore.tsx +60 -0
  20. package/components/TranscriptPage.tsx +46 -0
  21. package/components/TranscriptViewer.tsx +201 -0
  22. package/components/ui/alert-dialog.tsx +184 -0
  23. package/components/ui/badge.tsx +45 -0
  24. package/components/ui/button.tsx +60 -0
  25. package/components/ui/card.tsx +94 -0
  26. package/components/ui/collapsible.tsx +34 -0
  27. package/components/ui/combobox.tsx +297 -0
  28. package/components/ui/dropdown-menu.tsx +269 -0
  29. package/components/ui/field.tsx +227 -0
  30. package/components/ui/input-group.tsx +147 -0
  31. package/components/ui/input.tsx +19 -0
  32. package/components/ui/label.tsx +24 -0
  33. package/components/ui/progress.tsx +31 -0
  34. package/components/ui/scroll-area.tsx +58 -0
  35. package/components/ui/select.tsx +191 -0
  36. package/components/ui/separator.tsx +28 -0
  37. package/components/ui/table.tsx +116 -0
  38. package/components/ui/tabs.tsx +91 -0
  39. package/components/ui/textarea.tsx +18 -0
  40. package/components/ui/tooltip.tsx +57 -0
  41. package/components.json +25 -0
  42. package/lib/data.ts +297 -0
  43. package/lib/types.ts +113 -0
  44. package/lib/utils.ts +6 -0
  45. package/next.config.ts +5 -0
  46. package/package.json +51 -0
  47. package/postcss.config.mjs +7 -0
  48. package/public/vercel.svg +1 -0
  49. package/tsconfig.json +42 -0
@@ -0,0 +1,28 @@
1
+ "use client"
2
+
3
+ import * as React from "react"
4
+ import { Separator as SeparatorPrimitive } from "radix-ui"
5
+
6
+ import { cn } from "@/lib/utils"
7
+
8
/**
 * Divider built on the Radix `Separator` primitive.
 *
 * `orientation` defaults to "horizontal" and `decorative` to `true`. The
 * caller's `className` is combined with the base classes through `cn`, and
 * every remaining prop is spread onto the primitive last.
 */
function Separator(
  props: React.ComponentProps<typeof SeparatorPrimitive.Root>
) {
  const {
    className,
    orientation = "horizontal",
    decorative = true,
    ...rest
  } = props

  const classes = cn(
    "bg-border shrink-0 data-horizontal:h-px data-horizontal:w-full data-vertical:w-px data-vertical:self-stretch",
    className
  )

  return (
    <SeparatorPrimitive.Root
      data-slot="separator"
      decorative={decorative}
      orientation={orientation}
      className={classes}
      {...rest}
    />
  )
}
27
+
28
+ export { Separator }
@@ -0,0 +1,116 @@
1
+ "use client"
2
+
3
+ import * as React from "react"
4
+
5
+ import { cn } from "@/lib/utils"
6
+
7
/**
 * `<table>` wrapped in a horizontally scrollable container.
 *
 * `className` and all other props apply to the inner `<table>`, not to the
 * wrapping `<div>`.
 */
function Table(props: React.ComponentProps<"table">) {
  const { className, ...tableProps } = props

  const table = (
    <table
      data-slot="table"
      className={cn("w-full caption-bottom text-sm", className)}
      {...tableProps}
    />
  )

  return (
    <div
      data-slot="table-container"
      className="relative w-full overflow-x-auto"
    >
      {table}
    </div>
  )
}
21
+
22
/** `<thead>` whose rows get a bottom border; extra props are forwarded. */
function TableHeader(props: React.ComponentProps<"thead">) {
  const { className, ...rest } = props
  const classes = cn("[&_tr]:border-b", className)

  return <thead data-slot="table-header" className={classes} {...rest} />
}
31
+
32
/** `<tbody>` that removes the border from its last row; extra props are forwarded. */
function TableBody(props: React.ComponentProps<"tbody">) {
  const { className, ...rest } = props
  const classes = cn("[&_tr:last-child]:border-0", className)

  return <tbody data-slot="table-body" className={classes} {...rest} />
}
41
+
42
/** `<tfoot>` with a muted background and top border; extra props are forwarded. */
function TableFooter(props: React.ComponentProps<"tfoot">) {
  const { className, ...rest } = props
  const classes = cn(
    "bg-muted/50 border-t font-medium [&>tr]:last:border-b-0",
    className
  )

  return <tfoot data-slot="table-footer" className={classes} {...rest} />
}
54
+
55
/**
 * `<tr>` with hover and `data-[state=selected]` background styles.
 * Extra props are forwarded to the row element.
 */
function TableRow(props: React.ComponentProps<"tr">) {
  const { className, ...rest } = props
  const classes = cn(
    "hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors",
    className
  )

  return <tr data-slot="table-row" className={classes} {...rest} />
}
67
+
68
/** Header cell (`<th>`) with the table's shared cell styling; extra props are forwarded. */
function TableHead(props: React.ComponentProps<"th">) {
  const { className, ...rest } = props
  const classes = cn(
    "text-foreground h-10 px-2 text-left align-middle font-medium whitespace-nowrap [&:has([role=checkbox])]:pr-0 [&>[role=checkbox]]:translate-y-[2px]",
    className
  )

  return <th data-slot="table-head" className={classes} {...rest} />
}
80
+
81
/** Body cell (`<td>`) with the table's shared cell styling; extra props are forwarded. */
function TableCell(props: React.ComponentProps<"td">) {
  const { className, ...rest } = props
  const classes = cn(
    "p-2 align-middle whitespace-nowrap [&:has([role=checkbox])]:pr-0 [&>[role=checkbox]]:translate-y-[2px]",
    className
  )

  return <td data-slot="table-cell" className={classes} {...rest} />
}
93
+
94
/** `<caption>` rendered in muted, small text; extra props are forwarded. */
function TableCaption(props: React.ComponentProps<"caption">) {
  const { className, ...rest } = props
  const classes = cn("text-muted-foreground mt-4 text-sm", className)

  return <caption data-slot="table-caption" className={classes} {...rest} />
}
106
+
107
+ export {
108
+ Table,
109
+ TableHeader,
110
+ TableBody,
111
+ TableFooter,
112
+ TableHead,
113
+ TableRow,
114
+ TableCell,
115
+ TableCaption,
116
+ }
@@ -0,0 +1,91 @@
1
+ "use client"
2
+
3
+ import * as React from "react"
4
+ import { cva, type VariantProps } from "class-variance-authority"
5
+ import { Tabs as TabsPrimitive } from "radix-ui"
6
+
7
+ import { cn } from "@/lib/utils"
8
+
9
/**
 * Root of a tab set, wrapping the Radix `Tabs.Root` primitive.
 *
 * `orientation` defaults to "horizontal" and is also written to
 * `data-orientation`, which the `group/tabs` selectors used by the list and
 * triggers in this file key off. Remaining props are spread last.
 */
function Tabs(props: React.ComponentProps<typeof TabsPrimitive.Root>) {
  const { className, orientation = "horizontal", ...rest } = props
  const classes = cn(
    "group/tabs flex gap-2 data-[orientation=horizontal]:flex-col",
    className
  )

  return (
    <TabsPrimitive.Root
      data-slot="tabs"
      data-orientation={orientation}
      orientation={orientation}
      className={classes}
      {...rest}
    />
  )
}
27
+
28
/**
 * Class-name generator for the tab list container.
 *
 * Variants:
 * - `default`: muted background.
 * - `line`: transparent background with a small gap between triggers.
 *
 * `default` is used when no variant is given.
 */
const tabsListVariants = cva(
  "rounded-lg p-[3px] group-data-[orientation=horizontal]/tabs:h-9 data-[variant=line]:rounded-none group/tabs-list text-muted-foreground inline-flex w-fit items-center justify-center group-data-[orientation=vertical]/tabs:h-fit group-data-[orientation=vertical]/tabs:flex-col",
  {
    variants: {
      variant: {
        default: "bg-muted",
        line: "gap-1 bg-transparent",
      },
    },
    defaultVariants: { variant: "default" },
  }
)
42
+
43
/**
 * Container for tab triggers, wrapping the Radix `Tabs.List` primitive.
 *
 * `variant` (default "default") selects the classes from `tabsListVariants`
 * and is also written to `data-variant`. Remaining props are spread last.
 */
function TabsList(
  props: React.ComponentProps<typeof TabsPrimitive.List> &
    VariantProps<typeof tabsListVariants>
) {
  const { className, variant = "default", ...rest } = props
  const classes = cn(tabsListVariants({ variant }), className)

  return (
    <TabsPrimitive.List
      data-slot="tabs-list"
      data-variant={variant}
      className={classes}
      {...rest}
    />
  )
}
58
+
59
/**
 * A single tab button, wrapping the Radix `Tabs.Trigger` primitive.
 *
 * The class groups are passed to `cn` in this order: base layout and focus
 * styles, `line`-variant overrides, active-state colors, then the `::after`
 * indicator bar, followed by the caller's `className`.
 */
function TabsTrigger(
  props: React.ComponentProps<typeof TabsPrimitive.Trigger>
) {
  const { className, ...rest } = props

  const classes = cn(
    "cursor-pointer focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:outline-ring text-foreground/60 hover:text-foreground dark:text-muted-foreground dark:hover:text-foreground relative inline-flex h-[calc(100%-1px)] flex-1 items-center justify-center gap-1.5 rounded-md border border-transparent px-2 py-1 text-sm font-medium whitespace-nowrap transition-all group-data-[orientation=vertical]/tabs:w-full group-data-[orientation=vertical]/tabs:justify-start focus-visible:ring-[3px] focus-visible:outline-1 disabled:pointer-events-none disabled:opacity-50 group-data-[variant=default]/tabs-list:data-[state=active]:shadow-sm group-data-[variant=line]/tabs-list:data-[state=active]:shadow-none [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    "group-data-[variant=line]/tabs-list:bg-transparent group-data-[variant=line]/tabs-list:data-[state=active]:bg-transparent dark:group-data-[variant=line]/tabs-list:data-[state=active]:border-transparent dark:group-data-[variant=line]/tabs-list:data-[state=active]:bg-transparent",
    "data-[state=active]:bg-background dark:data-[state=active]:text-foreground dark:data-[state=active]:border-input dark:data-[state=active]:bg-input/30 data-[state=active]:text-foreground",
    "after:bg-foreground after:absolute after:opacity-0 after:transition-opacity group-data-[orientation=horizontal]/tabs:after:inset-x-0 group-data-[orientation=horizontal]/tabs:after:bottom-[-5px] group-data-[orientation=horizontal]/tabs:after:h-0.5 group-data-[orientation=vertical]/tabs:after:inset-y-0 group-data-[orientation=vertical]/tabs:after:-right-1 group-data-[orientation=vertical]/tabs:after:w-0.5 group-data-[variant=line]/tabs-list:data-[state=active]:after:opacity-100",
    className
  )

  return (
    <TabsPrimitive.Trigger
      data-slot="tabs-trigger"
      className={classes}
      {...rest}
    />
  )
}
77
+
78
/** Panel for one tab, wrapping the Radix `Tabs.Content` primitive; extra props are forwarded. */
function TabsContent(
  props: React.ComponentProps<typeof TabsPrimitive.Content>
) {
  const { className, ...rest } = props
  const classes = cn("flex-1 outline-none", className)

  return (
    <TabsPrimitive.Content
      data-slot="tabs-content"
      className={classes}
      {...rest}
    />
  )
}
90
+
91
+ export { Tabs, TabsList, TabsTrigger, TabsContent, tabsListVariants }
@@ -0,0 +1,18 @@
1
+ import * as React from "react"
2
+
3
+ import { cn } from "@/lib/utils"
4
+
5
/**
 * Styled `<textarea>`.
 *
 * The caller's `className` is combined with the base classes through `cn`;
 * all other props are spread onto the element last.
 */
function Textarea(props: React.ComponentProps<"textarea">) {
  const { className, ...rest } = props
  const classes = cn(
    "border-input bg-input/20 dark:bg-input/30 focus-visible:border-ring focus-visible:ring-ring/30 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 resize-none rounded-md border px-2 py-2 text-sm transition-colors focus-visible:ring-2 aria-invalid:ring-2 md:text-xs/relaxed placeholder:text-muted-foreground flex field-sizing-content min-h-16 w-full outline-none disabled:cursor-not-allowed disabled:opacity-50",
    className
  )

  return <textarea data-slot="textarea" className={classes} {...rest} />
}
17
+
18
+ export { Textarea }
@@ -0,0 +1,57 @@
1
+ "use client"
2
+
3
+ import * as React from "react"
4
+ import { Tooltip as TooltipPrimitive } from "radix-ui"
5
+
6
+ import { cn } from "@/lib/utils"
7
+
8
/**
 * Wraps the Radix `Tooltip.Provider` primitive.
 * `delayDuration` defaults to 0 here; remaining props are spread last.
 */
function TooltipProvider(
  props: React.ComponentProps<typeof TooltipPrimitive.Provider>
) {
  const { delayDuration = 0, ...rest } = props

  return (
    <TooltipPrimitive.Provider
      data-slot="tooltip-provider"
      delayDuration={delayDuration}
      {...rest}
    />
  )
}
20
+
21
/** Wraps the Radix `Tooltip.Root` primitive, tagging it with `data-slot="tooltip"`. */
function Tooltip(props: React.ComponentProps<typeof TooltipPrimitive.Root>) {
  return <TooltipPrimitive.Root data-slot="tooltip" {...props} />
}
26
+
27
/** Wraps the Radix `Tooltip.Trigger` primitive, tagging it with `data-slot="tooltip-trigger"`. */
function TooltipTrigger(
  props: React.ComponentProps<typeof TooltipPrimitive.Trigger>
) {
  return <TooltipPrimitive.Trigger data-slot="tooltip-trigger" {...props} />
}
32
+
33
/**
 * Tooltip bubble rendered inside a Radix `Tooltip.Portal`.
 *
 * `sideOffset` defaults to 0. The children are rendered first, followed by
 * the arrow element. Remaining props are spread onto `Tooltip.Content` last.
 */
function TooltipContent(
  props: React.ComponentProps<typeof TooltipPrimitive.Content>
) {
  const { className, sideOffset = 0, children, ...rest } = props

  const classes = cn(
    "bg-foreground text-background animate-in fade-in-0 zoom-in-95 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 w-fit origin-(--radix-tooltip-content-transform-origin) rounded-md px-3 py-1.5 text-xs text-balance",
    className
  )

  return (
    <TooltipPrimitive.Portal>
      <TooltipPrimitive.Content
        data-slot="tooltip-content"
        sideOffset={sideOffset}
        className={classes}
        {...rest}
      >
        {children}
        <TooltipPrimitive.Arrow className="bg-foreground fill-foreground z-50 size-2.5 translate-y-[calc(-50%_-_2px)] rotate-45 rounded-[2px]" />
      </TooltipPrimitive.Content>
    </TooltipPrimitive.Portal>
  )
}
56
+
57
+ export { Tooltip, TooltipTrigger, TooltipContent, TooltipProvider }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "https://ui.shadcn.com/schema.json",
3
+ "style": "radix-mira",
4
+ "rsc": true,
5
+ "tsx": true,
6
+ "tailwind": {
7
+ "config": "",
8
+ "css": "app/globals.css",
9
+ "baseColor": "zinc",
10
+ "cssVariables": true,
11
+ "prefix": ""
12
+ },
13
+ "iconLibrary": "remixicon",
14
+ "rtl": false,
15
+ "aliases": {
16
+ "components": "@/components",
17
+ "utils": "@/lib/utils",
18
+ "ui": "@/components/ui",
19
+ "lib": "@/lib",
20
+ "hooks": "@/hooks"
21
+ },
22
+ "menuColor": "inverted",
23
+ "menuAccent": "subtle",
24
+ "registries": {}
25
+ }
package/lib/data.ts ADDED
@@ -0,0 +1,297 @@
1
+ /**
2
+ * Server-side data access for the playground.
3
+ * Reads JSON files from the results/ and evals/ directories.
4
+ * Directory paths are provided via RESULTS_DIR and EVALS_DIR env vars.
5
+ */
6
+
7
import { readdirSync, readFileSync, existsSync } from "fs";
import { isAbsolute, join, relative, resolve, sep } from "path";
9
+
10
/**
 * Absolute path of the directory holding experiment results.
 * Uses the RESULTS_DIR environment variable when it is set to a non-empty
 * value and falls back to "./results" otherwise.
 */
function getResultsDir(): string {
  const configured = process.env.RESULTS_DIR;
  const target = configured ? configured : "./results";
  return resolve(target);
}
13
+
14
/**
 * Absolute path of the directory holding eval definitions.
 * Uses the EVALS_DIR environment variable when it is set to a non-empty
 * value and falls back to "./evals" otherwise.
 */
function getEvalsDir(): string {
  const configured = process.env.EVALS_DIR;
  const target = configured ? configured : "./evals";
  return resolve(target);
}
17
+
18
/**
 * Coerce a field read from summary.json into a run count.
 * Only finite, non-negative numbers are accepted; anything else (strings,
 * null, NaN, missing) counts as 0 so it cannot corrupt the running totals.
 */
function toRunCount(value: unknown): number {
  return typeof value === "number" && Number.isFinite(value) && value >= 0
    ? value
    : 0;
}

/**
 * List experiments from the results directory.
 *
 * Each sub-directory of the results directory is one experiment and each of
 * its sub-directories is a timestamped run. For the newest timestamp the
 * `summary.json` of every eval directory is read and aggregated.
 *
 * @param limit Optional cap on how many experiments get the per-item reads.
 *   A falsy value (undefined or 0) means "no cap".
 * @returns `items` (at most `limit` experiments, sorted by name so the
 *   selected subset is deterministic) and `total` (count of all experiment
 *   directories, regardless of `limit`). `latestPassRate` is a percentage and
 *   stays `undefined` when no runs were recorded.
 */
export function listExperiments(limit?: number) {
  const resultsDir = getResultsDir();

  if (!existsSync(resultsDir)) {
    return { items: [], total: 0 };
  }

  // readdir order is not specified, so sort before slicing; otherwise a
  // limit would pick an arbitrary subset.
  const entries = readdirSync(resultsDir, { withFileTypes: true })
    .filter((e) => e.isDirectory())
    .map((e) => e.name)
    .sort();

  const total = entries.length;
  const toProcess = limit ? entries.slice(0, limit) : entries;

  const items = toProcess.map((name) => {
    const expDir = join(resultsDir, name);
    // Newest first: timestamps are compared as plain strings.
    const timestamps = readdirSync(expDir, { withFileTypes: true })
      .filter((e) => e.isDirectory())
      .map((e) => e.name)
      .sort()
      .reverse();

    let latestPassRate: number | undefined;
    let latestTotalRuns = 0;
    let latestPassedRuns = 0;

    if (timestamps.length > 0) {
      const latestDir = join(expDir, timestamps[0]);
      const evalDirs = readdirSync(latestDir, { withFileTypes: true }).filter(
        (e) => e.isDirectory()
      );

      for (const evalDir of evalDirs) {
        const summaryPath = join(latestDir, evalDir.name, "summary.json");
        if (!existsSync(summaryPath)) {
          continue;
        }
        try {
          const summary: unknown = JSON.parse(
            readFileSync(summaryPath, "utf-8")
          );
          if (typeof summary !== "object" || summary === null) {
            continue;
          }
          const { totalRuns, passedRuns } = summary as {
            totalRuns?: unknown;
            passedRuns?: unknown;
          };
          latestTotalRuns += toRunCount(totalRuns);
          latestPassedRuns += toRunCount(passedRuns);
        } catch {
          // Skip unreadable or malformed summary files
        }
      }

      if (latestTotalRuns > 0) {
        latestPassRate = (latestPassedRuns / latestTotalRuns) * 100;
      }
    }

    return {
      name,
      timestamps,
      latestTimestamp: timestamps[0] ?? null,
      latestPassRate,
      latestTotalRuns,
      latestPassedRuns,
    };
  });

  return { items, total };
}
81
+
82
/**
 * Get the timestamps recorded for a specific experiment.
 *
 * @param name Experiment directory name, relative to the results directory.
 *   NOTE(review): presumably supplied from a route parameter — it is treated
 *   as untrusted, so any value that resolves outside the results directory
 *   (for example "../x") is rejected.
 * @returns The experiment name, its timestamp directories sorted newest
 *   first, and the newest one (or `null` if there are none). Returns `null`
 *   when the experiment does not exist, escapes the results directory, or is
 *   not a readable directory.
 */
export function getExperiment(name: string) {
  const resultsDir = getResultsDir();
  const expDir = join(resultsDir, name);

  // Containment check: the joined path must stay inside resultsDir.
  const rel = relative(resultsDir, expDir);
  if (rel === ".." || rel.startsWith(`..${sep}`) || isAbsolute(rel)) {
    return null;
  }

  if (!existsSync(expDir)) {
    return null;
  }

  let timestamps: string[];
  try {
    timestamps = readdirSync(expDir, { withFileTypes: true })
      .filter((e) => e.isDirectory())
      .map((e) => e.name)
      .sort()
      .reverse();
  } catch {
    // The path exists but cannot be listed (e.g. it is a plain file).
    return null;
  }

  return { name, timestamps, latestTimestamp: timestamps[0] ?? null };
}
98
+
99
/**
 * Read an eval's summary.json into a fully populated, numeric summary.
 *
 * A missing file, malformed JSON, a non-object payload (such as `null`) or a
 * wrongly typed field all fall back to 0. `passRate` may be stored either as
 * a number or as a string such as "50%", which is parsed with `parseFloat`.
 */
function readEvalSummary(summaryPath: string) {
  const summary = { totalRuns: 0, passedRuns: 0, passRate: 0, meanDuration: 0 };
  if (!existsSync(summaryPath)) {
    return summary;
  }

  const finiteOr = (value: unknown, fallback: number): number =>
    typeof value === "number" && Number.isFinite(value) ? value : fallback;

  try {
    const parsed: unknown = JSON.parse(readFileSync(summaryPath, "utf-8"));
    if (typeof parsed === "object" && parsed !== null) {
      const fields = parsed as Record<string, unknown>;
      const rawRate = fields.passRate;
      summary.totalRuns = finiteOr(fields.totalRuns, 0);
      summary.passedRuns = finiteOr(fields.passedRuns, 0);
      summary.passRate = finiteOr(
        typeof rawRate === "string" ? parseFloat(rawRate) : rawRate,
        0
      );
      summary.meanDuration = finiteOr(fields.meanDuration, 0);
    }
  } catch {
    // Unreadable or malformed summary: keep the defaults
  }

  return summary;
}

/**
 * Get full experiment detail for a specific timestamp.
 *
 * For every eval directory under `<results>/<name>/<timestamp>` this returns
 * the numeric summary plus one entry per `run-*` directory with the parsed
 * `result.json` (or `null` when it is missing or malformed). Runs are ordered
 * numerically, so `run-2` comes before `run-10`.
 *
 * @param name Experiment directory name.
 * @param timestamp Timestamp directory name inside the experiment.
 *   NOTE(review): both are presumably route parameters — they are treated as
 *   untrusted and rejected if the joined path leaves the results directory.
 * @returns `{ name, timestamp, evals }`, or `null` when the run directory
 *   does not exist or escapes the results directory.
 */
export function getExperimentDetail(name: string, timestamp: string) {
  const resultsDir = getResultsDir();
  const runDir = join(resultsDir, name, timestamp);

  // Containment check: the joined path must stay inside resultsDir.
  const rel = relative(resultsDir, runDir);
  if (rel === ".." || rel.startsWith(`..${sep}`) || isAbsolute(rel)) {
    return null;
  }

  if (!existsSync(runDir)) {
    return null;
  }

  const evalDirs = readdirSync(runDir, { withFileTypes: true })
    .filter((e) => e.isDirectory())
    .map((e) => e.name);

  const evals = evalDirs.map((evalName) => {
    const evalDir = join(runDir, evalName);
    const summary = readEvalSummary(join(evalDir, "summary.json"));

    // List run directories in natural (numeric-aware) order
    const runDirs = readdirSync(evalDir, { withFileTypes: true })
      .filter((e) => e.isDirectory() && e.name.startsWith("run-"))
      .map((e) => e.name)
      .sort((a, b) => a.localeCompare(b, "en", { numeric: true }));

    // Read each run's result.json
    const runs = runDirs.map((runDirName) => {
      const resultPath = join(evalDir, runDirName, "result.json");
      let result = null;
      if (existsSync(resultPath)) {
        try {
          result = JSON.parse(readFileSync(resultPath, "utf-8"));
        } catch {
          // Leave result as null for malformed files
        }
      }
      return { name: runDirName, result };
    });

    return {
      name: evalName,
      totalRuns: summary.totalRuns,
      passedRuns: summary.passedRuns,
      passRate: summary.passRate,
      meanDuration: summary.meanDuration,
      runs,
    };
  });

  return { name, timestamp, evals };
}
164
+
165
/**
 * Get the parsed `result.json` for a specific run.
 *
 * The file is looked up at
 * `<results>/<experiment>/<timestamp>/<evalName>/<run>/result.json`.
 * NOTE(review): the four segments are presumably route parameters — they are
 * treated as untrusted and rejected if the joined path leaves the results
 * directory.
 *
 * @returns `{ result }` with the parsed JSON, or `null` when the file is
 *   missing, malformed, or outside the results directory.
 */
export function getRunResult(
  experiment: string,
  timestamp: string,
  evalName: string,
  run: string
) {
  const resultsDir = getResultsDir();
  const resultPath = join(
    resultsDir,
    experiment,
    timestamp,
    evalName,
    run,
    "result.json"
  );

  // Containment check: the joined path must stay inside resultsDir.
  const rel = relative(resultsDir, resultPath);
  if (rel === ".." || rel.startsWith(`..${sep}`) || isAbsolute(rel)) {
    return null;
  }

  if (!existsSync(resultPath)) {
    return null;
  }

  try {
    return { result: JSON.parse(readFileSync(resultPath, "utf-8")) };
  } catch {
    return null;
  }
}
191
+
192
/**
 * Get the parsed `transcript.json` for a specific run.
 *
 * The file is looked up at
 * `<results>/<experiment>/<timestamp>/<evalName>/<run>/transcript.json`.
 * NOTE(review): the four segments are presumably route parameters — they are
 * treated as untrusted and rejected if the joined path leaves the results
 * directory.
 *
 * @returns The parsed JSON value, or `null` when the file is missing,
 *   malformed, or outside the results directory.
 */
export function getTranscript(
  experiment: string,
  timestamp: string,
  evalName: string,
  run: string
) {
  const resultsDir = getResultsDir();
  const transcriptPath = join(
    resultsDir,
    experiment,
    timestamp,
    evalName,
    run,
    "transcript.json"
  );

  // Containment check: the joined path must stay inside resultsDir.
  const rel = relative(resultsDir, transcriptPath);
  if (rel === ".." || rel.startsWith(`..${sep}`) || isAbsolute(rel)) {
    return null;
  }

  if (!existsSync(transcriptPath)) {
    return null;
  }

  try {
    return JSON.parse(readFileSync(transcriptPath, "utf-8"));
  } catch {
    return null;
  }
}
218
+
219
/**
 * List evals from the evals directory.
 *
 * Each sub-directory of the evals directory is one eval. For every listed
 * eval the contents of its `PROMPT.md` (empty string when absent or
 * unreadable) and the names of its top-level files are returned.
 *
 * @param limit Optional cap on how many evals get the per-item reads.
 *   A falsy value (undefined or 0) means "no cap".
 * @returns `items` (at most `limit` evals, sorted by name so the selected
 *   subset is deterministic) and `total` (count of all eval directories,
 *   regardless of `limit`).
 */
export function listEvals(limit?: number) {
  const evalsDir = getEvalsDir();

  if (!existsSync(evalsDir)) {
    return { items: [], total: 0 };
  }

  // readdir order is not specified, so sort before slicing; otherwise a
  // limit would pick an arbitrary subset.
  const entries = readdirSync(evalsDir, { withFileTypes: true })
    .filter((e) => e.isDirectory())
    .map((e) => e.name)
    .sort();

  const total = entries.length;
  const toProcess = limit ? entries.slice(0, limit) : entries;

  const items = toProcess.map((name) => {
    const evalDir = join(evalsDir, name);
    const promptPath = join(evalDir, "PROMPT.md");
    let prompt = "";
    if (existsSync(promptPath)) {
      try {
        prompt = readFileSync(promptPath, "utf-8");
      } catch {
        // Unreadable prompt (e.g. PROMPT.md is a directory): keep it empty
        // rather than failing the whole listing.
      }
    }

    const files = readdirSync(evalDir, { withFileTypes: true })
      .filter((e) => e.isFile())
      .map((e) => e.name);

    return { name, prompt, files };
  });

  return { items, total };
}
251
+
252
/**
 * Get detail for a specific eval.
 *
 * @param name Eval directory name, relative to the evals directory.
 *   NOTE(review): presumably supplied from a route parameter — it is treated
 *   as untrusted, so any value that resolves outside the evals directory is
 *   rejected.
 * @returns
 *   - `prompt`: contents of `PROMPT.md`, or "" when absent or unreadable.
 *   - `files`: every file below the eval directory as a "/"-separated
 *     relative path, skipping `node_modules` and `.git`.
 *   - `fileContents`: text of the key files (`PROMPT.md`, `EVAL.ts`,
 *     `EVAL.tsx`, `package.json`) that exist and can be read.
 *   Returns `null` when the eval does not exist, escapes the evals directory,
 *   or is not a listable directory.
 */
export function getEvalDetail(name: string) {
  const evalsDir = getEvalsDir();
  const evalDir = join(evalsDir, name);

  // Containment check: the joined path must stay inside evalsDir.
  const rel = relative(evalsDir, evalDir);
  if (rel === ".." || rel.startsWith(`..${sep}`) || isAbsolute(rel)) {
    return null;
  }

  if (!existsSync(evalDir)) {
    return null;
  }

  const promptPath = join(evalDir, "PROMPT.md");
  let prompt = "";
  if (existsSync(promptPath)) {
    try {
      prompt = readFileSync(promptPath, "utf-8");
    } catch {
      // Unreadable prompt: keep it empty
    }
  }

  // List a directory, or null when it cannot be read
  const readEntries = (dir: string) => {
    try {
      return readdirSync(dir, { withFileTypes: true });
    } catch {
      return null;
    }
  };

  // Recursively list files; returns false when `dir` itself cannot be listed
  const files: string[] = [];
  function walk(dir: string, prefix: string): boolean {
    const entries = readEntries(dir);
    if (entries === null) {
      return false;
    }
    for (const entry of entries) {
      if (entry.name === "node_modules" || entry.name === ".git") continue;
      const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
      if (entry.isDirectory()) {
        // An unreadable nested directory is skipped, not fatal.
        walk(join(dir, entry.name), relativePath);
      } else {
        files.push(relativePath);
      }
    }
    return true;
  }
  if (!walk(evalDir, "")) {
    // The path exists but is not a listable directory (e.g. a plain file).
    return null;
  }

  // Read file contents for key files
  const fileContents: Record<string, string> = {};
  const keyFiles = ["PROMPT.md", "EVAL.ts", "EVAL.tsx", "package.json"];
  for (const file of keyFiles) {
    const filePath = join(evalDir, file);
    if (existsSync(filePath)) {
      try {
        fileContents[file] = readFileSync(filePath, "utf-8");
      } catch {
        // Skip unreadable files
      }
    }
  }

  return { name, prompt, files, fileContents };
}