@vercel/agent-eval-playground 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +50 -0
  2. package/app/compare/page.tsx +40 -0
  3. package/app/evals/[name]/page.tsx +22 -0
  4. package/app/evals/page.tsx +18 -0
  5. package/app/experiments/[name]/[timestamp]/page.tsx +23 -0
  6. package/app/experiments/page.tsx +28 -0
  7. package/app/globals.css +126 -0
  8. package/app/layout.tsx +102 -0
  9. package/app/page.tsx +179 -0
  10. package/app/transcript/[experiment]/[timestamp]/[evalName]/[run]/page.tsx +43 -0
  11. package/bin.mjs +86 -0
  12. package/components/ComparePage.tsx +312 -0
  13. package/components/EvalDetail.tsx +114 -0
  14. package/components/EvalsPage.tsx +80 -0
  15. package/components/ExperimentDetail.tsx +162 -0
  16. package/components/ExperimentList.tsx +103 -0
  17. package/components/O11ySummary.tsx +114 -0
  18. package/components/RunResultCard.tsx +72 -0
  19. package/components/ShowMore.tsx +60 -0
  20. package/components/TranscriptPage.tsx +46 -0
  21. package/components/TranscriptViewer.tsx +201 -0
  22. package/components/ui/alert-dialog.tsx +184 -0
  23. package/components/ui/badge.tsx +45 -0
  24. package/components/ui/button.tsx +60 -0
  25. package/components/ui/card.tsx +94 -0
  26. package/components/ui/collapsible.tsx +34 -0
  27. package/components/ui/combobox.tsx +297 -0
  28. package/components/ui/dropdown-menu.tsx +269 -0
  29. package/components/ui/field.tsx +227 -0
  30. package/components/ui/input-group.tsx +147 -0
  31. package/components/ui/input.tsx +19 -0
  32. package/components/ui/label.tsx +24 -0
  33. package/components/ui/progress.tsx +31 -0
  34. package/components/ui/scroll-area.tsx +58 -0
  35. package/components/ui/select.tsx +191 -0
  36. package/components/ui/separator.tsx +28 -0
  37. package/components/ui/table.tsx +116 -0
  38. package/components/ui/tabs.tsx +91 -0
  39. package/components/ui/textarea.tsx +18 -0
  40. package/components/ui/tooltip.tsx +57 -0
  41. package/components.json +25 -0
  42. package/lib/data.ts +297 -0
  43. package/lib/types.ts +113 -0
  44. package/lib/utils.ts +6 -0
  45. package/next.config.ts +5 -0
  46. package/package.json +51 -0
  47. package/postcss.config.mjs +7 -0
  48. package/public/vercel.svg +1 -0
  49. package/tsconfig.json +42 -0
package/README.md ADDED
@@ -0,0 +1,50 @@
1
+ # @vercel/agent-eval-playground
2
+
3
+ A web-based dashboard for browsing agent-eval experiment results. Ships as a standalone Next.js app — no build step required.
4
+
5
+ ## Usage
6
+
7
+ ```bash
8
+ # Run from your eval project root (where results/ and evals/ live)
9
+ npx @vercel/agent-eval-playground
10
+
11
+ # With options
12
+ npx @vercel/agent-eval-playground --results-dir ./results --evals-dir ./evals --port 3001
13
+ ```
14
+
15
+ ### CLI Options
16
+
17
+ | Flag | Default | Description |
18
+ |------|---------|-------------|
19
+ | `--results-dir <dir>` | `./results` | Path to experiment results directory |
20
+ | `--evals-dir <dir>` | `./evals` | Path to eval fixtures directory |
21
+ | `--port, -p <port>` | `3000` | HTTP server port |
22
+ | `--help, -h` | | Show help |
23
+
24
+ ## Pages
25
+
26
+ | Route | Description |
27
+ |-------|-------------|
28
+ | `/` | Dashboard with stats, recent experiments, eval previews |
29
+ | `/experiments` | Full experiment list |
30
+ | `/experiments/[name]/[timestamp]` | Experiment detail with per-eval breakdown and run results |
31
+ | `/evals` | All eval fixtures |
32
+ | `/evals/[name]` | Eval detail with prompt, files, and content |
33
+ | `/compare` | Side-by-side comparison of two experiment runs |
34
+ | `/transcript/[experiment]/[timestamp]/[evalName]/[run]` | Full agent transcript viewer |
35
+
36
+ ## Architecture
37
+
38
+ - **Server Components** for all data fetching (`lib/data.ts` reads `fs` directly)
39
+ - **Client Components** only for interactivity (`ComparePage` dropdowns, `TranscriptViewer` collapsibles)
40
+ - **No API routes** — all data is read server-side, no client-side fetching
41
+ - **No database** — filesystem is the source of truth
42
+ - **`force-dynamic`** on all pages — data can change between requests
43
+
44
+ ### How it works
45
+
46
+ The CLI (`bin.mjs`) resolves the bundled `next` binary, sets `RESULTS_DIR` and `EVALS_DIR` as environment variables, and spawns `next dev` from the package directory. Server Components call the helpers in `lib/data.ts`, which read directly from the filesystem.
47
+
48
+ ## Publishing
49
+
50
+ Published alongside `@vercel/agent-eval` via changesets. Run `npx changeset`, select `@vercel/agent-eval-playground`, and merge to main. The release workflow handles the rest via OIDC.
@@ -0,0 +1,40 @@
1
+ import { ComparePage } from "@/components/ComparePage";
2
+ import { listExperiments, getExperimentDetail } from "@/lib/data";
3
+
4
+ export const dynamic = "force-dynamic";
5
+
6
/**
 * Converts a timestamp string into a locale-formatted date string.
 *
 * The first `THH-MM-SS` group is rewritten to `THH:MM:SS` before the value is
 * handed to `Date`. If the result is not a valid date (or anything throws),
 * the input is returned unchanged.
 *
 * @param ts - Timestamp string, e.g. `2024-01-15T10-30-00.000Z`.
 * @returns The `toLocaleString()` rendering, or `ts` itself when unparseable.
 */
function formatTimestamp(ts: string): string {
  let formatted = ts;
  try {
    const parsed = new Date(ts.replace(/T(\d{2})-(\d{2})-(\d{2})/, "T$1:$2:$3"));
    const isValid = !isNaN(parsed.getTime());
    if (isValid) {
      formatted = parsed.toLocaleString();
    }
  } catch {
    // Keep the raw value as the fallback.
  }
  return formatted;
}
16
+
17
/**
 * Server route for the compare page.
 *
 * Walks every experiment/timestamp pair once, producing both the dropdown
 * options and a lookup of experiment details keyed by `name|||timestamp`,
 * then hands both to the `ComparePage` component.
 */
export default async function CompareRoute() {
  const listing = listExperiments();

  // Labels are formatted here, on the server, to avoid a hydration mismatch
  // (toLocaleString differs between Node.js and browser).
  const options: { value: string; label: string }[] = [];
  const detailsMap: Record<string, ReturnType<typeof getExperimentDetail>> = {};

  for (const experiment of listing.items) {
    for (const timestamp of experiment.timestamps) {
      const key = `${experiment.name}|||${timestamp}`;

      options.push({
        value: key,
        label: `${experiment.name} / ${formatTimestamp(timestamp)}`,
      });

      // Pairs without a loadable detail still get an option, but no map entry.
      const detail = getExperimentDetail(experiment.name, timestamp);
      if (detail) {
        detailsMap[key] = detail;
      }
    }
  }

  return <ComparePage options={options} detailsMap={detailsMap} />;
}
@@ -0,0 +1,22 @@
1
+ import { notFound } from "next/navigation";
2
+ import { EvalDetail } from "@/components/EvalDetail";
3
+ import { getEvalDetail } from "@/lib/data";
4
+
5
+ export const dynamic = "force-dynamic";
6
+
7
/**
 * Decodes a single route segment.
 *
 * `decodeURIComponent` throws `URIError` on a malformed percent-escape
 * (e.g. a lone `%`); this wrapper reports that case as `null` instead.
 */
function decodeSegment(segment: string): string | null {
  try {
    return decodeURIComponent(segment);
  } catch {
    return null;
  }
}

/**
 * Server route for a single eval's detail page.
 *
 * Responds with the not-found page when the `name` segment cannot be decoded
 * or when `getEvalDetail` returns nothing for it.
 *
 * @param params - Promise of the dynamic route params (`name`).
 */
export default async function EvalDetailRoute({
  params,
}: {
  params: Promise<{ name: string }>;
}) {
  const { name } = await params;

  const decodedName = decodeSegment(name);
  if (decodedName === null) {
    // A malformed segment cannot name an existing eval: 404 rather than 500.
    notFound();
  }

  const data = getEvalDetail(decodedName);

  if (!data) {
    notFound();
  }

  return <EvalDetail data={data} />;
}
@@ -0,0 +1,18 @@
1
+ import { EvalsPage } from "@/components/EvalsPage";
2
+ import { listEvals } from "@/lib/data";
3
+
4
+ export const dynamic = "force-dynamic";
5
+
6
+ const LIMIT = 21; // 7 rows of 3
7
+
8
/**
 * Server route for the evals listing.
 *
 * The presence of an `all` query parameter (with any value) lifts the
 * `LIMIT` cap; otherwise at most `LIMIT` evals are requested.
 *
 * @param searchParams - Promise of the URL query parameters.
 */
export default async function EvalsRoute({
  searchParams,
}: {
  searchParams: Promise<{ all?: string }>;
}) {
  const query = await searchParams;
  const showAll = query.all !== undefined;

  const limit = showAll ? undefined : LIMIT;
  const listing = listEvals(limit);

  return (
    <EvalsPage evals={listing.items} total={listing.total} showAll={showAll} />
  );
}
@@ -0,0 +1,23 @@
1
+ import { notFound } from "next/navigation";
2
+ import { ExperimentDetail } from "@/components/ExperimentDetail";
3
+ import { getExperimentDetail } from "@/lib/data";
4
+
5
+ export const dynamic = "force-dynamic";
6
+
7
/**
 * Decodes a single route segment.
 *
 * `decodeURIComponent` throws `URIError` on a malformed percent-escape
 * (e.g. a lone `%`); this wrapper reports that case as `null` instead.
 */
function decodeSegment(segment: string): string | null {
  try {
    return decodeURIComponent(segment);
  } catch {
    return null;
  }
}

/**
 * Server route for one experiment run's detail page.
 *
 * Responds with the not-found page when either segment cannot be decoded or
 * when `getExperimentDetail` returns nothing for the pair.
 *
 * @param params - Promise of the dynamic route params (`name`, `timestamp`).
 */
export default async function ExperimentPage({
  params,
}: {
  params: Promise<{ name: string; timestamp: string }>;
}) {
  const { name, timestamp } = await params;

  const decodedName = decodeSegment(name);
  const decodedTimestamp = decodeSegment(timestamp);
  if (decodedName === null || decodedTimestamp === null) {
    // A malformed segment cannot address an existing run: 404 rather than 500.
    notFound();
  }

  const data = getExperimentDetail(decodedName, decodedTimestamp);

  if (!data) {
    notFound();
  }

  return <ExperimentDetail data={data} />;
}
@@ -0,0 +1,28 @@
1
+ import { ExperimentList } from "@/components/ExperimentList";
2
+ import { listExperiments } from "@/lib/data";
3
+
4
+ export const dynamic = "force-dynamic";
5
+
6
+ const LIMIT = 20;
7
+
8
/**
 * Server route for the experiments listing.
 *
 * The presence of an `all` query parameter (with any value) lifts the
 * `LIMIT` cap; otherwise at most `LIMIT` experiments are requested.
 *
 * @param searchParams - Promise of the URL query parameters.
 */
export default async function ExperimentsPage({
  searchParams,
}: {
  searchParams: Promise<{ all?: string }>;
}) {
  const query = await searchParams;
  const showAll = query.all !== undefined;

  const limit = showAll ? undefined : LIMIT;
  const listing = listExperiments(limit);

  const heading = (
    <div>
      <h1 className="text-2xl font-bold tracking-tight">Experiments</h1>
      <p className="text-muted-foreground mt-1">
        Browse and inspect your agent evaluation results.
      </p>
    </div>
  );

  return (
    <div className="space-y-6">
      {heading}
      <ExperimentList
        experiments={listing.items}
        total={listing.total}
        showAll={showAll}
      />
    </div>
  );
}
@@ -0,0 +1,126 @@
1
+ @import "tailwindcss";
2
+ @import "tw-animate-css";
3
+ @import "shadcn/tailwind.css";
4
+
5
+ @custom-variant dark (&:is(.dark *));
6
+
7
+ @theme inline {
8
+ --color-background: var(--background);
9
+ --color-foreground: var(--foreground);
10
+ --font-sans: var(--font-sans);
11
+ --font-mono: var(--font-geist-mono);
12
+ --color-sidebar-ring: var(--sidebar-ring);
13
+ --color-sidebar-border: var(--sidebar-border);
14
+ --color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
15
+ --color-sidebar-accent: var(--sidebar-accent);
16
+ --color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
17
+ --color-sidebar-primary: var(--sidebar-primary);
18
+ --color-sidebar-foreground: var(--sidebar-foreground);
19
+ --color-sidebar: var(--sidebar);
20
+ --color-chart-5: var(--chart-5);
21
+ --color-chart-4: var(--chart-4);
22
+ --color-chart-3: var(--chart-3);
23
+ --color-chart-2: var(--chart-2);
24
+ --color-chart-1: var(--chart-1);
25
+ --color-ring: var(--ring);
26
+ --color-input: var(--input);
27
+ --color-border: var(--border);
28
+ --color-destructive: var(--destructive);
29
+ --color-accent-foreground: var(--accent-foreground);
30
+ --color-accent: var(--accent);
31
+ --color-muted-foreground: var(--muted-foreground);
32
+ --color-muted: var(--muted);
33
+ --color-secondary-foreground: var(--secondary-foreground);
34
+ --color-secondary: var(--secondary);
35
+ --color-primary-foreground: var(--primary-foreground);
36
+ --color-primary: var(--primary);
37
+ --color-popover-foreground: var(--popover-foreground);
38
+ --color-popover: var(--popover);
39
+ --color-card-foreground: var(--card-foreground);
40
+ --color-card: var(--card);
41
+ --radius-sm: calc(var(--radius) - 4px);
42
+ --radius-md: calc(var(--radius) - 2px);
43
+ --radius-lg: var(--radius);
44
+ --radius-xl: calc(var(--radius) + 4px);
45
+ --radius-2xl: calc(var(--radius) + 8px);
46
+ --radius-3xl: calc(var(--radius) + 12px);
47
+ --radius-4xl: calc(var(--radius) + 16px);
48
+ }
49
+
50
+ :root {
51
+ --background: oklch(1 0 0);
52
+ --foreground: oklch(0.141 0.005 285.823);
53
+ --card: oklch(1 0 0);
54
+ --card-foreground: oklch(0.141 0.005 285.823);
55
+ --popover: oklch(1 0 0);
56
+ --popover-foreground: oklch(0.141 0.005 285.823);
57
+ --primary: oklch(0.648 0.2 131.684);
58
+ --primary-foreground: oklch(0.986 0.031 120.757);
59
+ --secondary: oklch(0.967 0.001 286.375);
60
+ --secondary-foreground: oklch(0.21 0.006 285.885);
61
+ --muted: oklch(0.967 0.001 286.375);
62
+ --muted-foreground: oklch(0.552 0.016 285.938);
63
+ --accent: oklch(0.967 0.001 286.375);
64
+ --accent-foreground: oklch(0.21 0.006 285.885);
65
+ --destructive: oklch(0.577 0.245 27.325);
66
+ --border: oklch(0.92 0.004 286.32);
67
+ --input: oklch(0.92 0.004 286.32);
68
+ --ring: oklch(0.705 0.015 286.067);
69
+ --chart-1: oklch(0.871 0.15 154.449);
70
+ --chart-2: oklch(0.723 0.219 149.579);
71
+ --chart-3: oklch(0.627 0.194 149.214);
72
+ --chart-4: oklch(0.527 0.154 150.069);
73
+ --chart-5: oklch(0.448 0.119 151.328);
74
+ --radius: 0.45rem;
75
+ --sidebar: oklch(0.985 0 0);
76
+ --sidebar-foreground: oklch(0.141 0.005 285.823);
77
+ --sidebar-primary: oklch(0.648 0.2 131.684);
78
+ --sidebar-primary-foreground: oklch(0.986 0.031 120.757);
79
+ --sidebar-accent: oklch(0.967 0.001 286.375);
80
+ --sidebar-accent-foreground: oklch(0.21 0.006 285.885);
81
+ --sidebar-border: oklch(0.92 0.004 286.32);
82
+ --sidebar-ring: oklch(0.705 0.015 286.067);
83
+ }
84
+
85
+ .dark {
86
+ --background: oklch(0.141 0.005 285.823);
87
+ --foreground: oklch(0.985 0 0);
88
+ --card: oklch(0.21 0.006 285.885);
89
+ --card-foreground: oklch(0.985 0 0);
90
+ --popover: oklch(0.21 0.006 285.885);
91
+ --popover-foreground: oklch(0.985 0 0);
92
+ --primary: oklch(0.648 0.2 131.684);
93
+ --primary-foreground: oklch(0.986 0.031 120.757);
94
+ --secondary: oklch(0.274 0.006 286.033);
95
+ --secondary-foreground: oklch(0.985 0 0);
96
+ --muted: oklch(0.274 0.006 286.033);
97
+ --muted-foreground: oklch(0.705 0.015 286.067);
98
+ --accent: oklch(0.274 0.006 286.033);
99
+ --accent-foreground: oklch(0.985 0 0);
100
+ --destructive: oklch(0.704 0.191 22.216);
101
+ --border: oklch(1 0 0 / 10%);
102
+ --input: oklch(1 0 0 / 15%);
103
+ --ring: oklch(0.552 0.016 285.938);
104
+ --chart-1: oklch(0.871 0.15 154.449);
105
+ --chart-2: oklch(0.723 0.219 149.579);
106
+ --chart-3: oklch(0.627 0.194 149.214);
107
+ --chart-4: oklch(0.527 0.154 150.069);
108
+ --chart-5: oklch(0.448 0.119 151.328);
109
+ --sidebar: oklch(0.21 0.006 285.885);
110
+ --sidebar-foreground: oklch(0.985 0 0);
111
+ --sidebar-primary: oklch(0.768 0.233 130.85);
112
+ --sidebar-primary-foreground: oklch(0.986 0.031 120.757);
113
+ --sidebar-accent: oklch(0.274 0.006 286.033);
114
+ --sidebar-accent-foreground: oklch(0.985 0 0);
115
+ --sidebar-border: oklch(1 0 0 / 10%);
116
+ --sidebar-ring: oklch(0.552 0.016 285.938);
117
+ }
118
+
119
+ @layer base {
120
+ * {
121
+ @apply border-border outline-ring/50;
122
+ }
123
+ body {
124
+ @apply bg-background text-foreground;
125
+ }
126
+ }
package/app/layout.tsx ADDED
@@ -0,0 +1,102 @@
1
+ import type { Metadata } from "next";
2
+ import { Geist, Geist_Mono, Figtree } from "next/font/google";
3
+ import Link from "next/link";
4
+ import "./globals.css";
5
+
6
+ const figtree = Figtree({ subsets: ["latin"], variable: "--font-sans" });
7
+
8
+ const geistSans = Geist({
9
+ variable: "--font-geist-sans",
10
+ subsets: ["latin"],
11
+ });
12
+
13
+ const geistMono = Geist_Mono({
14
+ variable: "--font-geist-mono",
15
+ subsets: ["latin"],
16
+ });
17
+
18
+ export const metadata: Metadata = {
19
+ title: "Agent Eval Playground",
20
+ description: "Browse experiment results, inspect agent transcripts, and compare runs",
21
+ };
22
+
23
/** Header navigation entries, in display order. */
const NAV_LINKS = [
  { href: "/experiments", label: "Experiments" },
  { href: "/compare", label: "Compare" },
  { href: "/evals", label: "Evals" },
];

/**
 * Root layout: applies the dark theme and font variables, renders the sticky
 * header (logo, product name, primary navigation) and wraps page content in
 * the centered `<main>` container.
 *
 * @param children - The page content rendered inside `<main>`.
 */
export default function RootLayout({
  children,
}: Readonly<{
  children: React.ReactNode;
}>) {
  return (
    <html lang="en" className={`dark ${figtree.variable}`}>
      <body className={`${geistSans.variable} ${geistMono.variable} antialiased`}>
        <div className="min-h-screen bg-background">
          <header className="sticky top-0 z-50 bg-background">
            <div className="flex h-14 items-center justify-between px-4 gap-6">
              <div className="flex items-center gap-2">
                <Link href="https://vercel.com" title="Made with love by Vercel">
                  <svg
                    data-testid="geist-icon"
                    height="18"
                    strokeLinejoin="round"
                    viewBox="0 0 16 16"
                    width="18"
                    style={{ color: "currentcolor" }}
                  >
                    <path
                      fillRule="evenodd"
                      clipRule="evenodd"
                      d="M8 1L16 15H0L8 1Z"
                      fill="currentColor"
                    />
                  </svg>
                </Link>
                {/*
                  Uses the theme's muted-foreground token: the previously
                  referenced --ds-gray-500 custom property is not defined in
                  this package's globals.css, so it never took effect.
                */}
                <span className="text-muted-foreground" aria-hidden="true">
                  <svg
                    data-testid="geist-icon"
                    height="16"
                    strokeLinejoin="round"
                    viewBox="0 0 16 16"
                    width="16"
                    style={{ color: "currentcolor" }}
                  >
                    <path
                      fillRule="evenodd"
                      clipRule="evenodd"
                      d="M4.01526 15.3939L4.3107 14.7046L10.3107 0.704556L10.6061 0.0151978L11.9849 0.606077L11.6894 1.29544L5.68942 15.2954L5.39398 15.9848L4.01526 15.3939Z"
                      fill="currentColor"
                    />
                  </svg>
                </span>
                <Link href="/">
                  <span className="font-medium tracking-tight text-lg">agent-eval</span>
                </Link>
              </div>
              <nav className="flex items-baseline gap-4">
                {NAV_LINKS.map((link) => (
                  <Link
                    key={link.href}
                    href={link.href}
                    className="text-sm text-muted-foreground hover:text-foreground transition-colors"
                  >
                    {link.label}
                  </Link>
                ))}
              </nav>
            </div>
          </header>
          <main className="mx-auto max-w-5xl px-6 py-8 text-sm">
            {children}
          </main>
        </div>
      </body>
    </html>
  );
}
package/app/page.tsx ADDED
@@ -0,0 +1,179 @@
1
+ import Link from "next/link";
2
+ import { listExperiments, listEvals } from "@/lib/data";
3
+ import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
4
+ import { Badge } from "@/components/ui/badge";
5
+
6
+ export const dynamic = "force-dynamic";
7
+
8
/** Number of experiment and eval cards previewed on the dashboard. */
const PREVIEW_COUNT = 6;

/** One tile in the stats row: a small label above a large value. */
function StatCard({ label, value }: { label: string; value: string | number }) {
  return (
    <Card>
      <CardContent className="py-4 px-4">
        <div className="text-xs text-muted-foreground">{label}</div>
        <div className="text-2xl font-bold mt-1">{value}</div>
      </CardContent>
    </Card>
  );
}

/** Maps a pass rate (0-100) to a badge variant: 100 / at least 50 / below 50. */
function passRateBadgeVariant(rate: number): "default" | "secondary" | "destructive" {
  if (rate === 100) return "default";
  return rate >= 50 ? "secondary" : "destructive";
}

/**
 * Dashboard route: headline stats, a preview of recent experiments, a preview
 * of eval fixtures, and a call-to-action linking to the compare page.
 *
 * The stats are aggregated over every experiment so that "Total Runs" and
 * "Latest Pass Rate" describe the same set as the "Experiments" count; only
 * the preview grid is truncated to `PREVIEW_COUNT` cards.
 */
export default async function HomePage() {
  // NOTE(review): assumes listExperiments() without a limit returns the full
  // list in the same order as the limited call — confirm in lib/data.ts.
  const { items: allExperiments, total: totalExperiments } = listExperiments();
  const experiments = allExperiments.slice(0, PREVIEW_COUNT);
  const { items: evals, total: totalEvals } = listEvals(PREVIEW_COUNT);

  const totalRuns = allExperiments.reduce((sum, e) => sum + e.timestamps.length, 0);

  // Aggregate pass rate across the latest run of every experiment.
  const totalPassed = allExperiments.reduce((sum, e) => sum + (e.latestPassedRuns ?? 0), 0);
  const totalAttempted = allExperiments.reduce((sum, e) => sum + (e.latestTotalRuns ?? 0), 0);
  const overallPassRate = totalAttempted > 0 ? (totalPassed / totalAttempted) * 100 : 0;

  return (
    <div className="space-y-10">
      {/* Hero */}
      <div>
        <h1 className="text-2xl font-bold tracking-tight">Overview</h1>
        <p className="text-muted-foreground mt-1">
          Playground for your agent evals to view experiments, evals, and compare runs.
        </p>
      </div>

      {/* Stats */}
      <div className="grid grid-cols-1 sm:grid-cols-4 gap-4">
        <StatCard label="Experiments" value={totalExperiments} />
        <StatCard label="Total Runs" value={totalRuns} />
        <StatCard label="Eval Fixtures" value={totalEvals} />
        <StatCard
          label="Latest Pass Rate"
          value={totalAttempted > 0 ? `${overallPassRate.toFixed(0)}%` : "—"}
        />
      </div>

      {/* Recent Experiments */}
      <section className="space-y-3">
        <div className="flex items-center justify-between">
          <h2 className="text-lg font-semibold tracking-tight">Recent Experiments</h2>
          <Link href="/experiments" className="text-sm text-muted-foreground hover:text-foreground transition-colors">
            View all →
          </Link>
        </div>
        {experiments.length === 0 ? (
          <Card>
            <CardContent className="py-8 text-center text-muted-foreground">
              No experiments yet. Run <code className="text-foreground bg-muted px-1.5 py-0.5 rounded text-xs">agent-eval</code> to get started.
            </CardContent>
          </Card>
        ) : (
          <div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-4">
            {experiments.map((exp) => (
              <Link
                key={exp.name}
                href={exp.latestTimestamp
                  ? `/experiments/${encodeURIComponent(exp.name)}/${encodeURIComponent(exp.latestTimestamp)}`
                  : "/experiments"}
                className="block cursor-pointer"
              >
                <Card className="transition-colors hover:bg-muted">
                  <CardHeader className="pb-2">
                    <CardTitle className="text-sm">{exp.name}</CardTitle>
                  </CardHeader>
                  <CardContent>
                    <div className="flex items-center gap-2">
                      {exp.latestPassRate !== undefined ? (
                        <Badge variant={passRateBadgeVariant(exp.latestPassRate)}>
                          {exp.latestPassRate.toFixed(0)}%
                        </Badge>
                      ) : (
                        <Badge variant="outline">—</Badge>
                      )}
                      <span className="text-xs text-muted-foreground">
                        {exp.timestamps.length} run{exp.timestamps.length !== 1 ? "s" : ""}
                      </span>
                    </div>
                  </CardContent>
                </Card>
              </Link>
            ))}
          </div>
        )}
      </section>

      {/* Evals Preview */}
      <section className="space-y-3">
        <div className="flex items-center justify-between">
          <h2 className="text-lg font-semibold tracking-tight">Eval Fixtures</h2>
          <Link href="/evals" className="text-sm text-muted-foreground hover:text-foreground transition-colors">
            View all →
          </Link>
        </div>
        {evals.length === 0 ? (
          <Card>
            <CardContent className="py-8 text-center text-muted-foreground">
              No evals found. Create evals in your <code className="text-foreground bg-muted px-1.5 py-0.5 rounded text-xs">evals/</code> directory.
            </CardContent>
          </Card>
        ) : (
          <div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-4">
            {evals.slice(0, PREVIEW_COUNT).map((evalInfo) => (
              <Link
                key={evalInfo.name}
                href={`/evals/${encodeURIComponent(evalInfo.name)}`}
                className="block cursor-pointer"
              >
                <Card className="transition-colors hover:bg-muted">
                  <CardHeader className="pb-2">
                    <CardTitle className="text-sm">{evalInfo.name}</CardTitle>
                  </CardHeader>
                  <CardContent>
                    {evalInfo.prompt ? (
                      <p className="text-xs text-muted-foreground line-clamp-2">
                        {evalInfo.prompt.slice(0, 150)}
                        {evalInfo.prompt.length > 150 ? "..." : ""}
                      </p>
                    ) : (
                      <p className="text-xs text-muted-foreground">No prompt</p>
                    )}
                  </CardContent>
                </Card>
              </Link>
            ))}
          </div>
        )}
      </section>

      {/* Compare CTA */}
      <section className="space-y-3">
        <div className="flex items-center justify-between">
          <h2 className="text-lg font-semibold tracking-tight">Compare</h2>
        </div>
        <Link href="/compare" className="block cursor-pointer">
          <Card className="transition-colors hover:bg-muted">
            <CardContent className="py-8 text-center">
              <p className="mx-auto max-w-sm text-sm text-muted-foreground">
                Compare two experiment runs side-by-side to see pass rate deltas, duration changes, and per-eval breakdowns.
              </p>
              <p className="text-xs text-foreground mt-2">
                Open Compare →
              </p>
            </CardContent>
          </Card>
        </Link>
      </section>
    </div>
  );
}
@@ -0,0 +1,43 @@
1
+ import { notFound } from "next/navigation";
2
+ import { TranscriptPage } from "@/components/TranscriptPage";
3
+ import { getTranscript } from "@/lib/data";
4
+
5
+ export const dynamic = "force-dynamic";
6
+
7
/**
 * Decodes a single route segment.
 *
 * `decodeURIComponent` throws `URIError` on a malformed percent-escape
 * (e.g. a lone `%`); this wrapper reports that case as `null` instead.
 */
function decodeSegment(segment: string): string | null {
  try {
    return decodeURIComponent(segment);
  } catch {
    return null;
  }
}

/**
 * Server route for the transcript viewer of a single eval run.
 *
 * Responds with the not-found page when any segment cannot be decoded or when
 * `getTranscript` returns nothing for the decoded coordinates.
 *
 * @param params - Promise of the dynamic route params
 *   (`experiment`, `timestamp`, `evalName`, `run`).
 */
export default async function TranscriptRoute({
  params,
}: {
  params: Promise<{
    experiment: string;
    timestamp: string;
    evalName: string;
    run: string;
  }>;
}) {
  const { experiment, timestamp, evalName, run } = await params;

  const decodedExperiment = decodeSegment(experiment);
  const decodedTimestamp = decodeSegment(timestamp);
  const decodedEvalName = decodeSegment(evalName);
  const decodedRun = decodeSegment(run);

  if (
    decodedExperiment === null ||
    decodedTimestamp === null ||
    decodedEvalName === null ||
    decodedRun === null
  ) {
    // A malformed segment cannot address an existing transcript: 404, not 500.
    notFound();
  }

  const transcript = getTranscript(
    decodedExperiment,
    decodedTimestamp,
    decodedEvalName,
    decodedRun
  );

  if (!transcript) {
    notFound();
  }

  return (
    <TranscriptPage
      experiment={decodedExperiment}
      timestamp={decodedTimestamp}
      evalName={decodedEvalName}
      run={decodedRun}
      transcript={transcript}
    />
  );
}