@vercel/agent-eval-playground 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -0
- package/app/compare/page.tsx +40 -0
- package/app/evals/[name]/page.tsx +22 -0
- package/app/evals/page.tsx +18 -0
- package/app/experiments/[name]/[timestamp]/page.tsx +23 -0
- package/app/experiments/page.tsx +28 -0
- package/app/globals.css +126 -0
- package/app/layout.tsx +102 -0
- package/app/page.tsx +179 -0
- package/app/transcript/[experiment]/[timestamp]/[evalName]/[run]/page.tsx +43 -0
- package/bin.mjs +86 -0
- package/components/ComparePage.tsx +312 -0
- package/components/EvalDetail.tsx +114 -0
- package/components/EvalsPage.tsx +80 -0
- package/components/ExperimentDetail.tsx +162 -0
- package/components/ExperimentList.tsx +103 -0
- package/components/O11ySummary.tsx +114 -0
- package/components/RunResultCard.tsx +72 -0
- package/components/ShowMore.tsx +60 -0
- package/components/TranscriptPage.tsx +46 -0
- package/components/TranscriptViewer.tsx +201 -0
- package/components/ui/alert-dialog.tsx +184 -0
- package/components/ui/badge.tsx +45 -0
- package/components/ui/button.tsx +60 -0
- package/components/ui/card.tsx +94 -0
- package/components/ui/collapsible.tsx +34 -0
- package/components/ui/combobox.tsx +297 -0
- package/components/ui/dropdown-menu.tsx +269 -0
- package/components/ui/field.tsx +227 -0
- package/components/ui/input-group.tsx +147 -0
- package/components/ui/input.tsx +19 -0
- package/components/ui/label.tsx +24 -0
- package/components/ui/progress.tsx +31 -0
- package/components/ui/scroll-area.tsx +58 -0
- package/components/ui/select.tsx +191 -0
- package/components/ui/separator.tsx +28 -0
- package/components/ui/table.tsx +116 -0
- package/components/ui/tabs.tsx +91 -0
- package/components/ui/textarea.tsx +18 -0
- package/components/ui/tooltip.tsx +57 -0
- package/components.json +25 -0
- package/lib/data.ts +297 -0
- package/lib/types.ts +113 -0
- package/lib/utils.ts +6 -0
- package/next.config.ts +5 -0
- package/package.json +51 -0
- package/postcss.config.mjs +7 -0
- package/public/vercel.svg +1 -0
- package/tsconfig.json +42 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import Link from "next/link";
|
|
2
|
+
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
|
3
|
+
import { Badge } from "@/components/ui/badge";
|
|
4
|
+
import { Progress } from "@/components/ui/progress";
|
|
5
|
+
import { Separator } from "@/components/ui/separator";
|
|
6
|
+
import { RunResultCard } from "@/components/RunResultCard";
|
|
7
|
+
|
|
8
|
+
/**
 * Aggregated outcome of one eval inside an experiment run, as consumed by
 * `ExperimentDetail`.
 */
interface EvalDetail {
  /** Eval name; shown as the card title and used as the React list key. */
  name: string;
  /** Denominator of the "passed" badge. */
  totalRuns: number;
  /** Numerator of the "passed" badge. */
  passedRuns: number;
  /** Passed straight to `<Progress value>` and rendered with a `%` suffix. */
  passRate: number;
  /** Rendered with an `s` suffix (presumably seconds — confirm against the data layer). */
  meanDuration: number;
  /** Individual runs; `result` is `null` when a run has no recorded result. */
  runs: Array<{ name: string; result: RunResult | null }>;
}
|
|
16
|
+
|
|
17
|
+
/** Result of a single run of an eval. */
interface RunResult {
  /** Final outcome of the run. */
  status: "passed" | "failed";
  /** Optional error message attached to the run. */
  error?: string;
  /** Run duration (displayed with an `s` suffix by the UI). */
  duration: number;
  /** Optional observability counters collected for the run. */
  o11y?: {
    totalToolCalls: number;
    thinkingBlocks: number;
    errors: Array<string>;
  };
}
|
|
27
|
+
|
|
28
|
+
/** Everything `ExperimentDetail` needs to render one experiment run. */
interface ExperimentDetailData {
  /** Experiment name; shown in the breadcrumb and page title. */
  name: string;
  /** Run timestamp; passed through `formatTimestamp` before display. */
  timestamp: string;
  /** Per-eval aggregates, one card each. */
  evals: Array<EvalDetail>;
}
|
|
33
|
+
|
|
34
|
+
/** Props for the `ExperimentDetail` component. */
interface ExperimentDetailProps {
  /** The experiment run to display. */
  data: ExperimentDetailData;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Detail view for a single experiment run.
 *
 * Renders a breadcrumb and title, four summary cards (overall pass rate,
 * passed evals, average duration, timestamp) and then one card per eval
 * listing each of its runs.
 *
 * @param data - Experiment name, timestamp and per-eval aggregates to display.
 */
export function ExperimentDetail({ data }: ExperimentDetailProps) {
  const totalEvals = data.evals.length;
  // Avoid dividing by zero when the experiment contains no evals.
  const evalDivisor = totalEvals || 1;

  // An eval with zero runs must not count as passed: a bare
  // `passedRuns === totalRuns` check is true for 0 === 0.
  const allRunsPassed = (e: EvalDetail): boolean =>
    e.totalRuns > 0 && e.passedRuns === e.totalRuns;

  const passedEvals = data.evals.filter(allRunsPassed).length;
  // Unweighted mean of the per-eval values (each eval counts once,
  // regardless of how many runs it has).
  const overallPassRate =
    data.evals.reduce((sum, e) => sum + e.passRate, 0) / evalDivisor;
  const avgDuration =
    data.evals.reduce((sum, e) => sum + e.meanDuration, 0) / evalDivisor;
  const formattedTimestamp = formatTimestamp(data.timestamp);

  return (
    <div className="space-y-6">
      {/* Header */}
      <div>
        <div className="flex items-center gap-2 text-sm text-muted-foreground mb-1">
          <Link href="/experiments" className="cursor-pointer hover:underline underline-offset-4">
            Experiments
          </Link>
          <span>/</span>
          <span>{data.name}</span>
          <span>/</span>
          <span>{formattedTimestamp}</span>
        </div>
        <h1 className="text-2xl font-bold tracking-tight">{data.name}</h1>
      </div>

      {/* Summary cards */}
      <div className="grid grid-cols-1 md:grid-cols-4 gap-4">
        <Card>
          <CardContent className="py-4 px-4">
            <div className="text-xs text-muted-foreground">
              Overall Pass Rate
            </div>
            <div className="text-2xl font-bold mt-1">
              {overallPassRate.toFixed(0)}%
            </div>
            <Progress value={overallPassRate} className="mt-2 h-1.5" />
          </CardContent>
        </Card>
        <Card>
          <CardContent className="py-4 px-4">
            <div className="text-xs text-muted-foreground">Evals</div>
            <div className="text-2xl font-bold mt-1">
              {passedEvals}/{totalEvals}
            </div>
            <div className="text-xs text-muted-foreground mt-1">passed</div>
          </CardContent>
        </Card>
        <Card>
          <CardContent className="py-4 px-4">
            <div className="text-xs text-muted-foreground">Avg Duration</div>
            <div className="text-2xl font-bold mt-1">
              {avgDuration.toFixed(1)}s
            </div>
          </CardContent>
        </Card>
        <Card>
          <CardContent className="py-4 px-4">
            <div className="text-xs text-muted-foreground">Timestamp</div>
            <div className="text-sm font-medium mt-1">
              {formattedTimestamp}
            </div>
          </CardContent>
        </Card>
      </div>

      <Separator />

      {/* Per-eval breakdown */}
      <div className="space-y-6">
        {data.evals.map((evalDetail) => (
          <Card key={evalDetail.name}>
            <CardHeader>
              <div className="flex items-center justify-between">
                <div className="flex items-center gap-3">
                  <CardTitle className="text-lg">{evalDetail.name}</CardTitle>
                  <Badge
                    variant={
                      // No runs is neither a pass nor a failure: show it neutrally.
                      evalDetail.totalRuns === 0
                        ? "secondary"
                        : allRunsPassed(evalDetail)
                          ? "default"
                          : "destructive"
                    }
                  >
                    {evalDetail.passedRuns}/{evalDetail.totalRuns} passed
                  </Badge>
                </div>
                <div className="text-sm text-muted-foreground">
                  avg {evalDetail.meanDuration.toFixed(1)}s
                </div>
              </div>
              <Progress value={evalDetail.passRate} className="h-1.5 mt-2" />
            </CardHeader>
            <CardContent>
              <div className="space-y-2">
                {evalDetail.runs.map((run) => (
                  <RunResultCard
                    key={run.name}
                    runName={run.name}
                    result={run.result}
                    experiment={data.name}
                    timestamp={data.timestamp}
                    evalName={evalDetail.name}
                  />
                ))}
              </div>
            </CardContent>
          </Card>
        ))}
      </div>
    </div>
  );
}
|
|
152
|
+
|
|
153
|
+
/**
 * Converts a run timestamp into a locale-formatted date/time string.
 *
 * The input is expected to look like an ISO-8601 timestamp whose time
 * separators were replaced by dashes, e.g. `2024-01-02T03-04-05Z`,
 * `2024-01-02T03-04-05.123Z` or `2024-01-02T03-04-05-123Z` (presumably so the
 * value is safe as a directory / URL segment — confirm against the writer).
 * Regular ISO strings pass through unchanged.
 *
 * @param ts - Raw timestamp string.
 * @returns `Date#toLocaleString()` of the parsed timestamp, or `ts` itself
 *   when it cannot be parsed.
 */
function formatTimestamp(ts: string): string {
  try {
    // Restore `HH:MM:SS`; additionally restore a dash-separated millisecond
    // part (`-123` directly before `Z` or end of string) to `.123`, which the
    // plain `T$1:$2:$3` rewrite would leave as an unparseable `:05-123Z`.
    const isoString = ts.replace(
      /T(\d{2})-(\d{2})-(\d{2})(?:-(\d{3})(?=Z|$))?/,
      (_match: string, hh: string, mm: string, ss: string, ms?: string) =>
        `T${hh}:${mm}:${ss}${ms === undefined ? "" : `.${ms}`}`
    );
    const date = new Date(isoString);
    // `new Date` never throws on bad input; it yields an invalid date instead.
    if (Number.isNaN(date.getTime())) return ts;
    return date.toLocaleString();
  } catch {
    // Best effort: fall back to the raw value rather than breaking the page.
    return ts;
  }
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import Link from "next/link";
|
|
2
|
+
import { Badge } from "@/components/ui/badge";
|
|
3
|
+
import { Card, CardContent } from "@/components/ui/card";
|
|
4
|
+
import { ShowMore } from "@/components/ShowMore";
|
|
5
|
+
|
|
6
|
+
/** Summary of one experiment as shown in a row of `ExperimentList`. */
interface ExperimentInfo {
  /** Experiment name; used as the row key and in the detail link. */
  name: string;
  /** All known run timestamps; only the count is displayed ("Runs" column). */
  timestamps: Array<string>;
  /** Timestamp of the latest run, or `null` when there is none. */
  latestTimestamp: string | null;
  /** Pass rate of the latest run; compared against 100 and 50 and shown with `%`. */
  latestPassRate?: number;
  /** Denominator shown next to the pass-rate badge. */
  latestTotalRuns?: number;
  /** Numerator shown next to the pass-rate badge. */
  latestPassedRuns?: number;
}
|
|
14
|
+
|
|
15
|
+
/** Props for the `ExperimentList` component. */
interface ExperimentListProps {
  /** Rows to render. */
  experiments: Array<ExperimentInfo>;
  /** Forwarded to `ShowMore` as the total item count. */
  total: number;
  /** When true, no "show all" link target is passed to `ShowMore`. */
  showAll: boolean;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Maps a pass rate to a badge variant: 100 is "default", 50 and above is
 * "secondary", anything lower is "destructive".
 */
function passRateVariant(passRate: number): "default" | "secondary" | "destructive" {
  if (passRate === 100) return "default";
  return passRate >= 50 ? "secondary" : "destructive";
}

/**
 * Table-like list of experiments with run count, latest pass rate and latest
 * run time. Each row links to the experiment's latest run.
 *
 * @param experiments - Rows to render; an empty array shows a placeholder card.
 * @param total - Total number of experiments, forwarded to `ShowMore`.
 * @param showAll - Whether all experiments are already being shown; when false
 *   `ShowMore` receives `/experiments?all` as its "show all" target.
 */
export function ExperimentList({ experiments, total, showAll }: ExperimentListProps) {
  if (experiments.length === 0) {
    return (
      <Card>
        <CardContent className="py-12 text-center">
          <p className="text-muted-foreground text-lg">No experiments found</p>
          <p className="text-muted-foreground text-sm mt-2">
            Run an experiment with{" "}
            {/* The angle brackets must be escaped; a raw <config> is parsed as a JSX element. */}
            <code className="text-foreground bg-muted px-1.5 py-0.5 rounded text-xs">agent-eval &lt;config&gt;</code>{" "}
            to see results here.
          </p>
        </CardContent>
      </Card>
    );
  }

  return (
    <Card>
      <CardContent className="pt-0">
        <div>
          {/* Header */}
          <div className="grid grid-cols-[1fr_auto_auto_auto] gap-4 px-3 py-2 text-xs text-muted-foreground font-medium border-b border-border">
            <span>Name</span>
            <span className="w-12 text-right">Runs</span>
            <span className="w-24">Pass Rate</span>
            <span className="w-44">Latest Run</span>
          </div>
          {/* Rows */}
          <ShowMore total={total} showAllHref={showAll ? undefined : "/experiments?all"}>
            {experiments.map((exp) => (
              <Link
                key={exp.name}
                href={
                  // Experiments without any run have nowhere to link to.
                  exp.latestTimestamp
                    ? `/experiments/${encodeURIComponent(exp.name)}/${encodeURIComponent(exp.latestTimestamp)}`
                    : "#"
                }
                className="grid grid-cols-[1fr_auto_auto_auto] gap-4 items-center px-3 py-2.5 cursor-pointer transition-colors hover:bg-muted rounded-md"
              >
                <span className="font-medium truncate">{exp.name}</span>
                <span className="w-12 text-right text-muted-foreground">{exp.timestamps.length}</span>
                <span className="w-24">
                  {exp.latestPassRate !== undefined ? (
                    <span className="flex items-center gap-2">
                      <Badge variant={passRateVariant(exp.latestPassRate)}>
                        {exp.latestPassRate.toFixed(0)}%
                      </Badge>
                      <span className="text-xs text-muted-foreground">
                        {exp.latestPassedRuns}/{exp.latestTotalRuns}
                      </span>
                    </span>
                  ) : (
                    <span className="text-muted-foreground">--</span>
                  )}
                </span>
                <span className="w-44 text-xs text-muted-foreground">
                  {exp.latestTimestamp ? formatTimestamp(exp.latestTimestamp) : "--"}
                </span>
              </Link>
            ))}
          </ShowMore>
        </div>
      </CardContent>
    </Card>
  );
}
|
|
93
|
+
|
|
94
|
+
/**
 * Converts a run timestamp into a locale-formatted date/time string.
 *
 * The input is expected to look like an ISO-8601 timestamp whose time
 * separators were replaced by dashes, e.g. `2024-01-02T03-04-05Z`,
 * `2024-01-02T03-04-05.123Z` or `2024-01-02T03-04-05-123Z` (presumably so the
 * value is safe as a directory / URL segment — confirm against the writer).
 * Regular ISO strings pass through unchanged.
 *
 * @param ts - Raw timestamp string.
 * @returns `Date#toLocaleString()` of the parsed timestamp, or `ts` itself
 *   when it cannot be parsed.
 */
function formatTimestamp(ts: string): string {
  try {
    // Restore `HH:MM:SS`; additionally restore a dash-separated millisecond
    // part (`-123` directly before `Z` or end of string) to `.123`, which the
    // plain `T$1:$2:$3` rewrite would leave as an unparseable `:05-123Z`.
    const isoString = ts.replace(
      /T(\d{2})-(\d{2})-(\d{2})(?:-(\d{3})(?=Z|$))?/,
      (_match: string, hh: string, mm: string, ss: string, ms?: string) =>
        `T${hh}:${mm}:${ss}${ms === undefined ? "" : `.${ms}`}`
    );
    const date = new Date(isoString);
    // `new Date` never throws on bad input; it yields an invalid date instead.
    if (Number.isNaN(date.getTime())) return ts;
    return date.toLocaleString();
  } catch {
    // Best effort: fall back to the raw value rather than breaking the page.
    return ts;
  }
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { Badge } from "@/components/ui/badge";
|
|
2
|
+
import type { TranscriptSummary, ToolName } from "@/lib/types";
|
|
3
|
+
|
|
4
|
+
/** Props for the `O11ySummary` component. */
interface O11ySummaryProps {
  /** Transcript statistics to render (counts, tool usage, files, shell commands, errors). */
  summary: TranscriptSummary;
}
|
|
7
|
+
|
|
8
|
+
/**
 * Display label for every `ToolName`, used when rendering the per-tool call
 * badges. Lookup-only; key order is irrelevant.
 */
const TOOL_LABELS: Record<ToolName, string> = {
  // File access
  file_read: "File Read",
  file_write: "File Write",
  file_edit: "File Edit",
  // Workspace search / listing
  glob: "Glob",
  grep: "Grep",
  list_dir: "List Dir",
  // Command execution
  shell: "Shell",
  // Web
  web_fetch: "Web Fetch",
  web_search: "Web Search",
  // Other
  agent_task: "Agent Task",
  unknown: "Unknown",
};
|
|
21
|
+
|
|
22
|
+
/** Maximum number of file / shell badges shown before the rest collapse into "+N more". */
const MAX_INLINE_ITEMS = 10;

/**
 * One labelled row of file-path badges, capped at `MAX_INLINE_ITEMS` with a
 * "+N more" note for the remainder. Renders nothing for an empty list.
 */
function FileBadgeRow({ label, files }: { label: string; files: readonly string[] }) {
  if (files.length === 0) return null;
  const hiddenCount = files.length - MAX_INLINE_ITEMS;
  return (
    <div className="flex flex-wrap gap-1 items-center">
      <span className="text-muted-foreground shrink-0">{label} ({files.length})</span>
      {files.slice(0, MAX_INLINE_ITEMS).map((f) => (
        <Badge key={f} variant="outline" className="text-xs font-mono font-normal">
          {f}
        </Badge>
      ))}
      {hiddenCount > 0 && (
        <span className="text-muted-foreground">+{hiddenCount} more</span>
      )}
    </div>
  );
}

/**
 * Compact observability summary for a transcript: headline counters, per-tool
 * call counts, files read/modified, shell commands and errors. Sections with
 * no data are omitted.
 *
 * @param summary - Transcript statistics to render.
 */
export function O11ySummary({ summary }: O11ySummaryProps) {
  // Only tools that were actually called, most-used first.
  const toolEntries = Object.entries(summary.toolCalls)
    .filter(([, count]) => count > 0)
    .sort(([, a], [, b]) => b - a);

  const errorCount = summary.errors.length;
  const shellCount = summary.shellCommands.length;
  const hiddenShellCount = shellCount - MAX_INLINE_ITEMS;
  const hasFiles = summary.filesRead.length > 0 || summary.filesModified.length > 0;

  return (
    <div className="space-y-3 rounded-lg bg-muted/50 p-4">
      {/* Inline stats */}
      <div className="flex flex-wrap gap-x-5 gap-y-1 text-xs">
        <span><span className="text-muted-foreground">Turns</span> <span className="font-medium">{summary.totalTurns}</span></span>
        <span><span className="text-muted-foreground">Tool Calls</span> <span className="font-medium">{summary.totalToolCalls}</span></span>
        <span><span className="text-muted-foreground">Thinking</span> <span className="font-medium">{summary.thinkingBlocks}</span></span>
        <span>
          <span className="text-muted-foreground">Errors</span>{" "}
          <span className={`font-medium ${errorCount > 0 ? "text-destructive" : ""}`}>{errorCount}</span>
        </span>
      </div>

      {/* Tool breakdown - inline */}
      {toolEntries.length > 0 && (
        <div className="flex flex-wrap gap-1.5">
          {toolEntries.map(([tool, count]) => (
            <Badge key={tool} variant="secondary" className="text-xs font-normal">
              {/* Fall back to the raw key for tools without a label. */}
              {TOOL_LABELS[tool as ToolName] ?? tool} <span className="font-mono ml-1">{count}</span>
            </Badge>
          ))}
        </div>
      )}

      {/* Files - inline */}
      {hasFiles && (
        <div className="space-y-1.5 text-xs">
          <FileBadgeRow label="Read" files={summary.filesRead} />
          <FileBadgeRow label="Modified" files={summary.filesModified} />
        </div>
      )}

      {/* Shell commands - compact */}
      {shellCount > 0 && (
        <div className="space-y-1 text-xs">
          <span className="text-muted-foreground">Shell ({shellCount})</span>
          <div className="flex flex-wrap gap-1 items-center">
            {summary.shellCommands.slice(0, MAX_INLINE_ITEMS).map((cmd, i) => (
              <Badge
                key={i}
                // Any exit code other than 0 is highlighted as a failure.
                variant={cmd.exitCode === 0 ? "outline" : "destructive"}
                className="text-xs font-mono font-normal max-w-64 truncate"
              >
                {cmd.command}
              </Badge>
            ))}
            {/* Same overflow note as the file rows, so truncation is never silent. */}
            {hiddenShellCount > 0 && (
              <span className="text-muted-foreground">+{hiddenShellCount} more</span>
            )}
          </div>
        </div>
      )}

      {/* Errors - compact */}
      {errorCount > 0 && (
        <div className="space-y-1 text-xs">
          <span className="text-destructive font-medium">Errors ({errorCount})</span>
          {summary.errors.map((err, i) => (
            <div key={i} className="text-xs font-mono text-destructive truncate">
              {err}
            </div>
          ))}
        </div>
      )}
    </div>
  );
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import Link from "next/link";
|
|
2
|
+
import { Badge } from "@/components/ui/badge";
|
|
3
|
+
import { Card, CardContent } from "@/components/ui/card";
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
/** Result of a single run, as rendered by `RunResultCard`. */
interface RunResult {
  /** Final outcome; drives the badge colour and label. */
  status: "passed" | "failed";
  /** Optional error message shown truncated next to the badge. */
  error?: string;
  /** Run duration, displayed with one decimal and an `s` suffix. */
  duration: number;
  /** Optional observability counters; only `totalToolCalls` is displayed by the card. */
  o11y?: {
    totalToolCalls: number;
    thinkingBlocks: number;
    errors: Array<string>;
  };
}
|
|
16
|
+
|
|
17
|
+
/** Props for the `RunResultCard` component. */
interface RunResultCardProps {
  /** Run identifier; displayed and used as the last transcript URL segment. */
  runName: string;
  /** Run outcome, or `null` when no result is available. */
  result: RunResult | null;
  /** Experiment name (transcript URL segment). */
  experiment: string;
  /** Experiment run timestamp (transcript URL segment). */
  timestamp: string;
  /** Eval name (transcript URL segment). */
  evalName: string;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Card summarising one run. Runs with a result link to their transcript page
 * and show status, optional error, tool-call count and duration; runs without
 * a result render as a dimmed, non-clickable placeholder.
 *
 * @param runName - Run identifier, shown as the card label.
 * @param result - Run outcome, or `null` when nothing was recorded.
 * @param experiment - Experiment name used to build the transcript URL.
 * @param timestamp - Experiment run timestamp used to build the transcript URL.
 * @param evalName - Eval name used to build the transcript URL.
 */
export function RunResultCard({
  runName,
  result,
  experiment,
  timestamp,
  evalName,
}: RunResultCardProps) {
  if (!result) {
    return (
      <Card className="opacity-50">
        <CardContent className="py-3 px-4">
          <span className="text-sm text-muted-foreground">{runName}: No result</span>
        </CardContent>
      </Card>
    );
  }

  // Every segment is user/filesystem-derived, so encode each one individually.
  const transcriptHref = `/transcript/${[experiment, timestamp, evalName, runName]
    .map(encodeURIComponent)
    .join("/")}`;

  // Use the singular form for exactly one call instead of "1 tool calls".
  const toolCalls = result.o11y?.totalToolCalls;
  const toolCallLabel =
    toolCalls === undefined
      ? null
      : `${toolCalls} tool ${toolCalls === 1 ? "call" : "calls"}`;

  return (
    <Link href={transcriptHref} className="block cursor-pointer">
      <Card className="transition-colors hover:bg-muted">
        <CardContent className="py-3 px-4 flex items-center justify-between">
          <div className="flex items-center gap-3">
            <span className="text-sm font-medium">{runName}</span>
            <Badge variant={result.status === "passed" ? "default" : "destructive"}>
              {result.status}
            </Badge>
            {result.error && (
              <span className="text-xs text-destructive truncate max-w-64">
                {result.error}
              </span>
            )}
          </div>
          <div className="flex items-center gap-4 text-xs text-muted-foreground">
            {toolCallLabel !== null && <span>{toolCallLabel}</span>}
            <span>{result.duration.toFixed(1)}s</span>
          </div>
        </CardContent>
      </Card>
    </Link>
  );
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"use client";
|
|
2
|
+
|
|
3
|
+
import { useState } from "react";
|
|
4
|
+
import Link from "next/link";
|
|
5
|
+
|
|
6
|
+
/** Default for ShowMore's `limit` prop: initial visible item count and the increment added per "Show more" click. */
const DEFAULT_LIMIT = 50;
|
|
7
|
+
|
|
8
|
+
/** Props for the `ShowMore` component. */
interface ShowMoreProps {
  /** Items to render; must be an array because the component counts and slices it. */
  children: Array<React.ReactNode>;
  /** Initial number of visible items and the step added per click; defaults to `DEFAULT_LIMIT`. */
  limit?: number;
  /** Total count of items (when server limits the fetch). Shows a Link instead of client-side toggle. */
  total?: number;
  /** URL to navigate to when showing all items (server-driven mode). */
  showAllHref?: string;
  /** Class name applied to the wrapping `div`. */
  className?: string;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Renders a list of children with a "Show more" control, in one of two modes:
 *
 * - Server-driven: `total` exceeds the number of children received, so the
 *   control is a link to `showAllHref` (if given) that loads everything.
 * - Client-driven: all children are present; only the first `limit` are shown
 *   and each click reveals `limit` more.
 *
 * @param children - Items to render.
 * @param limit - Initial visible count and per-click increment (client mode).
 * @param total - Total item count known to the server, if it limited the fetch.
 * @param showAllHref - Link target for the server-driven "Show more" control.
 * @param className - Class name for the wrapping `div`.
 */
export function ShowMore({
  children,
  limit = DEFAULT_LIMIT,
  total,
  showAllHref,
  className,
}: ShowMoreProps) {
  const [visibleCount, setVisibleCount] = useState(limit);

  // Server-driven: fewer children than total, link to load all from server
  const serverLimited = total !== undefined && total > children.length;

  // Client-driven: all children passed, show in increments
  const clientLimited = !serverLimited && children.length > visibleCount;

  const visible = clientLimited ? children.slice(0, visibleCount) : children;

  // In server mode `total` is the larger number; in client mode the children
  // themselves are authoritative, so a `total` smaller than the children
  // count must not shrink (or negate) the remaining count.
  const totalCount = Math.max(total ?? 0, children.length);
  const remainingCount = totalCount - visible.length;

  return (
    <div className={className}>
      {visible}
      {serverLimited && showAllHref && (
        <Link
          href={showAllHref}
          className="block w-full py-2 text-center text-sm text-muted-foreground hover:text-foreground transition-colors cursor-pointer"
        >
          Show more ({remainingCount} remaining)
        </Link>
      )}
      {clientLimited && (
        <button
          // Explicit type: the default ("submit") would submit an enclosing form.
          type="button"
          onClick={() => setVisibleCount((c) => c + limit)}
          className="w-full py-2 text-sm text-muted-foreground hover:text-foreground transition-colors cursor-pointer"
        >
          Show more ({remainingCount} remaining)
        </button>
      )}
    </div>
  );
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import Link from "next/link";
|
|
2
|
+
import { Card, CardContent } from "@/components/ui/card";
|
|
3
|
+
import { TranscriptViewer } from "@/components/TranscriptViewer";
|
|
4
|
+
import type { Transcript } from "@/lib/types";
|
|
5
|
+
|
|
6
|
+
/** Props for the `TranscriptPage` component. */
interface TranscriptPageProps {
  /** Experiment name; shown in the breadcrumb and used in the back link. */
  experiment: string;
  /** Experiment run timestamp; used in the back link. */
  timestamp: string;
  /** Eval name shown in the breadcrumb. */
  evalName: string;
  /** Run identifier shown in the breadcrumb. */
  run: string;
  /** Transcript handed to `TranscriptViewer`. */
  transcript: Transcript;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Page body for a single run transcript: a breadcrumb
 * (Experiments / experiment / eval / run), a heading and the transcript viewer.
 *
 * @param experiment - Experiment name; links back to the experiment run page.
 * @param timestamp - Experiment run timestamp used in that link.
 * @param evalName - Eval name shown in the breadcrumb.
 * @param run - Run identifier shown in the breadcrumb.
 * @param transcript - Transcript passed to `TranscriptViewer`.
 */
export function TranscriptPage({
  experiment,
  timestamp,
  evalName,
  run,
  transcript,
}: TranscriptPageProps) {
  const experimentHref =
    "/experiments/" +
    encodeURIComponent(experiment) +
    "/" +
    encodeURIComponent(timestamp);

  const breadcrumb = (
    <div className="flex items-center gap-2 text-sm text-muted-foreground">
      <Link href="/experiments" className="cursor-pointer hover:underline underline-offset-4">
        Experiments
      </Link>
      <span>/</span>
      <Link href={experimentHref} className="cursor-pointer hover:underline underline-offset-4">
        {experiment}
      </Link>
      <span>/</span>
      <span>{evalName}</span>
      <span>/</span>
      <span>{run}</span>
    </div>
  );

  return (
    <div className="space-y-4">
      {/* Breadcrumb */}
      {breadcrumb}

      <h1 className="text-2xl font-bold tracking-tight">Transcript</h1>

      <TranscriptViewer transcript={transcript} />
    </div>
  );
}
|