@vercel/agent-eval-playground 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -0
- package/app/compare/page.tsx +40 -0
- package/app/evals/[name]/page.tsx +22 -0
- package/app/evals/page.tsx +18 -0
- package/app/experiments/[name]/[timestamp]/page.tsx +23 -0
- package/app/experiments/page.tsx +28 -0
- package/app/globals.css +126 -0
- package/app/layout.tsx +102 -0
- package/app/page.tsx +179 -0
- package/app/transcript/[experiment]/[timestamp]/[evalName]/[run]/page.tsx +43 -0
- package/bin.mjs +86 -0
- package/components/ComparePage.tsx +312 -0
- package/components/EvalDetail.tsx +114 -0
- package/components/EvalsPage.tsx +80 -0
- package/components/ExperimentDetail.tsx +162 -0
- package/components/ExperimentList.tsx +103 -0
- package/components/O11ySummary.tsx +114 -0
- package/components/RunResultCard.tsx +72 -0
- package/components/ShowMore.tsx +60 -0
- package/components/TranscriptPage.tsx +46 -0
- package/components/TranscriptViewer.tsx +201 -0
- package/components/ui/alert-dialog.tsx +184 -0
- package/components/ui/badge.tsx +45 -0
- package/components/ui/button.tsx +60 -0
- package/components/ui/card.tsx +94 -0
- package/components/ui/collapsible.tsx +34 -0
- package/components/ui/combobox.tsx +297 -0
- package/components/ui/dropdown-menu.tsx +269 -0
- package/components/ui/field.tsx +227 -0
- package/components/ui/input-group.tsx +147 -0
- package/components/ui/input.tsx +19 -0
- package/components/ui/label.tsx +24 -0
- package/components/ui/progress.tsx +31 -0
- package/components/ui/scroll-area.tsx +58 -0
- package/components/ui/select.tsx +191 -0
- package/components/ui/separator.tsx +28 -0
- package/components/ui/table.tsx +116 -0
- package/components/ui/tabs.tsx +91 -0
- package/components/ui/textarea.tsx +18 -0
- package/components/ui/tooltip.tsx +57 -0
- package/components.json +25 -0
- package/lib/data.ts +297 -0
- package/lib/types.ts +113 -0
- package/lib/utils.ts +6 -0
- package/next.config.ts +5 -0
- package/package.json +51 -0
- package/postcss.config.mjs +7 -0
- package/public/vercel.svg +1 -0
- package/tsconfig.json +42 -0
package/lib/types.ts
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Playground types — mirrors the core agent-eval types for the UI.
|
|
3
|
+
* Kept separate to avoid importing Node.js code into the browser bundle.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/** Canonical tool names across agents; TranscriptEvent.tool pairs one of these (name) with an originalName string. */
|
|
7
|
+
export type ToolName =
|
|
8
|
+
| "file_read"
|
|
9
|
+
| "file_write"
|
|
10
|
+
| "file_edit"
|
|
11
|
+
| "shell"
|
|
12
|
+
| "web_fetch"
|
|
13
|
+
| "web_search"
|
|
14
|
+
| "glob"
|
|
15
|
+
| "grep"
|
|
16
|
+
| "list_dir"
|
|
17
|
+
| "agent_task"
|
|
18
|
+
| "unknown"; // NOTE(review): presumably the fallback for tools with no canonical mapping — confirm against the transcript parser
|
|
19
|
+
|
|
20
|
+
/** An event in the transcript. Only `type` is required; every other field, including the `tool` payload, is optional. */
|
|
21
|
+
export interface TranscriptEvent {
|
|
22
|
+
timestamp?: string; // NOTE(review): string format is not stated here — confirm before parsing as a date
|
|
23
|
+
type: "message" | "tool_call" | "tool_result" | "thinking" | "error";
|
|
24
|
+
role?: "user" | "assistant" | "system";
|
|
25
|
+
content?: string;
|
|
26
|
+
tool?: { // NOTE(review): presumably present only on "tool_call" / "tool_result" events — the type does not enforce this
|
|
27
|
+
name: ToolName; // canonical tool name
|
|
28
|
+
originalName: string; // NOTE(review): presumably the tool name as the agent itself reported it — confirm
|
|
29
|
+
args?: Record<string, unknown>;
|
|
30
|
+
result?: unknown; // untyped: narrow before rendering
|
|
31
|
+
durationMs?: number; // milliseconds, per the field name
|
|
32
|
+
success?: boolean;
|
|
33
|
+
};
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
/** Summary statistics derived from the transcript: per-tool counts plus lists of fetches, files, shell commands and errors. */
|
|
37
|
+
export interface TranscriptSummary {
|
|
38
|
+
totalTurns: number;
|
|
39
|
+
toolCalls: Record<ToolName, number>; // NOTE(review): Record<ToolName, …> types every ToolName key as present — confirm the producer writes zero counts for unused tools
|
|
40
|
+
totalToolCalls: number; // NOTE(review): presumably the sum of the toolCalls values — confirm
|
|
41
|
+
webFetches: { url: string; method?: string; status?: number; success?: boolean }[];
|
|
42
|
+
filesRead: string[];
|
|
43
|
+
filesModified: string[];
|
|
44
|
+
shellCommands: { command: string; exitCode?: number; success?: boolean }[];
|
|
45
|
+
errors: string[];
|
|
46
|
+
thinkingBlocks: number; // NOTE(review): presumably a count of "thinking" events — confirm
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/** A parsed transcript: the event list, its summary, and the parse status. */
|
|
50
|
+
export interface Transcript {
|
|
51
|
+
agent: string;
|
|
52
|
+
model?: string;
|
|
53
|
+
events: TranscriptEvent[];
|
|
54
|
+
summary: TranscriptSummary;
|
|
55
|
+
parseSuccess: boolean;
|
|
56
|
+
parseErrors?: string[]; // NOTE(review): presumably populated when parseSuccess is false — confirm
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/** Result of a single eval run: pass/fail status, duration, optional artifact paths and an optional transcript summary. */
|
|
60
|
+
export interface EvalRunResult {
|
|
61
|
+
status: "passed" | "failed";
|
|
62
|
+
error?: string; // NOTE(review): presumably set when status is "failed" — the type does not enforce this
|
|
63
|
+
duration: number; // NOTE(review): units are not stated here (TranscriptEvent.tool uses durationMs) — confirm before formatting
|
|
64
|
+
transcriptPath?: string;
|
|
65
|
+
transcriptRawPath?: string;
|
|
66
|
+
outputPaths?: {
|
|
67
|
+
eval?: string;
|
|
68
|
+
scripts?: Record<string, string>; // NOTE(review): presumably script name -> output path — confirm
|
|
69
|
+
};
|
|
70
|
+
o11y?: TranscriptSummary; // optional transcript summary attached to this run
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
/** Summary of multiple runs for a single eval */
|
|
74
|
+
export interface EvalSummary {
|
|
75
|
+
name: string;
|
|
76
|
+
totalRuns: number;
|
|
77
|
+
passedRuns: number;
|
|
78
|
+
passRate: number; // NOTE(review): scale (0–1 fraction vs 0–100 percent) is not stated here — confirm before formatting
|
|
79
|
+
meanDuration: number; // NOTE(review): units not stated — presumably the same as EvalRunResult.duration
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/** Experiment info returned by the API: the experiment name and the timestamps recorded for it. */
|
|
83
|
+
export interface ExperimentInfo {
|
|
84
|
+
name: string;
|
|
85
|
+
timestamps: string[]; // NOTE(review): ordering is not stated here — confirm before assuming newest-first
|
|
86
|
+
latestTimestamp: string; // NOTE(review): presumably one of the entries in timestamps — confirm
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/** Full experiment detail for a specific timestamp: start/completion times, the run configuration, and per-eval summaries. */
|
|
90
|
+
export interface ExperimentDetail {
|
|
91
|
+
startedAt: string; // NOTE(review): string format is not stated here — confirm before parsing as a date
|
|
92
|
+
completedAt: string;
|
|
93
|
+
config: {
|
|
94
|
+
agent: string;
|
|
95
|
+
model: string | string[]; // either a single model or a list of models; consumers must handle both forms
|
|
96
|
+
runs: number;
|
|
97
|
+
earlyExit: boolean;
|
|
98
|
+
timeout: number; // NOTE(review): units are not stated here — confirm
|
|
99
|
+
};
|
|
100
|
+
evals: EvalSummary[];
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/** Run detail wrapping a single EvalRunResult; the optional o11y summary is not a field here but lives on `result.o11y`. */
|
|
104
|
+
export interface RunDetail {
|
|
105
|
+
result: EvalRunResult;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/** Eval fixture info: the fixture's name, its prompt text, and a list of files. */
|
|
109
|
+
export interface EvalInfo {
|
|
110
|
+
name: string;
|
|
111
|
+
prompt: string;
|
|
112
|
+
files: string[]; // NOTE(review): presumably paths or names of the fixture's files — confirm against the data loader
|
|
113
|
+
}
|
package/lib/utils.ts
ADDED
package/next.config.ts
ADDED
package/package.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@vercel/agent-eval-playground",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"description": "Web-based playground for browsing agent-eval experiment results",
|
|
5
|
+
"bin": {
|
|
6
|
+
"agent-eval-playground": "./bin.mjs"
|
|
7
|
+
},
|
|
8
|
+
"scripts": {
|
|
9
|
+
"dev": "next dev",
|
|
10
|
+
"build": "next build",
|
|
11
|
+
"start": "next start",
|
|
12
|
+
"release": "changeset publish"
|
|
13
|
+
},
|
|
14
|
+
"dependencies": {
|
|
15
|
+
"@base-ui/react": "^1.1.0",
|
|
16
|
+
"@remixicon/react": "^4.9.0",
|
|
17
|
+
"class-variance-authority": "^0.7.1",
|
|
18
|
+
"clsx": "^2.1.1",
|
|
19
|
+
"lucide-react": "^0.469.0",
|
|
20
|
+
"next": "^16.0.0",
|
|
21
|
+
"radix-ui": "^1.4.3",
|
|
22
|
+
"react": "^19.2.0",
|
|
23
|
+
"react-dom": "^19.2.0",
|
|
24
|
+
"tailwind-merge": "^2.6.0",
|
|
25
|
+
"tw-animate-css": "^1.4.0"
|
|
26
|
+
},
|
|
27
|
+
"devDependencies": {
|
|
28
|
+
"@tailwindcss/postcss": "^4.0.0",
|
|
29
|
+
"@types/node": "^22.0.0",
|
|
30
|
+
"@types/react": "^19.0.0",
|
|
31
|
+
"@types/react-dom": "^19.0.0",
|
|
32
|
+
"postcss": "^8.4.49",
|
|
33
|
+
"tailwindcss": "^4.0.0",
|
|
34
|
+
"typescript": "^5.6.0"
|
|
35
|
+
},
|
|
36
|
+
"files": [
|
|
37
|
+
"app",
|
|
38
|
+
"components",
|
|
39
|
+
"lib",
|
|
40
|
+
"public",
|
|
41
|
+
"bin.mjs",
|
|
42
|
+
"next.config.ts",
|
|
43
|
+
"tsconfig.json",
|
|
44
|
+
"postcss.config.mjs",
|
|
45
|
+
"components.json"
|
|
46
|
+
],
|
|
47
|
+
"publishConfig": {
|
|
48
|
+
"access": "public"
|
|
49
|
+
},
|
|
50
|
+
"license": "MIT"
|
|
51
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2017",
|
|
4
|
+
"lib": [
|
|
5
|
+
"dom",
|
|
6
|
+
"dom.iterable",
|
|
7
|
+
"esnext"
|
|
8
|
+
],
|
|
9
|
+
"allowJs": true,
|
|
10
|
+
"skipLibCheck": true,
|
|
11
|
+
"strict": true,
|
|
12
|
+
"noEmit": true,
|
|
13
|
+
"esModuleInterop": true,
|
|
14
|
+
"module": "esnext",
|
|
15
|
+
"moduleResolution": "bundler",
|
|
16
|
+
"resolveJsonModule": true,
|
|
17
|
+
"isolatedModules": true,
|
|
18
|
+
"jsx": "react-jsx",
|
|
19
|
+
"incremental": true,
|
|
20
|
+
"plugins": [
|
|
21
|
+
{
|
|
22
|
+
"name": "next"
|
|
23
|
+
}
|
|
24
|
+
],
|
|
25
|
+
"paths": {
|
|
26
|
+
"@/*": [
|
|
27
|
+
"./*"
|
|
28
|
+
]
|
|
29
|
+
},
|
|
30
|
+
"baseUrl": "."
|
|
31
|
+
},
|
|
32
|
+
"include": [
|
|
33
|
+
"next-env.d.ts",
|
|
34
|
+
"**/*.ts",
|
|
35
|
+
"**/*.tsx",
|
|
36
|
+
".next/types/**/*.ts",
|
|
37
|
+
".next/dev/types/**/*.ts"
|
|
38
|
+
],
|
|
39
|
+
"exclude": [
|
|
40
|
+
"node_modules"
|
|
41
|
+
]
|
|
42
|
+
}
|