@vercel/agent-eval-playground 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +50 -0
  2. package/app/compare/page.tsx +40 -0
  3. package/app/evals/[name]/page.tsx +22 -0
  4. package/app/evals/page.tsx +18 -0
  5. package/app/experiments/[name]/[timestamp]/page.tsx +23 -0
  6. package/app/experiments/page.tsx +28 -0
  7. package/app/globals.css +126 -0
  8. package/app/layout.tsx +102 -0
  9. package/app/page.tsx +179 -0
  10. package/app/transcript/[experiment]/[timestamp]/[evalName]/[run]/page.tsx +43 -0
  11. package/bin.mjs +86 -0
  12. package/components/ComparePage.tsx +312 -0
  13. package/components/EvalDetail.tsx +114 -0
  14. package/components/EvalsPage.tsx +80 -0
  15. package/components/ExperimentDetail.tsx +162 -0
  16. package/components/ExperimentList.tsx +103 -0
  17. package/components/O11ySummary.tsx +114 -0
  18. package/components/RunResultCard.tsx +72 -0
  19. package/components/ShowMore.tsx +60 -0
  20. package/components/TranscriptPage.tsx +46 -0
  21. package/components/TranscriptViewer.tsx +201 -0
  22. package/components/ui/alert-dialog.tsx +184 -0
  23. package/components/ui/badge.tsx +45 -0
  24. package/components/ui/button.tsx +60 -0
  25. package/components/ui/card.tsx +94 -0
  26. package/components/ui/collapsible.tsx +34 -0
  27. package/components/ui/combobox.tsx +297 -0
  28. package/components/ui/dropdown-menu.tsx +269 -0
  29. package/components/ui/field.tsx +227 -0
  30. package/components/ui/input-group.tsx +147 -0
  31. package/components/ui/input.tsx +19 -0
  32. package/components/ui/label.tsx +24 -0
  33. package/components/ui/progress.tsx +31 -0
  34. package/components/ui/scroll-area.tsx +58 -0
  35. package/components/ui/select.tsx +191 -0
  36. package/components/ui/separator.tsx +28 -0
  37. package/components/ui/table.tsx +116 -0
  38. package/components/ui/tabs.tsx +91 -0
  39. package/components/ui/textarea.tsx +18 -0
  40. package/components/ui/tooltip.tsx +57 -0
  41. package/components.json +25 -0
  42. package/lib/data.ts +297 -0
  43. package/lib/types.ts +113 -0
  44. package/lib/utils.ts +6 -0
  45. package/next.config.ts +5 -0
  46. package/package.json +51 -0
  47. package/postcss.config.mjs +7 -0
  48. package/public/vercel.svg +1 -0
  49. package/tsconfig.json +42 -0
package/lib/types.ts ADDED
@@ -0,0 +1,113 @@
1
+ /**
2
+ * Playground types — mirrors the core agent-eval types for the UI.
3
+ * Kept separate to avoid importing Node.js code into the browser bundle.
4
+ */
5
+
6
/**
 * Canonical tool identifiers used across agents.
 *
 * `"unknown"` is the catch-all member — presumably used when an agent's own
 * tool name has no canonical equivalent (TODO confirm against the parser).
 */
export type ToolName =
  // File tools
  | "file_read" | "file_write" | "file_edit"
  // Shell
  | "shell"
  // Web
  | "web_fetch" | "web_search"
  // Search and listing
  | "glob" | "grep" | "list_dir"
  // Other
  | "agent_task"
  | "unknown";
19
+
20
/**
 * One entry in a transcript.
 *
 * Only `type` is required. All other members are optional, and this file does
 * not state which of them accompany which `type`.
 */
export interface TranscriptEvent {
  /** Kind of event. */
  type: "message" | "tool_call" | "tool_result" | "thinking" | "error";
  /** Speaker role, when one applies. */
  role?: "user" | "assistant" | "system";
  /** Time of the event as a string; the format is not specified here. */
  timestamp?: string;
  /** Text payload of the event. */
  content?: string;
  /** Tool details — presumably present on tool-related events (TODO confirm). */
  tool?: {
    /** Canonical tool name. */
    name: ToolName;
    /** Name as originally reported — presumably the agent-specific one (TODO confirm). */
    originalName: string;
    /** Arguments passed to the tool, with values of unknown shape. */
    args?: Record<string, unknown>;
    /** Tool output; shape is not constrained by this type. */
    result?: unknown;
    /** Duration of the tool call in milliseconds. */
    durationMs?: number;
    /** Whether the tool call succeeded. */
    success?: boolean;
  };
}
35
+
36
/**
 * Aggregate statistics derived from a transcript.
 *
 * All members are required. `toolCalls` is typed with an entry for every
 * `ToolName`.
 */
export interface TranscriptSummary {
  // --- Counters ---
  /** Number of turns in the transcript. */
  totalTurns: number;
  /** Total number of tool calls. */
  totalToolCalls: number;
  /** Tool-call count keyed by canonical tool name. */
  toolCalls: Record<ToolName, number>;
  /** Number of thinking blocks. */
  thinkingBlocks: number;

  // --- File activity ---
  /** Files read — presumably paths (TODO confirm). */
  filesRead: Array<string>;
  /** Files modified — presumably paths (TODO confirm). */
  filesModified: Array<string>;

  // --- External activity ---
  /** Web fetches, each with its URL and optional request/response details. */
  webFetches: Array<{
    url: string;
    method?: string;
    status?: number;
    success?: boolean;
  }>;
  /** Shell commands, each with optional exit code and success flag. */
  shellCommands: Array<{
    command: string;
    exitCode?: number;
    success?: boolean;
  }>;

  /** Error messages collected from the transcript. */
  errors: Array<string>;
}
48
+
49
/**
 * A parsed transcript: the event list plus its summary and parse status.
 */
export interface Transcript {
  /** Name of the agent the transcript belongs to. */
  agent: string;
  /** Model identifier, when known. */
  model?: string;
  /** Whether parsing succeeded. */
  parseSuccess: boolean;
  /** Parse error messages — presumably set when parsing hit problems (TODO confirm). */
  parseErrors?: Array<string>;
  /** Summary statistics for `events`. */
  summary: TranscriptSummary;
  /** The transcript's events. */
  events: Array<TranscriptEvent>;
}
58
+
59
/**
 * Outcome of one eval run.
 */
export interface EvalRunResult {
  /** Final status of the run. */
  status: "passed" | "failed";
  /** Run duration; the unit is not stated in this file (TODO confirm). */
  duration: number;
  /** Error message — presumably set for failed runs (TODO confirm). */
  error?: string;
  /** Summary statistics for the run's transcript, when available. */
  o11y?: TranscriptSummary;
  /** Path to the transcript. */
  transcriptPath?: string;
  /** Path to the raw transcript. */
  transcriptRawPath?: string;
  /** Paths to captured outputs. */
  outputPaths?: {
    /** Output path for the eval itself. */
    eval?: string;
    /** Output paths for scripts — presumably keyed by script name (TODO confirm). */
    scripts?: Record<string, string>;
  };
}
72
+
73
/**
 * Aggregate over all runs of one eval.
 */
export interface EvalSummary {
  /** Eval name. */
  name: string;
  /** Number of runs that passed. */
  passedRuns: number;
  /** Total number of runs. */
  totalRuns: number;
  /** Pass rate; whether this is a fraction or a percentage is not stated here (TODO confirm). */
  passRate: number;
  /** Mean run duration; the unit is not stated in this file (TODO confirm). */
  meanDuration: number;
}
81
+
82
/**
 * Experiment listing entry, as returned by the API.
 */
export interface ExperimentInfo {
  /** Experiment name. */
  name: string;
  /** The most recent timestamp — presumably one of `timestamps` (TODO confirm). */
  latestTimestamp: string;
  /** Timestamps recorded for this experiment. */
  timestamps: Array<string>;
}
88
+
89
/**
 * Full detail of an experiment at one specific timestamp.
 */
export interface ExperimentDetail {
  /** Start time as a string; the format is not specified here. */
  startedAt: string;
  /** Completion time as a string; the format is not specified here. */
  completedAt: string;
  /** Configuration the experiment ran with. */
  config: {
    /** Agent name. */
    agent: string;
    /** A single model identifier, or a list of them. */
    model: string | Array<string>;
    /** Configured run count — presumably per eval (TODO confirm). */
    runs: number;
    /** Timeout value; the unit is not stated in this file (TODO confirm). */
    timeout: number;
    /** Early-exit flag; its exact semantics are defined outside this file. */
    earlyExit: boolean;
  };
  /** One summary per eval in the experiment. */
  evals: Array<EvalSummary>;
}
102
+
103
/**
 * Detail of a single run.
 *
 * The only member is `result`. Observability data, when present, is carried
 * inside it as `result.o11y`; this interface has no `o11y` member of its own.
 */
export interface RunDetail {
  /** The run's result. */
  result: EvalRunResult;
}
107
+
108
/**
 * Description of an eval fixture.
 */
export interface EvalInfo {
  /** Eval name. */
  name: string;
  /** Prompt text for the eval. */
  prompt: string;
  /** Files belonging to the fixture — presumably paths (TODO confirm). */
  files: Array<string>;
}
package/lib/utils.ts ADDED
@@ -0,0 +1,6 @@
1
+ import { clsx, type ClassValue } from "clsx"
2
+ import { twMerge } from "tailwind-merge"
3
+
4
/**
 * Builds one class-name string from any number of class values.
 *
 * The inputs are first combined by `clsx`, and that result is then passed
 * through `twMerge`.
 *
 * @param inputs - Class values accepted by `clsx`.
 * @returns The value returned by `twMerge` for the combined class string.
 */
export function cn(...inputs: ClassValue[]) {
  const combined = clsx(inputs);
  return twMerge(combined);
}
package/next.config.ts ADDED
@@ -0,0 +1,5 @@
1
+ import type { NextConfig } from "next";
2
+
3
/** Next.js configuration for the playground. No options are set. */
const nextConfig: NextConfig = {
  // No overrides.
};

export default nextConfig;
package/package.json ADDED
@@ -0,0 +1,51 @@
1
+ {
2
+ "name": "@vercel/agent-eval-playground",
3
+ "version": "0.0.1",
4
+ "description": "Web-based playground for browsing agent-eval experiment results",
5
+ "bin": {
6
+ "agent-eval-playground": "./bin.mjs"
7
+ },
8
+ "scripts": {
9
+ "dev": "next dev",
10
+ "build": "next build",
11
+ "start": "next start",
12
+ "release": "changeset publish"
13
+ },
14
+ "dependencies": {
15
+ "@base-ui/react": "^1.1.0",
16
+ "@remixicon/react": "^4.9.0",
17
+ "class-variance-authority": "^0.7.1",
18
+ "clsx": "^2.1.1",
19
+ "lucide-react": "^0.469.0",
20
+ "next": "^16.0.0",
21
+ "radix-ui": "^1.4.3",
22
+ "react": "^19.2.0",
23
+ "react-dom": "^19.2.0",
24
+ "tailwind-merge": "^2.6.0",
25
+ "tw-animate-css": "^1.4.0"
26
+ },
27
+ "devDependencies": {
28
+ "@tailwindcss/postcss": "^4.0.0",
29
+ "@types/node": "^22.0.0",
30
+ "@types/react": "^19.0.0",
31
+ "@types/react-dom": "^19.0.0",
32
+ "postcss": "^8.4.49",
33
+ "tailwindcss": "^4.0.0",
34
+ "typescript": "^5.6.0"
35
+ },
36
+ "files": [
37
+ "app",
38
+ "components",
39
+ "lib",
40
+ "public",
41
+ "bin.mjs",
42
+ "next.config.ts",
43
+ "tsconfig.json",
44
+ "postcss.config.mjs",
45
+ "components.json"
46
+ ],
47
+ "publishConfig": {
48
+ "access": "public"
49
+ },
50
+ "license": "MIT"
51
+ }
package/postcss.config.mjs ADDED
@@ -0,0 +1,7 @@
1
// PostCSS plugin map: registers "@tailwindcss/postcss" with an empty options object.
const plugins = {
  "@tailwindcss/postcss": {},
};

// PostCSS configuration object, exported as the module default.
const config = { plugins };

export default config;
package/public/vercel.svg ADDED
@@ -0,0 +1 @@
1
+ <svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>
package/tsconfig.json ADDED
@@ -0,0 +1,42 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2017",
4
+ "lib": [
5
+ "dom",
6
+ "dom.iterable",
7
+ "esnext"
8
+ ],
9
+ "allowJs": true,
10
+ "skipLibCheck": true,
11
+ "strict": true,
12
+ "noEmit": true,
13
+ "esModuleInterop": true,
14
+ "module": "esnext",
15
+ "moduleResolution": "bundler",
16
+ "resolveJsonModule": true,
17
+ "isolatedModules": true,
18
+ "jsx": "react-jsx",
19
+ "incremental": true,
20
+ "plugins": [
21
+ {
22
+ "name": "next"
23
+ }
24
+ ],
25
+ "paths": {
26
+ "@/*": [
27
+ "./*"
28
+ ]
29
+ },
30
+ "baseUrl": "."
31
+ },
32
+ "include": [
33
+ "next-env.d.ts",
34
+ "**/*.ts",
35
+ "**/*.tsx",
36
+ ".next/types/**/*.ts",
37
+ ".next/dev/types/**/*.ts"
38
+ ],
39
+ "exclude": [
40
+ "node_modules"
41
+ ]
42
+ }