@ash-ai/dashboard 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/app/agents/detail/page.tsx +182 -0
- package/app/agents/eval-compare/page.tsx +445 -0
- package/app/agents/eval-run/page.tsx +331 -0
- package/app/agents/eval-runs/page.tsx +376 -0
- package/app/agents/evals/page.tsx +448 -0
- package/app/agents/knowledge/page.tsx +202 -0
- package/app/agents/page.tsx +5 -3
- package/app/agents/versions/page.tsx +280 -0
- package/app/sessions/page.tsx +21 -15
- package/lib/hooks.ts +120 -0
- package/out/404/index.html +1 -1
- package/out/404.html +1 -1
- package/out/_next/static/chunks/839-186b95b4d095e127.js +1 -0
- package/out/_next/static/chunks/90-8e47c31ab931867f.js +1 -0
- package/out/_next/static/chunks/{929-6faf1adeb65ee383.js → 929-b631fe082fe4e852.js} +1 -1
- package/out/_next/static/chunks/app/agents/detail/page-7427483b9c74ad63.js +1 -0
- package/out/_next/static/chunks/app/agents/eval-compare/page-bc4e8051ba07e56f.js +1 -0
- package/out/_next/static/chunks/app/agents/eval-run/page-a38dac74d1b3787d.js +1 -0
- package/out/_next/static/chunks/app/agents/eval-runs/page-af2fb33c0fce7934.js +1 -0
- package/out/_next/static/chunks/app/agents/evals/page-6bdc4b839a7a8eda.js +1 -0
- package/out/_next/static/chunks/app/agents/knowledge/page-0e02b14bfa2a6d04.js +1 -0
- package/out/_next/static/chunks/app/agents/page-99f179eb7c41ebd4.js +1 -0
- package/out/_next/static/chunks/app/agents/versions/page-c482d9bad8f35df6.js +1 -0
- package/out/_next/static/chunks/app/analytics/page-ca5d8c60e62118ed.js +1 -0
- package/out/_next/static/chunks/app/layout-b06d1caafc026d0c.js +1 -0
- package/out/_next/static/chunks/app/logs/page-1a7df17a605f36d3.js +1 -0
- package/out/_next/static/chunks/app/page-9e02cb0e8897ab5d.js +1 -0
- package/out/_next/static/chunks/app/playground/{page-10d3461f118bfb21.js → page-cb17c2ffaeb31b4e.js} +1 -1
- package/out/_next/static/chunks/app/queue/page-6013b93817822c75.js +1 -0
- package/out/_next/static/chunks/app/sessions/page-add67d96ab66b690.js +1 -0
- package/out/_next/static/chunks/app/settings/credentials/page-ffe97ffb2f60229d.js +1 -0
- package/out/_next/static/css/ab505eeeff3f7df5.css +1 -0
- package/out/_next/static/sXYgh3eUKXRKt1T_1T3tk/_buildManifest.js +1 -0
- package/out/agents/detail/index.html +1 -0
- package/out/agents/detail/index.txt +22 -0
- package/out/agents/eval-compare/index.html +1 -0
- package/out/agents/eval-compare/index.txt +22 -0
- package/out/agents/eval-run/index.html +1 -0
- package/out/agents/eval-run/index.txt +22 -0
- package/out/agents/eval-runs/index.html +1 -0
- package/out/agents/eval-runs/index.txt +22 -0
- package/out/agents/evals/index.html +1 -0
- package/out/agents/evals/index.txt +22 -0
- package/out/agents/index.html +1 -1
- package/out/agents/index.txt +6 -6
- package/out/agents/knowledge/index.html +1 -0
- package/out/agents/knowledge/index.txt +22 -0
- package/out/agents/versions/index.html +1 -0
- package/out/agents/versions/index.txt +22 -0
- package/out/analytics/index.html +1 -1
- package/out/analytics/index.txt +6 -6
- package/out/index.html +1 -1
- package/out/index.txt +6 -6
- package/out/logs/index.html +1 -1
- package/out/logs/index.txt +6 -6
- package/out/playground/index.html +1 -1
- package/out/playground/index.txt +6 -6
- package/out/queue/index.html +1 -1
- package/out/queue/index.txt +6 -6
- package/out/sessions/index.html +1 -1
- package/out/sessions/index.txt +6 -6
- package/out/settings/api-keys/index.html +1 -1
- package/out/settings/api-keys/index.txt +6 -6
- package/out/settings/credentials/index.html +1 -1
- package/out/settings/credentials/index.txt +6 -6
- package/package.json +4 -4
- package/out/_next/static/ALf6-9rl7RWKPirFYjchn/_buildManifest.js +0 -1
- package/out/_next/static/chunks/322-bab4df5c5188e993.js +0 -1
- package/out/_next/static/chunks/432-11ec8af7ccfbd019.js +0 -1
- package/out/_next/static/chunks/app/agents/page-5f872b5fa12d7854.js +0 -1
- package/out/_next/static/chunks/app/analytics/page-bb296f848e25a94f.js +0 -1
- package/out/_next/static/chunks/app/layout-f5d1d76b525135c7.js +0 -1
- package/out/_next/static/chunks/app/logs/page-5165b556d13654ae.js +0 -1
- package/out/_next/static/chunks/app/page-d1e6d7bff1216f08.js +0 -1
- package/out/_next/static/chunks/app/queue/page-50142f2cfb3664e7.js +0 -1
- package/out/_next/static/chunks/app/sessions/page-2410b352f297ae91.js +0 -1
- package/out/_next/static/chunks/app/settings/credentials/page-deb5556bfe57b8b9.js +0 -1
- package/out/_next/static/css/15bfa5d891bcf58c.css +0 -1
- /package/out/_next/static/{ALf6-9rl7RWKPirFYjchn → sXYgh3eUKXRKt1T_1T3tk}/_ssgManifest.js +0 -0
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
'use client'
|
|
2
|
+
|
|
3
|
+
import { Suspense, useState, useEffect } from 'react'
|
|
4
|
+
import { useSearchParams } from 'next/navigation'
|
|
5
|
+
import Link from 'next/link'
|
|
6
|
+
import { getClient } from '@/lib/client'
|
|
7
|
+
import { Card, CardContent } from '@/components/ui/card'
|
|
8
|
+
import { Badge } from '@/components/ui/badge'
|
|
9
|
+
import { ShimmerBlock } from '@/components/ui/shimmer'
|
|
10
|
+
import { formatRelativeTime } from '@/lib/utils'
|
|
11
|
+
import {
|
|
12
|
+
ArrowLeft,
|
|
13
|
+
Bot,
|
|
14
|
+
GitBranch,
|
|
15
|
+
BookOpen,
|
|
16
|
+
FlaskConical,
|
|
17
|
+
FolderOpen,
|
|
18
|
+
Clock,
|
|
19
|
+
} from 'lucide-react'
|
|
20
|
+
import type { Agent } from '@ash-ai/shared'
|
|
21
|
+
|
|
22
|
+
/**
 * Detail view for the agent named by the `name` query parameter.
 *
 * Fetches the agent list through the API client, selects the entry whose
 * `name` matches, and renders a header card plus navigation cards for the
 * Versions, Knowledge and Evals sub-pages. Separate states are rendered for
 * a missing `name` parameter, an in-flight request, and a failed or empty
 * lookup.
 */
function AgentDetailContent() {
  const searchParams = useSearchParams()
  const name = searchParams.get('name')
  const [agent, setAgent] = useState<Agent | null>(null)
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)

  useEffect(() => {
    // Start every lookup from a clean slate so an agent or error left over
    // from a previous `name` is never shown for the current one.
    setAgent(null)
    setError(null)

    if (!name) {
      setLoading(false)
      return
    }

    setLoading(true)

    // Set by the cleanup below. A request that was superseded by a newer
    // `name`, or that resolves after unmount, must not touch state.
    let cancelled = false

    async function fetchAgent() {
      try {
        const agents = await getClient().listAgents()
        if (cancelled) return
        const found = agents.find((a) => a.name === name)
        if (found) {
          setAgent(found)
        } else {
          setError(`Agent "${name}" not found`)
        }
      } catch (e) {
        if (cancelled) return
        setError(e instanceof Error ? e.message : 'Failed to fetch agent')
      } finally {
        if (!cancelled) setLoading(false)
      }
    }

    // fetchAgent handles its own errors, so the promise is intentionally not awaited.
    void fetchAgent()

    return () => {
      cancelled = true
    }
  }, [name])

  if (!name) {
    return (
      <div className="text-center py-16">
        <p className="text-white/50">No agent name specified.</p>
        <Link href="/agents" className="text-indigo-400 hover:text-indigo-300 text-sm mt-2 inline-block">
          Back to agents
        </Link>
      </div>
    )
  }

  if (loading) {
    return (
      <div className="space-y-6">
        <ShimmerBlock height={80} />
        <div className="grid grid-cols-1 gap-4 sm:grid-cols-3">
          {[1, 2, 3].map((i) => (
            <ShimmerBlock key={i} height={120} />
          ))}
        </div>
      </div>
    )
  }

  if (error || !agent) {
    return (
      <div className="text-center py-16">
        <p className="text-red-400">{error || 'Agent not found'}</p>
        <Link href="/agents" className="text-indigo-400 hover:text-indigo-300 text-sm mt-2 inline-block">
          Back to agents
        </Link>
      </div>
    )
  }

  // Every sub-page is addressed by the same `name` query string.
  const nameQuery = `?name=${encodeURIComponent(agent.name)}`

  const tabs = [
    {
      label: 'Versions',
      href: `/agents/versions${nameQuery}`,
      icon: GitBranch,
      description: 'Manage agent versions and system prompts',
    },
    {
      label: 'Knowledge',
      href: `/agents/knowledge${nameQuery}`,
      icon: BookOpen,
      description: 'Upload and manage knowledge base files',
    },
    {
      label: 'Evals',
      href: `/agents/evals${nameQuery}`,
      icon: FlaskConical,
      description: 'Define and run evaluation test cases',
    },
  ]

  return (
    <div className="space-y-6">
      {/* Back link */}
      <Link
        href="/agents"
        className="inline-flex items-center gap-1.5 text-sm text-white/50 hover:text-white transition-colors"
      >
        <ArrowLeft className="h-4 w-4" />
        Back to agents
      </Link>

      {/* Agent header */}
      <Card>
        <CardContent>
          <div className="flex items-start gap-4">
            <div className="flex h-12 w-12 items-center justify-center rounded-xl bg-indigo-500/10">
              <Bot className="h-6 w-6 text-indigo-400" />
            </div>
            <div className="min-w-0 flex-1">
              <h1 className="text-xl font-bold text-white">{agent.name}</h1>
              {agent.description && (
                <p className="mt-1 text-sm text-white/50">{agent.description}</p>
              )}
              <div className="flex flex-wrap items-center gap-3 mt-3">
                {agent.model && <Badge variant="info">{agent.model}</Badge>}
                {agent.status && (
                  <Badge variant={agent.status === 'active' ? 'success' : 'default'}>
                    {agent.status}
                  </Badge>
                )}
                {agent.path && (
                  <span className="inline-flex items-center gap-1 text-xs text-white/30">
                    <FolderOpen className="h-3 w-3" />
                    {agent.path}
                  </span>
                )}
                {agent.createdAt && (
                  <span className="inline-flex items-center gap-1 text-xs text-white/30">
                    <Clock className="h-3 w-3" />
                    Created {formatRelativeTime(agent.createdAt)}
                  </span>
                )}
              </div>
            </div>
          </div>
        </CardContent>
      </Card>

      {/* Tab cards */}
      <div className="grid grid-cols-1 gap-4 sm:grid-cols-3">
        {tabs.map((tab) => (
          <Link key={tab.label} href={tab.href}>
            <Card className="hover:border-white/20 hover:bg-white/[0.02] transition-all cursor-pointer h-full">
              <CardContent>
                <div className="flex items-center gap-3 mb-2">
                  <tab.icon className="h-5 w-5 text-indigo-400" />
                  <h3 className="text-sm font-semibold text-white">{tab.label}</h3>
                </div>
                <p className="text-xs text-white/40">{tab.description}</p>
              </CardContent>
            </Card>
          </Link>
        ))}
      </div>
    </div>
  )
}
|
|
175
|
+
|
|
176
|
+
/**
 * Route entry point for the agent detail page.
 *
 * Wraps the content component, which reads the query string through
 * `useSearchParams`, in a Suspense boundary and shows a shimmer placeholder
 * while it is suspended.
 */
export default function AgentDetailPage() {
  const placeholder = <ShimmerBlock height={200} />

  return (
    <Suspense fallback={placeholder}>
      <AgentDetailContent />
    </Suspense>
  )
}
|
|
@@ -0,0 +1,445 @@
|
|
|
1
|
+
'use client'
|
|
2
|
+
|
|
3
|
+
import { Suspense, useState, useEffect } from 'react'
|
|
4
|
+
import { useSearchParams } from 'next/navigation'
|
|
5
|
+
import Link from 'next/link'
|
|
6
|
+
import { getClient } from '@/lib/client'
|
|
7
|
+
import { Card, CardContent } from '@/components/ui/card'
|
|
8
|
+
import { Badge } from '@/components/ui/badge'
|
|
9
|
+
import { ShimmerBlock } from '@/components/ui/shimmer'
|
|
10
|
+
import { formatRelativeTime, truncateId } from '@/lib/utils'
|
|
11
|
+
import {
|
|
12
|
+
ArrowLeft,
|
|
13
|
+
ArrowUpRight,
|
|
14
|
+
ArrowDownRight,
|
|
15
|
+
Minus,
|
|
16
|
+
ChevronDown,
|
|
17
|
+
ChevronRight,
|
|
18
|
+
} from 'lucide-react'
|
|
19
|
+
import type { EvalRunComparison } from '@ash-ai/shared'
|
|
20
|
+
|
|
21
|
+
/**
 * Maps a score to the Tailwind text-colour class used to display it.
 *
 * @param score - The score to classify. `null`, `undefined` and `NaN` are
 *   treated as "no score" so that missing data is shown muted instead of
 *   being coloured as a failing result.
 * @returns `text-green-400` for scores of at least 0.8, `text-yellow-400`
 *   for scores of at least 0.5, `text-red-400` for anything lower, and
 *   `text-white/30` when there is no score.
 */
function scoreColor(score: number | null | undefined): string {
  // `== null` deliberately matches both null and undefined.
  if (score == null || Number.isNaN(score)) return 'text-white/30'
  if (score >= 0.8) return 'text-green-400'
  if (score >= 0.5) return 'text-yellow-400'
  return 'text-red-400'
}
|
|
27
|
+
|
|
28
|
+
/**
 * Small icon showing how a score moved from run A to run B.
 *
 * Renders a faint dash when either score is `null`, a slightly brighter dash
 * when the two scores differ by less than 0.01, a green up-right arrow when
 * B is higher than A, and a red down-right arrow otherwise.
 */
function DiffIndicator({ a, b }: { a: number | null; b: number | null }) {
  const delta = a !== null && b !== null ? b - a : null

  if (delta === null) {
    return <Minus className="h-3 w-3 text-white/20" />
  }

  const unchanged = Math.abs(delta) < 0.01
  if (unchanged) {
    return <Minus className="h-3 w-3 text-white/30" />
  }

  return delta > 0 ? (
    <ArrowUpRight className="h-3 w-3 text-green-400" />
  ) : (
    <ArrowDownRight className="h-3 w-3 text-red-400" />
  )
}
|
|
35
|
+
|
|
36
|
+
/**
 * Formats the change from `a` to `b` as a signed number with two decimals.
 *
 * @param a - Baseline value, or `null`/`undefined` when unavailable.
 * @param b - Compared value, or `null`/`undefined` when unavailable.
 * @returns `'--'` when either value is missing; otherwise `b - a` rounded to
 *   two decimals with an explicit sign, e.g. `'+0.30'` or `'-0.12'`. A delta
 *   that rounds to zero is always rendered as `'+0.00'`.
 */
function diffValue(a: number | null | undefined, b: number | null | undefined): string {
  // `== null` deliberately matches both null and undefined.
  if (a == null || b == null) return '--'

  // Pick the sign from the rounded value. Choosing it from the raw delta
  // lets tiny negative deltas through as "-0.00".
  const rounded = Number((b - a).toFixed(2))
  const sign = rounded < 0 ? '-' : '+'
  return `${sign}${Math.abs(rounded).toFixed(2)}`
}
|
|
42
|
+
|
|
43
|
+
/**
 * Picks the Tailwind text-colour class for the change from `a` to `b`.
 *
 * Returns the muted class when either value is `null` or when the two values
 * differ by less than 0.01, green when `b` is higher than `a`, and red
 * otherwise.
 */
function diffColor(a: number | null, b: number | null): string {
  const muted = 'text-white/30'

  if (a === null || b === null) {
    return muted
  }

  const delta = b - a
  const withinTolerance = Math.abs(delta) < 0.01
  if (withinTolerance) {
    return muted
  }

  if (delta > 0) {
    return 'text-green-400'
  }
  return 'text-red-400'
}
|
|
49
|
+
|
|
50
|
+
/** Metadata of either side of the comparison (`runA` or `runB`). */
type ComparedRun = EvalRunComparison['runA' | 'runB']

/** One run's outcome for a single eval case; handled as possibly nullish. */
type ComparedResult = EvalRunComparison['results'][number]['resultA' | 'resultB']

/**
 * Summary card for one side of the comparison: truncated run id, optional
 * version badge, creation time and, when a summary exists, the pass rate,
 * topic score, safety score and latency.
 */
function RunSummaryCard({ title, run }: { title: string; run: ComparedRun }) {
  const { summary } = run

  return (
    <Card>
      <CardContent>
        <div className="text-xs text-white/40 mb-2">{title}</div>
        <div className="flex items-center gap-2 mb-2">
          <span className="text-sm font-semibold text-white">{truncateId(run.id)}</span>
          {run.versionNumber !== null && run.versionNumber !== undefined && (
            <Badge variant="default">v{run.versionNumber}</Badge>
          )}
        </div>
        <div className="text-xs text-white/30">{formatRelativeTime(run.createdAt)}</div>
        {summary && (
          <div className="mt-3 pt-3 border-t border-white/5 space-y-1">
            <div className="flex justify-between">
              <span className="text-xs text-white/40">Pass Rate</span>
              <span className={`text-xs font-semibold ${scoreColor(summary.passRate)}`}>
                {(summary.passRate * 100).toFixed(0)}%
              </span>
            </div>
            <div className="flex justify-between">
              <span className="text-xs text-white/40">Topic</span>
              <span className={`text-xs font-semibold ${scoreColor(summary.avgTopicScore)}`}>
                {summary.avgTopicScore.toFixed(2)}
              </span>
            </div>
            <div className="flex justify-between">
              <span className="text-xs text-white/40">Safety</span>
              <span className={`text-xs font-semibold ${scoreColor(summary.avgSafetyScore)}`}>
                {summary.avgSafetyScore.toFixed(2)}
              </span>
            </div>
            <div className="flex justify-between">
              <span className="text-xs text-white/40">Latency</span>
              <span className="text-xs font-semibold text-white">
                {summary.avgLatencyMs.toFixed(0)}ms
              </span>
            </div>
          </div>
        )}
      </CardContent>
    </Card>
  )
}

/**
 * Inline "A [indicator] B" display for one score of one eval case. A missing
 * score is rendered as `--`.
 */
function ScorePair({
  a,
  b,
}: {
  a: number | null | undefined
  b: number | null | undefined
}) {
  return (
    <div className="flex items-center gap-1">
      <span className={`text-xs ${scoreColor(a ?? null)}`}>{a?.toFixed(2) ?? '--'}</span>
      <DiffIndicator a={a ?? null} b={b ?? null} />
      <span className={`text-xs ${scoreColor(b ?? null)}`}>{b?.toFixed(2) ?? '--'}</span>
    </div>
  )
}

/**
 * Expanded view of one run's answer to an eval case: the agent response (or
 * a "No response" placeholder) followed by whichever of the topic score,
 * safety score and latency are not `null`.
 */
function CaseResultPanel({ title, result }: { title: string; result: ComparedResult }) {
  return (
    <div>
      <div className="text-xs font-medium text-white/40 mb-1">{title}</div>
      {result?.agentResponse ? (
        <div className="text-xs text-white/60 bg-black/20 rounded-lg p-3 whitespace-pre-wrap max-h-40 overflow-y-auto">
          {result.agentResponse}
        </div>
      ) : (
        <p className="text-xs text-white/20 italic">No response</p>
      )}
      {result && (
        <div className="flex gap-3 mt-2">
          {result.topicScore !== null && (
            <span className={`text-xs ${scoreColor(result.topicScore)}`}>
              Topic: {result.topicScore.toFixed(2)}
            </span>
          )}
          {result.safetyScore !== null && (
            <span className={`text-xs ${scoreColor(result.safetyScore)}`}>
              Safety: {result.safetyScore.toFixed(2)}
            </span>
          )}
          {result.latencyMs !== null && (
            <span className="text-xs text-white/30">{result.latencyMs.toFixed(0)}ms</span>
          )}
        </div>
      )}
    </div>
  )
}

/**
 * Side-by-side comparison of two eval runs of one agent.
 *
 * Reads `name`, `runA` and `runB` from the query string, loads the
 * comparison through the API client, and renders a summary card per run, a
 * "B - A" difference card, and an expandable row per eval case. Separate
 * states are rendered for missing parameters, an in-flight request, and a
 * failed load.
 */
function EvalCompareContent() {
  const searchParams = useSearchParams()
  const name = searchParams.get('name')
  const runA = searchParams.get('runA')
  const runB = searchParams.get('runB')

  const [comparison, setComparison] = useState<EvalRunComparison | null>(null)
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)
  const [expandedCase, setExpandedCase] = useState<string | null>(null)

  useEffect(() => {
    // Start every load from a clean slate so a comparison or error left over
    // from previous parameters is never shown for the current ones.
    setComparison(null)
    setError(null)
    setExpandedCase(null)

    if (!name || !runA || !runB) {
      setLoading(false)
      return
    }

    setLoading(true)

    // Set by the cleanup below. A request that was superseded by newer
    // parameters, or that resolves after unmount, must not touch state.
    let cancelled = false

    // The narrowed values are passed as arguments so the closure needs no
    // non-null assertions.
    const load = async (agentName: string, idA: string, idB: string) => {
      try {
        const data = await getClient().compareEvalRuns(agentName, idA, idB)
        if (!cancelled) setComparison(data)
      } catch (e) {
        if (!cancelled) {
          setError(e instanceof Error ? e.message : 'Failed to load comparison')
        }
      } finally {
        if (!cancelled) setLoading(false)
      }
    }

    // load handles its own errors, so the promise is intentionally not awaited.
    void load(name, runA, runB)

    return () => {
      cancelled = true
    }
  }, [name, runA, runB])

  if (!name || !runA || !runB) {
    return (
      <div className="text-center py-16">
        <p className="text-white/50">Missing comparison parameters.</p>
        <Link href="/agents" className="text-indigo-400 hover:text-indigo-300 text-sm mt-2 inline-block">
          Back to agents
        </Link>
      </div>
    )
  }

  if (loading) {
    return (
      <div className="space-y-6">
        <ShimmerBlock height={120} />
        <ShimmerBlock height={200} />
      </div>
    )
  }

  const evalRunsHref = `/agents/eval-runs?name=${encodeURIComponent(name)}`

  if (error || !comparison) {
    return (
      <div className="text-center py-16">
        <p className="text-red-400">{error || 'Failed to load comparison'}</p>
        <Link
          href={evalRunsHref}
          className="text-indigo-400 hover:text-indigo-300 text-sm mt-2 inline-block"
        >
          Back to eval runs
        </Link>
      </div>
    )
  }

  const { runA: metaA, runB: metaB, results } = comparison

  return (
    <div className="space-y-6">
      {/* Back link */}
      <Link
        href={evalRunsHref}
        className="inline-flex items-center gap-1.5 text-sm text-white/50 hover:text-white transition-colors"
      >
        <ArrowLeft className="h-4 w-4" />
        Back to eval runs
      </Link>

      <h1 className="text-2xl font-bold text-white">Run Comparison</h1>

      {/* Summary Comparison */}
      <div className="grid grid-cols-1 sm:grid-cols-3 gap-4">
        {/* Run A */}
        <RunSummaryCard title="Run A" run={metaA} />

        {/* Diff */}
        <Card className="bg-white/[0.02]">
          <CardContent>
            <div className="text-xs text-white/40 mb-2">Difference (B - A)</div>
            {metaA.summary && metaB.summary ? (
              <div className="space-y-3 mt-3">
                <DiffRow
                  label="Pass Rate"
                  a={metaA.summary.passRate}
                  b={metaB.summary.passRate}
                  format={(v) => `${(v * 100).toFixed(0)}%`}
                  // Signed like the latency row so increases read as "+5.0pp".
                  formatDiff={(d) => `${d > 0 ? '+' : ''}${(d * 100).toFixed(1)}pp`}
                />
                <DiffRow
                  label="Topic Score"
                  a={metaA.summary.avgTopicScore}
                  b={metaB.summary.avgTopicScore}
                />
                <DiffRow
                  label="Safety Score"
                  a={metaA.summary.avgSafetyScore}
                  b={metaB.summary.avgSafetyScore}
                />
                <DiffRow
                  label="Latency"
                  a={metaA.summary.avgLatencyMs}
                  b={metaB.summary.avgLatencyMs}
                  format={(v) => `${v.toFixed(0)}ms`}
                  formatDiff={(d) => `${d > 0 ? '+' : ''}${d.toFixed(0)}ms`}
                  invertColor
                />
              </div>
            ) : (
              <p className="text-xs text-white/30 mt-4">
                Summary data not available for both runs.
              </p>
            )}
          </CardContent>
        </Card>

        {/* Run B */}
        <RunSummaryCard title="Run B" run={metaB} />
      </div>

      {/* Per-case comparison */}
      <div>
        <h2 className="text-lg font-semibold text-white mb-3">Per-Case Comparison</h2>
        {results.length === 0 ? (
          <p className="text-sm text-white/40">No per-case results available.</p>
        ) : (
          <div className="space-y-2">
            {results.map((item) => {
              const isExpanded = expandedCase === item.caseId
              return (
                <Card key={item.caseId}>
                  <CardContent className="!py-3">
                    <button
                      type="button"
                      aria-expanded={isExpanded}
                      onClick={() => setExpandedCase(isExpanded ? null : item.caseId)}
                      className="flex items-center justify-between w-full text-left"
                    >
                      <div className="flex items-center gap-2 min-w-0 flex-1">
                        {isExpanded ? (
                          <ChevronDown className="h-4 w-4 text-white/40 flex-shrink-0" />
                        ) : (
                          <ChevronRight className="h-4 w-4 text-white/40 flex-shrink-0" />
                        )}
                        <span className="text-sm text-white truncate">{item.question}</span>
                      </div>
                      <div className="flex items-center gap-4 flex-shrink-0 ml-2">
                        {/* Topic score comparison */}
                        <ScorePair a={item.resultA?.topicScore} b={item.resultB?.topicScore} />
                        {/* Safety score comparison */}
                        <ScorePair a={item.resultA?.safetyScore} b={item.resultB?.safetyScore} />
                      </div>
                    </button>
                    {isExpanded && (
                      <div className="mt-3 pt-3 border-t border-white/5">
                        <div className="grid grid-cols-2 gap-4">
                          {/* Run A response */}
                          <CaseResultPanel title="Run A Response" result={item.resultA} />
                          {/* Run B response */}
                          <CaseResultPanel title="Run B Response" result={item.resultB} />
                        </div>
                      </div>
                    )}
                  </CardContent>
                </Card>
              )
            })}
          </div>
        )}
      </div>
    </div>
  )
}
|
|
388
|
+
|
|
389
|
+
/**
 * One labelled row of the "Difference (B - A)" card.
 *
 * Shows the delta `b - a` with a colour and arrow that encode whether the
 * change is an improvement: green with an up-right arrow when the metric
 * moved in the desirable direction, red with a down-right arrow when it
 * moved the other way, and muted with no arrow when the values differ by
 * less than 0.01.
 *
 * @param label - Row caption.
 * @param a - Baseline value (run A).
 * @param b - Compared value (run B).
 * @param format - Accepted so existing call sites keep type-checking; this
 *   row renders only the delta, so the formatter is currently not applied.
 * @param formatDiff - Optional formatter for the delta. Defaults to a signed
 *   two-decimal number.
 * @param invertColor - Set for metrics where lower is better (the caller
 *   uses it for latency); an increase is then shown as a regression.
 */
function DiffRow({
  label,
  a,
  b,
  formatDiff,
  invertColor = false,
}: {
  label: string
  a: number
  b: number
  format?: (v: number) => string
  formatDiff?: (d: number) => string
  invertColor?: boolean
}) {
  const diff = b - a
  const isNeutral = Math.abs(diff) < 0.01

  // An increase is an improvement unless the metric is inverted. This one
  // flag drives both the colour and the arrow direction.
  const improved = diff > 0 !== invertColor

  let colorClass = 'text-white/30'
  if (!isNeutral) {
    colorClass = improved ? 'text-green-400' : 'text-red-400'
  }

  let diffStr: string
  if (formatDiff) {
    diffStr = formatDiff(diff)
  } else {
    // Pick the sign from the rounded value so a tiny negative delta is not
    // printed as "-0.00".
    const rounded = Number(diff.toFixed(2))
    diffStr = `${rounded < 0 ? '-' : '+'}${Math.abs(rounded).toFixed(2)}`
  }

  return (
    <div className="flex items-center justify-between">
      <span className="text-xs text-white/50">{label}</span>
      <div className="flex items-center gap-2">
        {!isNeutral &&
          (improved ? (
            <ArrowUpRight className={`h-3 w-3 ${colorClass}`} />
          ) : (
            <ArrowDownRight className={`h-3 w-3 ${colorClass}`} />
          ))}
        <span className={`text-xs font-semibold ${colorClass}`}>{diffStr}</span>
      </div>
    </div>
  )
}
|
|
438
|
+
|
|
439
|
+
/**
 * Route entry point for the eval run comparison page.
 *
 * Wraps the content component, which reads the query string through
 * `useSearchParams`, in a Suspense boundary and shows a shimmer placeholder
 * while it is suspended.
 */
export default function EvalComparePage() {
  const placeholder = <ShimmerBlock height={200} />

  return (
    <Suspense fallback={placeholder}>
      <EvalCompareContent />
    </Suspense>
  )
}
|