@swarmclawai/swarmclaw 1.5.71 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,632 @@
1
+ 'use client'
2
+
3
+ import { useCallback, useEffect, useMemo, useState } from 'react'
4
+ import { toast } from 'sonner'
5
+ import { MainContent } from '@/components/layout/main-content'
6
+ import { RunList } from '@/components/runs/run-list'
7
+ import { PageLoader } from '@/components/ui/page-loader'
8
+ import { useWs } from '@/hooks/use-ws'
9
+ import { api } from '@/lib/app/api-client'
10
+ import {
11
+ buildQualityOverviewSummary,
12
+ groupApprovalsByCategory,
13
+ summarizeEvalRuns,
14
+ summarizeRunHealth,
15
+ } from '@/lib/quality/quality-summary'
16
+ import { cn } from '@/lib/utils'
17
+ import { useAppStore } from '@/stores/use-app-store'
18
+ import type { EvalRun, EvalSuiteResult } from '@/lib/server/eval/types'
19
+ import type { Agent, ApprovalRequest, SessionRunRecord } from '@/types'
20
+
21
/** Identifier of one tab in the quality workspace; every value appears once in `TABS`. */
type QualityTab = 'overview' | 'evals' | 'approvals' | 'runs'

/** Entry shape this page expects from `GET /eval/suites`. */
interface EvalSuiteSummary {
  name: string
  count: number
  maxScore: number
  categories: string[]
}

/** Entry shape this page expects from `GET /eval/scenarios`. */
interface EvalScenarioSummary {
  id: string
  name: string
  category: string
  /** Compared against `EvalSuiteSummary.name` to find the scenarios of the selected suite. */
  suite: string
  description: string
  tools: string[]
  /** Rendered through `formatDuration`, i.e. treated as milliseconds. */
  timeoutMs: number
  criteriaCount: number
  maxScore: number
}

/** A tab id paired with the label rendered in the tab strip. */
interface QualityTabOption {
  id: QualityTab
  label: string
}

/** Tabs in display order. */
const TABS: QualityTabOption[] = [
  {
    id: 'overview',
    label: 'Overview',
  },
  {
    id: 'evals',
    label: 'Eval Lab',
  },
  {
    id: 'approvals',
    label: 'Approval Desk',
  },
  {
    id: 'runs',
    label: 'Run Review',
  },
]
48
+
49
/**
 * Formats a percentage for display.
 *
 * @param value Percentage to show, or `null` when none is available.
 * @returns `'n/a'` for `null` (and `undefined`), otherwise the value followed by `%`.
 */
function formatPercent(value: number | null): string {
  if (value == null) return 'n/a'
  return `${value}%`
}
52
+
53
/**
 * Converts a raw score into a whole-number percentage of its maximum.
 *
 * @param score Achieved score.
 * @param maxScore Maximum achievable score.
 * @returns The rounded percentage, or `null` when either input is not finite
 *          or `maxScore` is not positive.
 */
function scorePercent(score: number, maxScore: number): number | null {
  const computable = Number.isFinite(score) && Number.isFinite(maxScore) && maxScore > 0
  return computable ? Math.round((score / maxScore) * 100) : null
}
57
+
58
/**
 * Formats a timestamp with the runtime's locale settings.
 *
 * @param at Value passed to `new Date(...)`; any falsy value (null, undefined, 0, NaN)
 *           counts as missing.
 * @returns `'not recorded'` for a missing value, otherwise `Date#toLocaleString()` output.
 */
function formatTimestamp(at: number | null | undefined): string {
  return at ? new Date(at).toLocaleString() : 'not recorded'
}
62
+
63
/**
 * Formats a millisecond duration as a short label: `ms`, `s`, `m`, or `h`
 * (hours keep one decimal place).
 *
 * Every unit is derived from the raw millisecond value. Deriving minutes from
 * already-rounded seconds (and hours from already-rounded minutes) rounds twice
 * and can overstate the result, e.g. 89.5s would show as "2m".
 *
 * @param ms Duration in milliseconds.
 * @returns The formatted label, or `'n/a'` when `ms` is NaN or infinite.
 */
function formatDuration(ms: number): string {
  if (!Number.isFinite(ms)) return 'n/a'

  // Round first so fractional inputs never print as e.g. "999.5ms".
  const wholeMs = Math.round(ms)
  if (wholeMs < 1000) return `${wholeMs}ms`

  const sec = Math.round(ms / 1000)
  if (sec < 60) return `${sec}s`

  const min = Math.round(ms / 60_000)
  if (min < 60) return `${min}m`

  // Tenths of an hour, rounded once from the raw value.
  return `${Math.round(ms / 360_000) / 10}h`
}
71
+
72
/**
 * Builds the display label for an agent reference.
 *
 * @param agent Resolved agent record, or `undefined` when the lookup missed.
 * @param id Raw agent id, used as the label when no record is available.
 * @returns `"<name> (<model or provider>)"` for a known agent, otherwise `id`.
 */
function agentLabel(agent: Agent | undefined, id: string): string {
  if (!agent) return id
  // An empty model falls back to the provider name.
  const backend = agent.model || agent.provider
  return `${agent.name} (${backend})`
}
75
+
76
/**
 * Compact metric card: an uppercase label, a large value, and a one-line hint.
 * `tone` only changes the colour of the value text and defaults to `'default'`.
 */
function StatTile({ label, value, hint, tone = 'default' }: {
  label: string
  value: string
  hint: string
  tone?: 'default' | 'good' | 'warn' | 'danger'
}) {
  // Text colour utility for the value, keyed by tone.
  const valueColorByTone: Record<'default' | 'good' | 'warn' | 'danger', string> = {
    danger: 'text-rose-300',
    warn: 'text-amber-300',
    good: 'text-emerald-300',
    default: 'text-text',
  }
  const valueClassName = cn(
    'mt-2 font-display text-[26px] font-700 tracking-[-0.03em]',
    valueColorByTone[tone],
  )

  return (
    <div className="rounded-[14px] border border-white/[0.06] bg-white/[0.03] px-4 py-3">
      <div className="text-[10px] font-700 uppercase tracking-[0.12em] text-text-3/55">
        {label}
      </div>
      <div className={valueClassName}>
        {value}
      </div>
      <div className="mt-1 text-[12px] leading-relaxed text-text-3/68">
        {hint}
      </div>
    </div>
  )
}
96
+
97
/** Dashed placeholder card shown when a list has nothing to render. */
function EmptyState(props: { title: string; description: string }) {
  const { title, description } = props

  return (
    <div className="rounded-[14px] border border-dashed border-white/[0.08] bg-white/[0.02] px-4 py-6">
      <div className="text-[13px] font-700 text-text">
        {title}
      </div>
      <p className="mt-1 text-[12px] leading-relaxed text-text-3/65">
        {description}
      </p>
    </div>
  )
}
105
+
106
/**
 * Operator "Quality" page with four tabs: Overview, Eval Lab, Approval Desk and Run Review.
 *
 * Data flow:
 * - On mount it loads runs, eval runs, approvals, suites and scenarios in parallel
 *   through `api(...)`; the page shows a loader until the first load settles.
 * - `useWs('runs', …, 5000)` triggers silent reloads.
 *   NOTE(review): presumably a websocket topic plus a 5s fallback/throttle interval —
 *   confirm against the hook.
 * - Running a scenario or suite, or acting on an approval, performs a POST and then
 *   reloads silently so the derived summaries stay current.
 *
 * Selection state (agent, scenario, suite) is re-validated whenever the underlying
 * option list changes, so a stale id is never submitted.
 */
export function QualityWorkspace() {
  const agents = useAppStore((s) => s.agents)
  // Trashed agents are not offered in the agent picker.
  const agentOptions = useMemo(
    () => Object.values(agents).filter((agent) => !agent.trashedAt),
    [agents],
  )

  const [activeTab, setActiveTab] = useState<QualityTab>('overview')
  const [runs, setRuns] = useState<SessionRunRecord[]>([])
  const [evalRuns, setEvalRuns] = useState<EvalRun[]>([])
  const [approvals, setApprovals] = useState<ApprovalRequest[]>([])
  const [suites, setSuites] = useState<EvalSuiteSummary[]>([])
  const [scenarios, setScenarios] = useState<EvalScenarioSummary[]>([])
  const [loading, setLoading] = useState(true)
  const [refreshing, setRefreshing] = useState(false)
  const [loadError, setLoadError] = useState<string | null>(null)
  const [selectedAgentId, setSelectedAgentId] = useState('')
  const [selectedSuite, setSelectedSuite] = useState('core')
  const [selectedScenarioId, setSelectedScenarioId] = useState('')
  // `scenario:<id>` or `suite:<name>` while an eval request is in flight, otherwise null.
  const [evalBusy, setEvalBusy] = useState<string | null>(null)
  // Ids of approvals with a decision request in flight. A set (rather than a single id)
  // keeps each card disabled until its own request settles, even when several overlap.
  const [busyApprovalIds, setBusyApprovalIds] = useState<ReadonlySet<string>>(() => new Set<string>())

  /**
   * Fetches every dataset the page renders. With `silent: true` the full-page loader
   * is skipped (only the "Refreshing..." hint shows) and failures do not raise a toast;
   * the error banner is still updated.
   */
  const loadQualityData = useCallback(async (opts: { silent?: boolean } = {}) => {
    if (opts.silent) setRefreshing(true)
    else setLoading(true)
    setLoadError(null)
    try {
      const [nextRuns, nextEvalRuns, nextApprovals, nextSuites, nextScenarios] = await Promise.all([
        api<SessionRunRecord[]>('GET', '/runs?limit=200'),
        api<EvalRun[]>('GET', '/eval/run?limit=100'),
        api<ApprovalRequest[]>('GET', '/approvals'),
        api<EvalSuiteSummary[]>('GET', '/eval/suites'),
        api<EvalScenarioSummary[]>('GET', '/eval/scenarios'),
      ])
      // Guard against non-array payloads so the render code can always iterate.
      setRuns(Array.isArray(nextRuns) ? nextRuns : [])
      setEvalRuns(Array.isArray(nextEvalRuns) ? nextEvalRuns : [])
      setApprovals(Array.isArray(nextApprovals) ? nextApprovals : [])
      setSuites(Array.isArray(nextSuites) ? nextSuites : [])
      setScenarios(Array.isArray(nextScenarios) ? nextScenarios : [])
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Unable to load quality data'
      setLoadError(message)
      if (!opts.silent) toast.error(message)
    } finally {
      setLoading(false)
      setRefreshing(false)
    }
  }, [])

  useEffect(() => {
    void loadQualityData()
  }, [loadQualityData])

  useWs('runs', () => { void loadQualityData({ silent: true }) }, 5000)

  // Keep the selected agent valid: pick the first available agent when nothing is
  // selected or the selected agent is no longer offered (e.g. it was trashed).
  useEffect(() => {
    if (agentOptions.some((agent) => agent.id === selectedAgentId)) return
    setSelectedAgentId(agentOptions[0]?.id ?? '')
  }, [agentOptions, selectedAgentId])

  // Same re-validation for the scenario picker after the scenario list changes.
  useEffect(() => {
    if (scenarios.some((scenario) => scenario.id === selectedScenarioId)) return
    setSelectedScenarioId(scenarios[0]?.id ?? '')
  }, [scenarios, selectedScenarioId])

  // The suite keeps its current value (initially 'core') while no suites are loaded.
  useEffect(() => {
    if (!suites.some((suite) => suite.name === selectedSuite) && suites[0]) {
      setSelectedSuite(suites[0].name)
    }
  }, [selectedSuite, suites])

  const scenarioById = useMemo(() => {
    return new Map(scenarios.map((scenario) => [scenario.id, scenario]))
  }, [scenarios])

  const runHealth = useMemo(() => summarizeRunHealth(runs), [runs])
  const evalSummary = useMemo(() => summarizeEvalRuns(evalRuns), [evalRuns])
  const approvalGroups = useMemo(() => groupApprovalsByCategory(approvals), [approvals])
  const overview = useMemo(() => buildQualityOverviewSummary({ runs, evalRuns, approvals }), [approvals, evalRuns, runs])
  const selectedSuiteScenarios = useMemo(
    () => scenarios.filter((scenario) => scenario.suite === selectedSuite),
    [scenarios, selectedSuite],
  )

  /** Runs the selected scenario against the selected agent, then refreshes the page data. */
  const runScenario = useCallback(async () => {
    if (!selectedAgentId || !selectedScenarioId) {
      toast.error('Choose an agent and scenario first')
      return
    }
    setEvalBusy(`scenario:${selectedScenarioId}`)
    try {
      await api<EvalRun>('POST', '/eval/run', { agentId: selectedAgentId, scenarioId: selectedScenarioId }, { timeoutMs: 180_000 })
      toast.success('Eval scenario completed')
      await loadQualityData({ silent: true })
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'Eval scenario failed')
    } finally {
      setEvalBusy(null)
    }
  }, [loadQualityData, selectedAgentId, selectedScenarioId])

  /** Runs a whole suite against the selected agent, then refreshes the page data. */
  const runSuite = useCallback(async (suiteName: string) => {
    if (!selectedAgentId) {
      toast.error('Choose an agent first')
      return
    }
    setEvalBusy(`suite:${suiteName}`)
    try {
      const result = await api<EvalSuiteResult>('POST', '/eval/suite', { agentId: selectedAgentId, suite: suiteName }, { timeoutMs: 300_000 })
      toast.success(`Suite completed at ${Math.round(result.percentage)}%`)
      await loadQualityData({ silent: true })
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'Eval suite failed')
    } finally {
      setEvalBusy(null)
    }
  }, [loadQualityData, selectedAgentId])

  /** Grants or denies one approval request, then refreshes the page data. */
  const actOnApproval = useCallback(async (approval: ApprovalRequest, approved: boolean) => {
    setBusyApprovalIds((current) => new Set(current).add(approval.id))
    try {
      await api('POST', '/approvals', { id: approval.id, approved })
      toast.success(approved ? 'Approval granted' : 'Approval denied')
      await loadQualityData({ silent: true })
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'Unable to update approval')
    } finally {
      // Release only this approval; other in-flight decisions stay disabled.
      setBusyApprovalIds((current) => {
        const next = new Set(current)
        next.delete(approval.id)
        return next
      })
    }
  }, [loadQualityData])

  if (loading) {
    return (
      <MainContent>
        <PageLoader label="Loading quality center..." />
      </MainContent>
    )
  }

  // Plain derived values (no hooks below the early return above).
  const selectedScenario = selectedScenarioId ? scenarioById.get(selectedScenarioId) : undefined
  const latestEvalRuns = evalRuns.slice(0, 5)
  const noBlockers =
    runHealth.recentFailures.length === 0
    && approvalGroups.totalPending === 0
    && evalSummary.failedRuns === 0

  return (
    <MainContent>
      <div className="flex-1 overflow-y-auto">
        <div className="mx-auto flex w-full max-w-[1280px] flex-col gap-6 px-4 py-5 sm:px-6 lg:px-8">
          <div className="flex flex-col gap-4 lg:flex-row lg:items-start lg:justify-between">
            <div>
              <div className="text-[10px] font-700 uppercase tracking-[0.16em] text-accent-bright/75">Operator Quality Center</div>
              <h1 className="mt-2 font-display text-[28px] font-700 tracking-[-0.03em] text-text">Quality</h1>
              <p className="mt-2 max-w-[720px] text-[13px] leading-relaxed text-text-3/70">
                Evals, approvals, run evidence, and release readiness in one operator workspace.
              </p>
            </div>
            <div className="flex flex-wrap items-center gap-2">
              {refreshing && <span className="text-[11px] text-text-3/60">Refreshing...</span>}
              <button
                type="button"
                onClick={() => void loadQualityData({ silent: true })}
                className="inline-flex items-center gap-2 rounded-[10px] border border-white/[0.08] bg-white/[0.04] px-3 py-2 text-[12px] font-700 text-text-2 transition-colors hover:bg-white/[0.08]"
              >
                <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round">
                  <path d="M21 12a9 9 0 0 1-15.5 6.2" /><path d="M3 12A9 9 0 0 1 18.5 5.8" /><path d="M3 19v-5h5" /><path d="M21 5v5h-5" />
                </svg>
                Refresh
              </button>
            </div>
          </div>

          {loadError && (
            <div className="rounded-[12px] border border-rose-500/25 bg-rose-500/[0.06] px-4 py-3 text-[12px] text-rose-200">
              {loadError}
            </div>
          )}

          <div className="flex gap-1 overflow-x-auto rounded-[12px] border border-white/[0.06] bg-white/[0.025] p-1">
            {TABS.map((tab) => (
              <button
                key={tab.id}
                type="button"
                onClick={() => setActiveTab(tab.id)}
                className={cn(
                  'min-w-fit rounded-[9px] px-3 py-2 text-[12px] font-700 transition-colors',
                  activeTab === tab.id
                    ? 'bg-white/[0.1] text-text'
                    : 'text-text-3 hover:bg-white/[0.05] hover:text-text-2',
                )}
              >
                {tab.label}
              </button>
            ))}
          </div>

          {activeTab === 'overview' && (
            <div className="flex flex-col gap-6">
              <div className="grid gap-3 md:grid-cols-2 xl:grid-cols-4">
                <StatTile
                  label="Needs Attention"
                  value={String(overview.needsAttention)}
                  hint="Failed runs, failed evals, and pending approvals."
                  tone={overview.needsAttention > 0 ? 'danger' : 'good'}
                />
                <StatTile
                  label="Active Runs"
                  value={String(overview.activeRuns)}
                  hint={`${runHealth.byStatus.running} running, ${runHealth.byStatus.queued} queued.`}
                  tone={overview.activeRuns > 0 ? 'warn' : 'default'}
                />
                <StatTile
                  label="Pending Approvals"
                  value={String(overview.pendingApprovals)}
                  hint={`${approvalGroups.categories.length} approval group${approvalGroups.categories.length === 1 ? '' : 's'}.`}
                  tone={overview.pendingApprovals > 0 ? 'warn' : 'good'}
                />
                <StatTile
                  label="Eval Average"
                  value={formatPercent(overview.evalAveragePercent)}
                  hint={`${evalSummary.completedRuns} completed eval run${evalSummary.completedRuns === 1 ? '' : 's'}.`}
                  tone={overview.evalAveragePercent == null || overview.evalAveragePercent >= 80 ? 'good' : 'warn'}
                />
              </div>

              <div className="grid gap-4 lg:grid-cols-[1.2fr_0.8fr]">
                <section className="rounded-[16px] border border-white/[0.06] bg-white/[0.025] p-4">
                  <div className="mb-4 flex flex-wrap items-center justify-between gap-2">
                    <div>
                      <h2 className="font-display text-[15px] font-700 text-text">Needs Attention</h2>
                      <p className="mt-1 text-[12px] text-text-3/65">Shortest path to unblock operator review.</p>
                    </div>
                    <div className="flex flex-wrap gap-2">
                      <button type="button" onClick={() => setActiveTab('evals')} className="rounded-[9px] border border-white/[0.08] px-2.5 py-1.5 text-[11px] font-700 text-text-2 hover:bg-white/[0.05]">Eval Lab</button>
                      <button type="button" onClick={() => setActiveTab('approvals')} className="rounded-[9px] border border-white/[0.08] px-2.5 py-1.5 text-[11px] font-700 text-text-2 hover:bg-white/[0.05]">Approvals</button>
                      <button type="button" onClick={() => setActiveTab('runs')} className="rounded-[9px] border border-white/[0.08] px-2.5 py-1.5 text-[11px] font-700 text-text-2 hover:bg-white/[0.05]">Runs</button>
                    </div>
                  </div>
                  {noBlockers ? (
                    <EmptyState title="No quality blockers" description="Recent runs, evals, and approvals do not need immediate operator action." />
                  ) : (
                    <div className="grid gap-2 md:grid-cols-2">
                      {runHealth.recentFailures.slice(0, 4).map((run) => (
                        <button
                          key={run.id}
                          type="button"
                          onClick={() => setActiveTab('runs')}
                          className="rounded-[12px] border border-rose-500/20 bg-rose-500/[0.04] px-3 py-3 text-left transition-colors hover:bg-rose-500/[0.07]"
                        >
                          <div className="text-[11px] font-700 uppercase tracking-[0.1em] text-rose-300">Failed Run</div>
                          <div className="mt-1 truncate text-[13px] font-600 text-text">{run.messagePreview || run.id}</div>
                          <div className="mt-1 text-[11px] text-text-3/60">{run.source} - {formatTimestamp(run.endedAt ?? run.queuedAt)}</div>
                        </button>
                      ))}
                      {/* Failed evals count as blockers above, so surface them here too;
                          otherwise this grid is empty when they are the only blocker. */}
                      {evalSummary.failedRuns > 0 && (
                        <button
                          type="button"
                          onClick={() => setActiveTab('evals')}
                          className="rounded-[12px] border border-rose-500/20 bg-rose-500/[0.04] px-3 py-3 text-left transition-colors hover:bg-rose-500/[0.07]"
                        >
                          <div className="text-[11px] font-700 uppercase tracking-[0.1em] text-rose-300">Failed Eval</div>
                          <div className="mt-1 text-[13px] font-600 text-text">{evalSummary.failedRuns} failed eval run{evalSummary.failedRuns === 1 ? '' : 's'}</div>
                          <div className="mt-1 text-[11px] text-text-3/60">Open Eval Lab to review scores and evidence.</div>
                        </button>
                      )}
                      {approvalGroups.categories.slice(0, 4).map((group) => (
                        <button
                          key={group.category}
                          type="button"
                          onClick={() => setActiveTab('approvals')}
                          className="rounded-[12px] border border-amber-500/20 bg-amber-500/[0.04] px-3 py-3 text-left transition-colors hover:bg-amber-500/[0.07]"
                        >
                          <div className="text-[11px] font-700 uppercase tracking-[0.1em] text-amber-300">Approval</div>
                          <div className="mt-1 text-[13px] font-600 text-text">{group.count} pending {group.category.replaceAll('_', ' ')}</div>
                          <div className="mt-1 text-[11px] text-text-3/60">{group.approvals[0]?.title || 'Review request'}</div>
                        </button>
                      ))}
                    </div>
                  )}
                </section>

                <section className="rounded-[16px] border border-white/[0.06] bg-white/[0.025] p-4">
                  <h2 className="font-display text-[15px] font-700 text-text">Latest Eval Scores</h2>
                  <p className="mt-1 text-[12px] text-text-3/65">Most recent scored evidence across agents.</p>
                  <div className="mt-4 flex flex-col gap-2">
                    {latestEvalRuns.length === 0 ? (
                      <EmptyState title="No eval history" description="Run a scenario or suite to start building score history." />
                    ) : (
                      latestEvalRuns.map((run) => {
                        const percent = scorePercent(run.score, run.maxScore)
                        return (
                          <div key={run.id} className="rounded-[12px] border border-white/[0.06] bg-white/[0.025] px-3 py-3">
                            <div className="flex items-center justify-between gap-3">
                              <div className="min-w-0">
                                <div className="truncate text-[13px] font-700 text-text">{scenarioById.get(run.scenarioId)?.name || run.scenarioId}</div>
                                <div className="mt-1 text-[11px] text-text-3/60">{agentLabel(agents[run.agentId], run.agentId)}</div>
                              </div>
                              <div className={cn('shrink-0 text-[16px] font-display font-700', percent == null || percent >= 80 ? 'text-emerald-300' : 'text-amber-300')}>
                                {formatPercent(percent)}
                              </div>
                            </div>
                          </div>
                        )
                      })
                    )}
                  </div>
                </section>
              </div>
            </div>
          )}

          {activeTab === 'evals' && (
            <div className="grid gap-5 xl:grid-cols-[360px_1fr]">
              <section className="rounded-[16px] border border-white/[0.06] bg-white/[0.025] p-4">
                <h2 className="font-display text-[15px] font-700 text-text">Eval Lab</h2>
                <p className="mt-1 text-[12px] leading-relaxed text-text-3/65">Run focused scenarios or complete suites against one agent.</p>
                <div className="mt-4 flex flex-col gap-3">
                  <label className="flex flex-col gap-1.5">
                    <span className="text-[10px] font-700 uppercase tracking-[0.12em] text-text-3/55">Agent</span>
                    <select
                      value={selectedAgentId}
                      onChange={(event) => setSelectedAgentId(event.target.value)}
                      className="rounded-[10px] border border-white/[0.08] bg-white/[0.04] px-3 py-2 text-[12px] text-text outline-none"
                    >
                      {agentOptions.length === 0 && <option value="">No agents available</option>}
                      {agentOptions.map((agent) => (
                        <option key={agent.id} value={agent.id}>{agent.name}</option>
                      ))}
                    </select>
                  </label>
                  <label className="flex flex-col gap-1.5">
                    <span className="text-[10px] font-700 uppercase tracking-[0.12em] text-text-3/55">Scenario</span>
                    <select
                      value={selectedScenarioId}
                      onChange={(event) => setSelectedScenarioId(event.target.value)}
                      className="rounded-[10px] border border-white/[0.08] bg-white/[0.04] px-3 py-2 text-[12px] text-text outline-none"
                    >
                      {scenarios.map((scenario) => (
                        <option key={scenario.id} value={scenario.id}>{scenario.name}</option>
                      ))}
                    </select>
                  </label>
                  {selectedScenario && (
                    <div className="rounded-[12px] border border-white/[0.06] bg-white/[0.025] px-3 py-3">
                      <div className="text-[13px] font-700 text-text">{selectedScenario.name}</div>
                      <p className="mt-1 text-[12px] leading-relaxed text-text-3/65">{selectedScenario.description}</p>
                      <div className="mt-2 flex flex-wrap gap-1.5">
                        <span className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">{selectedScenario.category}</span>
                        <span className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">{selectedScenario.criteriaCount} criteria</span>
                        <span className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">{formatDuration(selectedScenario.timeoutMs)}</span>
                      </div>
                    </div>
                  )}
                  <button
                    type="button"
                    disabled={!selectedAgentId || !selectedScenarioId || !!evalBusy}
                    onClick={() => void runScenario()}
                    className="inline-flex items-center justify-center gap-2 rounded-[10px] bg-accent-bright px-3 py-2.5 text-[12px] font-800 text-black transition-opacity hover:opacity-90 disabled:cursor-not-allowed disabled:opacity-40"
                  >
                    <svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor">
                      <path d="M8 5v14l11-7z" />
                    </svg>
                    {evalBusy?.startsWith('scenario:') ? 'Running Scenario' : 'Run Scenario'}
                  </button>
                </div>
              </section>

              <div className="flex flex-col gap-5">
                <section className="rounded-[16px] border border-white/[0.06] bg-white/[0.025] p-4">
                  <div className="flex flex-wrap items-center justify-between gap-3">
                    <div>
                      <h2 className="font-display text-[15px] font-700 text-text">Suites</h2>
                      <p className="mt-1 text-[12px] text-text-3/65">Release-oriented eval suites available through the existing eval API.</p>
                    </div>
                    <select
                      value={selectedSuite}
                      onChange={(event) => setSelectedSuite(event.target.value)}
                      className="rounded-[10px] border border-white/[0.08] bg-white/[0.04] px-3 py-2 text-[12px] text-text outline-none"
                    >
                      {suites.map((suite) => (
                        <option key={suite.name} value={suite.name}>{suite.name}</option>
                      ))}
                    </select>
                  </div>
                  <div className="mt-4 grid gap-3 md:grid-cols-2 xl:grid-cols-3">
                    {suites.map((suite) => (
                      <div key={suite.name} className="rounded-[14px] border border-white/[0.06] bg-white/[0.025] p-3">
                        <div className="flex items-start justify-between gap-3">
                          <div>
                            <div className="text-[13px] font-800 text-text">{suite.name}</div>
                            <div className="mt-1 text-[11px] text-text-3/65">{suite.count} scenarios - {suite.maxScore} max score</div>
                          </div>
                          <button
                            type="button"
                            disabled={!selectedAgentId || !!evalBusy}
                            onClick={() => void runSuite(suite.name)}
                            className="rounded-[8px] border border-white/[0.08] px-2 py-1 text-[11px] font-700 text-text-2 transition-colors hover:bg-white/[0.06] disabled:cursor-not-allowed disabled:opacity-40"
                          >
                            {evalBusy === `suite:${suite.name}` ? 'Running' : 'Run'}
                          </button>
                        </div>
                        <div className="mt-3 flex flex-wrap gap-1.5">
                          {suite.categories.map((category) => (
                            <span key={category} className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">{category}</span>
                          ))}
                        </div>
                      </div>
                    ))}
                  </div>
                  <div className="mt-4 text-[11px] text-text-3/60">
                    {selectedSuiteScenarios.length} scenario{selectedSuiteScenarios.length === 1 ? '' : 's'} selected in {selectedSuite}.
                  </div>
                </section>

                <section className="rounded-[16px] border border-white/[0.06] bg-white/[0.025] p-4">
                  <h2 className="font-display text-[15px] font-700 text-text">Score History</h2>
                  <div className="mt-4 grid gap-3 lg:grid-cols-2">
                    {evalRuns.length === 0 ? (
                      <EmptyState title="No eval results yet" description="Run a scenario or suite to see criteria scores and evidence." />
                    ) : (
                      evalRuns.slice(0, 12).map((run) => {
                        const percent = scorePercent(run.score, run.maxScore)
                        const scenario = scenarioById.get(run.scenarioId)
                        return (
                          <div key={run.id} className="rounded-[14px] border border-white/[0.06] bg-white/[0.025] p-3">
                            <div className="flex items-start justify-between gap-3">
                              <div className="min-w-0">
                                <div className="truncate text-[13px] font-800 text-text">{scenario?.name || run.scenarioId}</div>
                                <div className="mt-1 text-[11px] text-text-3/60">{agentLabel(agents[run.agentId], run.agentId)}</div>
                                <div className="mt-1 text-[10px] text-text-3/50">{formatTimestamp(run.endedAt ?? run.startedAt)}</div>
                              </div>
                              <div className={cn('rounded-[8px] px-2 py-1 text-[13px] font-800', percent == null || percent >= 80 ? 'bg-emerald-500/10 text-emerald-300' : 'bg-amber-500/10 text-amber-300')}>
                                {formatPercent(percent)}
                              </div>
                            </div>
                            <div className="mt-3 flex flex-col gap-2">
                              {/* Only the first three criteria are shown; the rest are summarised below. */}
                              {run.details.slice(0, 3).map((detail) => (
                                <div key={detail.criterion} className="rounded-[10px] bg-white/[0.025] px-3 py-2">
                                  <div className="flex items-center justify-between gap-3">
                                    <div className="text-[11px] font-700 text-text-2">{detail.criterion}</div>
                                    <div className="text-[10px] text-text-3/70">{detail.score}/{detail.maxScore}</div>
                                  </div>
                                  {detail.evidence && <p className="mt-1 line-clamp-2 text-[11px] leading-relaxed text-text-3/65">{detail.evidence}</p>}
                                </div>
                              ))}
                              {run.details.length > 3 && (
                                <div className="text-[10px] text-text-3/50">+{run.details.length - 3} more criteria</div>
                              )}
                              {run.error && <div className="rounded-[10px] bg-rose-500/[0.06] px-3 py-2 text-[11px] text-rose-200">{run.error}</div>}
                            </div>
                          </div>
                        )
                      })
                    )}
                  </div>
                </section>
              </div>
            </div>
          )}

          {activeTab === 'approvals' && (
            <section className="rounded-[16px] border border-white/[0.06] bg-white/[0.025] p-4">
              <div className="flex flex-wrap items-center justify-between gap-3">
                <div>
                  <h2 className="font-display text-[15px] font-700 text-text">Approval Desk</h2>
                  <p className="mt-1 text-[12px] text-text-3/65">Pending human-loop, tool, connector, skill, agent, and budget requests.</p>
                </div>
                <div className="rounded-full border border-white/[0.08] bg-white/[0.04] px-3 py-1 text-[11px] font-700 text-text-3">
                  {approvalGroups.totalPending} pending
                </div>
              </div>
              <div className="mt-4 flex flex-col gap-4">
                {approvalGroups.totalPending === 0 ? (
                  <EmptyState title="No pending approvals" description="The approval queue is clear." />
                ) : (
                  approvalGroups.categories.map((group) => (
                    <div key={group.category} className="rounded-[14px] border border-white/[0.06] bg-white/[0.02] p-3">
                      <div className="mb-3 flex items-center justify-between gap-3">
                        <div className="text-[12px] font-800 uppercase tracking-[0.1em] text-text-2">{group.category.replaceAll('_', ' ')}</div>
                        <div className="text-[11px] font-700 text-text-3/65">{group.count} request{group.count === 1 ? '' : 's'}</div>
                      </div>
                      <div className="grid gap-2 lg:grid-cols-2">
                        {group.approvals.map((approval) => (
                          <div key={approval.id} className="rounded-[12px] border border-white/[0.06] bg-surface px-3 py-3">
                            <div className="text-[13px] font-800 text-text">{approval.title}</div>
                            {approval.description && <p className="mt-1 text-[12px] leading-relaxed text-text-3/65">{approval.description}</p>}
                            <div className="mt-2 flex flex-wrap gap-x-3 gap-y-1 text-[10px] text-text-3/55">
                              <span>{formatTimestamp(approval.createdAt)}</span>
                              {approval.agentId && <span>agent {agents[approval.agentId]?.name || approval.agentId}</span>}
                              {approval.sessionId && <span>session {approval.sessionId.slice(0, 8)}</span>}
                            </div>
                            <div className="mt-3 flex gap-2">
                              <button
                                type="button"
                                disabled={busyApprovalIds.has(approval.id)}
                                onClick={() => void actOnApproval(approval, true)}
                                className="inline-flex items-center gap-1.5 rounded-[9px] bg-emerald-400 px-3 py-1.5 text-[11px] font-800 text-black transition-opacity hover:opacity-90 disabled:opacity-40"
                              >
                                <svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="3" strokeLinecap="round">
                                  <path d="M20 6L9 17l-5-5" />
                                </svg>
                                Approve
                              </button>
                              <button
                                type="button"
                                disabled={busyApprovalIds.has(approval.id)}
                                onClick={() => void actOnApproval(approval, false)}
                                className="inline-flex items-center gap-1.5 rounded-[9px] border border-rose-400/25 bg-rose-500/[0.06] px-3 py-1.5 text-[11px] font-800 text-rose-200 transition-colors hover:bg-rose-500/[0.1] disabled:opacity-40"
                              >
                                <svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="3" strokeLinecap="round">
                                  <path d="M18 6L6 18" /><path d="M6 6l12 12" />
                                </svg>
                                Deny
                              </button>
                            </div>
                          </div>
                        ))}
                      </div>
                    </div>
                  ))
                )}
              </div>
            </section>
          )}

          {activeTab === 'runs' && (
            <div className="flex min-h-[680px] flex-col rounded-[16px] border border-white/[0.06] bg-white/[0.025]">
              <div className="border-b border-white/[0.06] px-5 py-4">
                <div className="flex flex-col gap-3 lg:flex-row lg:items-center lg:justify-between">
                  <div>
                    <h2 className="font-display text-[15px] font-700 text-text">Run Review</h2>
                    <p className="mt-1 text-[12px] text-text-3/65">Filter recent runs and open replay evidence from the detail sheet.</p>
                  </div>
                  <div className="flex flex-wrap gap-2">
                    <span className="rounded-full bg-rose-500/[0.08] px-2.5 py-1 text-[11px] font-700 text-rose-300">{runHealth.byStatus.failed} failed</span>
                    <span className="rounded-full bg-blue-500/[0.08] px-2.5 py-1 text-[11px] font-700 text-blue-300">{runHealth.byStatus.running} running</span>
                    <span className="rounded-full bg-emerald-500/[0.08] px-2.5 py-1 text-[11px] font-700 text-emerald-300">{runHealth.byStatus.completed} completed</span>
                  </div>
                </div>
              </div>
              <RunList />
            </div>
          )}
        </div>
      </div>
    </MainContent>
  )
}