@swarmclawai/swarmclaw 1.5.71 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -0
- package/package.json +2 -2
- package/src/app/home/page.tsx +11 -3
- package/src/app/quality/page.tsx +7 -0
- package/src/components/home/home-launchpad.tsx +32 -0
- package/src/components/layout/sidebar-rail.tsx +6 -0
- package/src/components/quality/quality-workspace.tsx +632 -0
- package/src/components/runs/run-list.tsx +53 -2
- package/src/components/shared/command-palette.tsx +1 -0
- package/src/lib/app/navigation.ts +1 -0
- package/src/lib/app/view-constants.test.ts +8 -1
- package/src/lib/app/view-constants.ts +9 -1
- package/src/lib/quality/quality-summary.test.ts +122 -0
- package/src/lib/quality/quality-summary.ts +150 -0
- package/src/lib/server/missions/mission-templates.test.ts +22 -0
- package/src/lib/server/missions/mission-templates.ts +116 -0
- package/src/types/session.ts +1 -1
|
@@ -0,0 +1,632 @@
|
|
|
1
|
+
'use client'
|
|
2
|
+
|
|
3
|
+
import { useCallback, useEffect, useMemo, useState } from 'react'
|
|
4
|
+
import { toast } from 'sonner'
|
|
5
|
+
import { MainContent } from '@/components/layout/main-content'
|
|
6
|
+
import { RunList } from '@/components/runs/run-list'
|
|
7
|
+
import { PageLoader } from '@/components/ui/page-loader'
|
|
8
|
+
import { useWs } from '@/hooks/use-ws'
|
|
9
|
+
import { api } from '@/lib/app/api-client'
|
|
10
|
+
import {
|
|
11
|
+
buildQualityOverviewSummary,
|
|
12
|
+
groupApprovalsByCategory,
|
|
13
|
+
summarizeEvalRuns,
|
|
14
|
+
summarizeRunHealth,
|
|
15
|
+
} from '@/lib/quality/quality-summary'
|
|
16
|
+
import { cn } from '@/lib/utils'
|
|
17
|
+
import { useAppStore } from '@/stores/use-app-store'
|
|
18
|
+
import type { EvalRun, EvalSuiteResult } from '@/lib/server/eval/types'
|
|
19
|
+
import type { Agent, ApprovalRequest, SessionRunRecord } from '@/types'
|
|
20
|
+
|
|
21
|
+
/** Identifier of one tab in the Quality workspace. */
type QualityTab = 'overview' | 'evals' | 'approvals' | 'runs'

/** Suite entry requested from `GET /eval/suites`. */
interface EvalSuiteSummary {
  /** Suite name; also used as the value posted when running the suite. */
  name: string
  /** Number shown as "<count> scenarios" on the suite card. */
  count: number
  /** Number shown as "<maxScore> max score" on the suite card. */
  maxScore: number
  /** Category labels rendered as chips on the suite card. */
  categories: string[]
}

/** Scenario entry requested from `GET /eval/scenarios`. */
interface EvalScenarioSummary {
  id: string
  name: string
  category: string
  /** Name of the suite this scenario is filtered under. */
  suite: string
  description: string
  tools: string[]
  /** Duration in milliseconds; displayed through `formatDuration`. */
  timeoutMs: number
  criteriaCount: number
  maxScore: number
}

/** Tab ids paired with their visible labels, in display order. */
const TAB_LABELS: ReadonlyArray<readonly [QualityTab, string]> = [
  ['overview', 'Overview'],
  ['evals', 'Eval Lab'],
  ['approvals', 'Approval Desk'],
  ['runs', 'Run Review'],
]

/** Tab descriptors rendered by the workspace tab bar. */
const TABS: Array<{ id: QualityTab; label: string }> = TAB_LABELS.map(([id, label]) => ({ id, label }))
|
|
48
|
+
|
|
49
|
+
/**
 * Formats a percentage for display.
 *
 * @param value Percentage number, or `null` when no value is available.
 * @returns `"<value>%"`, or the placeholder `"n/a"` for `null`/`undefined`.
 */
function formatPercent(value: number | null): string {
  if (value === null || value === undefined) return 'n/a'
  return String(value) + '%'
}
|
|
52
|
+
|
|
53
|
+
/**
 * Converts a raw score into a whole-number percentage of its maximum.
 *
 * @param score Points achieved.
 * @param maxScore Points available; must be a positive finite number.
 * @returns The rounded percentage, or `null` when either input is not finite
 *          or `maxScore` is zero or negative.
 */
function scorePercent(score: number, maxScore: number): number | null {
  const usable = Number.isFinite(score) && Number.isFinite(maxScore) && maxScore > 0
  return usable ? Math.round((score / maxScore) * 100) : null
}
|
|
57
|
+
|
|
58
|
+
/**
 * Formats a timestamp using the runtime's locale.
 *
 * @param at Value passed to `new Date(...)`; any falsy value (including `0`)
 *           is treated as missing.
 * @returns The locale-formatted date/time, or `"not recorded"` when missing.
 */
function formatTimestamp(at: number | null | undefined): string {
  return at ? new Date(at).toLocaleString() : 'not recorded'
}
|
|
62
|
+
|
|
63
|
+
/**
 * Formats a millisecond duration as a short label in the largest fitting unit.
 *
 * Each unit is rounded directly from the millisecond input, so rounding does
 * not compound across units (for example 59.49 minutes stays `59m` instead of
 * being pushed up to `1h`).
 *
 * @param ms Duration in milliseconds.
 * @returns `"<n>ms"`, `"<n>s"`, `"<n>m"` or `"<n.n>h"`; `"n/a"` when `ms` is
 *          negative or not a finite number.
 */
function formatDuration(ms: number): string {
  // API payloads are not validated, so guard against missing or invalid values
  // instead of rendering labels such as "NaNh" or "-5ms".
  if (!Number.isFinite(ms) || ms < 0) return 'n/a'

  const wholeMs = Math.round(ms)
  if (wholeMs < 1000) return `${wholeMs}ms`

  const seconds = Math.round(ms / 1000)
  if (seconds < 60) return `${seconds}s`

  const minutes = Math.round(ms / 60_000)
  if (minutes < 60) return `${minutes}m`

  // Hours keep one decimal place.
  return `${Math.round((ms / 3_600_000) * 10) / 10}h`
}
|
|
71
|
+
|
|
72
|
+
/**
 * Builds the display label for an agent reference.
 *
 * @param agent Agent record, or `undefined` when the id is unknown locally.
 * @param id Agent id, used verbatim as the fallback label.
 * @returns `"<name> (<model>)"`, with the provider in place of the model when
 *          the model is empty; the raw `id` when no agent record is given.
 */
function agentLabel(agent: Agent | undefined, id: string): string {
  if (!agent) return id
  const backend = agent.model || agent.provider
  return `${agent.name} (${backend})`
}
|
|
75
|
+
|
|
76
|
+
/** Visual emphasis levels supported by `StatTile`. */
type StatTone = 'default' | 'good' | 'warn' | 'danger'

/** Text colour class applied to the tile value for each tone. */
const STAT_TONE_CLASS: Record<StatTone, string> = {
  default: 'text-text',
  good: 'text-emerald-300',
  warn: 'text-amber-300',
  danger: 'text-rose-300',
}

/**
 * Compact metric card: an uppercase label, a large value and a hint line.
 * `tone` selects the colour of the value and defaults to `'default'`.
 */
function StatTile(props: {
  label: string
  value: string
  hint: string
  tone?: 'default' | 'good' | 'warn' | 'danger'
}) {
  const { label, value, hint, tone = 'default' } = props
  const valueClassName = cn('mt-2 font-display text-[26px] font-700 tracking-[-0.03em]', STAT_TONE_CLASS[tone])

  return (
    <div className="rounded-[14px] border border-white/[0.06] bg-white/[0.03] px-4 py-3">
      <div className="text-[10px] font-700 uppercase tracking-[0.12em] text-text-3/55">{label}</div>
      <div className={valueClassName}>{value}</div>
      <div className="mt-1 text-[12px] leading-relaxed text-text-3/68">{hint}</div>
    </div>
  )
}
|
|
96
|
+
|
|
97
|
+
/** Dashed placeholder card shown when a section has nothing to list. */
function EmptyState(props: { title: string; description: string }) {
  const { title, description } = props

  return (
    <div className="rounded-[14px] border border-dashed border-white/[0.08] bg-white/[0.02] px-4 py-6">
      <div className="text-[13px] font-700 text-text">{title}</div>
      <p className="mt-1 text-[12px] leading-relaxed text-text-3/65">{description}</p>
    </div>
  )
}
|
|
105
|
+
|
|
106
|
+
export function QualityWorkspace() {
|
|
107
|
+
const agents = useAppStore((s) => s.agents)
|
|
108
|
+
const agentOptions = useMemo(
|
|
109
|
+
() => Object.values(agents).filter((agent) => !agent.trashedAt),
|
|
110
|
+
[agents],
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
const [activeTab, setActiveTab] = useState<QualityTab>('overview')
|
|
114
|
+
const [runs, setRuns] = useState<SessionRunRecord[]>([])
|
|
115
|
+
const [evalRuns, setEvalRuns] = useState<EvalRun[]>([])
|
|
116
|
+
const [approvals, setApprovals] = useState<ApprovalRequest[]>([])
|
|
117
|
+
const [suites, setSuites] = useState<EvalSuiteSummary[]>([])
|
|
118
|
+
const [scenarios, setScenarios] = useState<EvalScenarioSummary[]>([])
|
|
119
|
+
const [loading, setLoading] = useState(true)
|
|
120
|
+
const [refreshing, setRefreshing] = useState(false)
|
|
121
|
+
const [loadError, setLoadError] = useState<string | null>(null)
|
|
122
|
+
const [selectedAgentId, setSelectedAgentId] = useState('')
|
|
123
|
+
const [selectedSuite, setSelectedSuite] = useState('core')
|
|
124
|
+
const [selectedScenarioId, setSelectedScenarioId] = useState('')
|
|
125
|
+
const [evalBusy, setEvalBusy] = useState<string | null>(null)
|
|
126
|
+
const [approvalBusy, setApprovalBusy] = useState<string | null>(null)
|
|
127
|
+
|
|
128
|
+
/**
 * Fetches runs, eval runs, approvals, suites and scenarios in parallel and
 * stores each payload in component state.
 *
 * - `opts.silent` marks a background refresh: it drives the `refreshing` flag
 *   instead of the full-page `loading` flag and suppresses the error toast.
 * - Endpoints are settled independently, so one failing request no longer
 *   discards the payloads that did load; the first failure is surfaced via
 *   `loadError` (and a toast for non-silent loads).
 * - Each call clears only the flag it raised. A silent refresh finishing
 *   while the initial load is still in flight therefore cannot drop the page
 *   loader early.
 */
const loadQualityData = useCallback(async (opts: { silent?: boolean } = {}) => {
  const silent = Boolean(opts.silent)
  if (silent) setRefreshing(true)
  else setLoading(true)
  setLoadError(null)

  // Surfaces a load failure in the banner, and as a toast for user-initiated loads.
  const reportFailure = (err: unknown) => {
    const message = err instanceof Error ? err.message : 'Unable to load quality data'
    setLoadError(message)
    if (!silent) toast.error(message)
  }

  const failures: unknown[] = []
  // Stores one endpoint's payload (non-array payloads become an empty list),
  // or records its rejection without touching the previously stored data.
  const store = <T,>(result: PromiseSettledResult<T[]>, setter: (rows: T[]) => void) => {
    if (result.status === 'fulfilled') setter(Array.isArray(result.value) ? result.value : [])
    else failures.push(result.reason)
  }

  try {
    const [runsResult, evalRunsResult, approvalsResult, suitesResult, scenariosResult] = await Promise.allSettled([
      api<SessionRunRecord[]>('GET', '/runs?limit=200'),
      api<EvalRun[]>('GET', '/eval/run?limit=100'),
      api<ApprovalRequest[]>('GET', '/approvals'),
      api<EvalSuiteSummary[]>('GET', '/eval/suites'),
      api<EvalScenarioSummary[]>('GET', '/eval/scenarios'),
    ])
    store(runsResult, setRuns)
    store(evalRunsResult, setEvalRuns)
    store(approvalsResult, setApprovals)
    store(suitesResult, setSuites)
    store(scenariosResult, setScenarios)
    if (failures.length > 0) reportFailure(failures[0])
  } catch (err) {
    // Covers synchronous throws while the requests are being created.
    reportFailure(err)
  } finally {
    if (silent) setRefreshing(false)
    else setLoading(false)
  }
}, [])
|
|
154
|
+
|
|
155
|
+
useEffect(() => {
|
|
156
|
+
void loadQualityData()
|
|
157
|
+
}, [loadQualityData])
|
|
158
|
+
|
|
159
|
+
useWs('runs', () => { void loadQualityData({ silent: true }) }, 5000)
|
|
160
|
+
|
|
161
|
+
// Keep the agent selection pointing at an agent that is still selectable.
// Covers both the initial empty selection and a selected agent that has since
// been trashed; otherwise state would hold an id the <select> cannot display.
useEffect(() => {
  const fallback = agentOptions[0]
  if (!fallback) return
  if (agentOptions.some((agent) => agent.id === selectedAgentId)) return
  setSelectedAgentId(fallback.id)
}, [agentOptions, selectedAgentId])

// Same rule for the scenario selection once scenarios have been loaded.
useEffect(() => {
  const fallback = scenarios[0]
  if (!fallback) return
  if (scenarios.some((scenario) => scenario.id === selectedScenarioId)) return
  setSelectedScenarioId(fallback.id)
}, [scenarios, selectedScenarioId])

// Fall back to the first suite when the selected suite name is not in the loaded list.
useEffect(() => {
  const fallback = suites[0]
  if (!fallback) return
  if (suites.some((suite) => suite.name === selectedSuite)) return
  setSelectedSuite(fallback.name)
}, [selectedSuite, suites])
|
|
174
|
+
|
|
175
|
+
const scenarioById = useMemo(() => {
|
|
176
|
+
return new Map(scenarios.map((scenario) => [scenario.id, scenario]))
|
|
177
|
+
}, [scenarios])
|
|
178
|
+
|
|
179
|
+
const runHealth = useMemo(() => summarizeRunHealth(runs), [runs])
|
|
180
|
+
const evalSummary = useMemo(() => summarizeEvalRuns(evalRuns), [evalRuns])
|
|
181
|
+
const approvalGroups = useMemo(() => groupApprovalsByCategory(approvals), [approvals])
|
|
182
|
+
const overview = useMemo(() => buildQualityOverviewSummary({ runs, evalRuns, approvals }), [approvals, evalRuns, runs])
|
|
183
|
+
const selectedSuiteScenarios = useMemo(
|
|
184
|
+
() => scenarios.filter((scenario) => scenario.suite === selectedSuite),
|
|
185
|
+
[scenarios, selectedSuite],
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
/**
 * Runs the selected scenario against the selected agent.
 * Marks the eval controls busy for the duration of the request, reports the
 * outcome as a toast, and silently reloads the workspace data on success.
 */
const runScenario = useCallback(async () => {
  const ready = Boolean(selectedAgentId && selectedScenarioId)
  if (!ready) {
    toast.error('Choose an agent and scenario first')
    return
  }

  const payload = { agentId: selectedAgentId, scenarioId: selectedScenarioId }
  setEvalBusy(`scenario:${selectedScenarioId}`)
  try {
    await api<EvalRun>('POST', '/eval/run', payload, { timeoutMs: 180_000 })
    toast.success('Eval scenario completed')
    await loadQualityData({ silent: true })
  } catch (err) {
    const message = err instanceof Error ? err.message : 'Eval scenario failed'
    toast.error(message)
  } finally {
    setEvalBusy(null)
  }
}, [loadQualityData, selectedAgentId, selectedScenarioId])
|
|
204
|
+
|
|
205
|
+
/**
 * Runs a whole eval suite against the selected agent.
 * Marks the eval controls busy under `suite:<name>`, shows the rounded suite
 * percentage on success, and silently reloads the workspace data afterwards.
 */
const runSuite = useCallback(async (suiteName: string) => {
  if (selectedAgentId === '') {
    toast.error('Choose an agent first')
    return
  }

  const payload = { agentId: selectedAgentId, suite: suiteName }
  setEvalBusy(`suite:${suiteName}`)
  try {
    const outcome = await api<EvalSuiteResult>('POST', '/eval/suite', payload, { timeoutMs: 300_000 })
    const rounded = Math.round(outcome.percentage)
    toast.success(`Suite completed at ${rounded}%`)
    await loadQualityData({ silent: true })
  } catch (err) {
    const message = err instanceof Error ? err.message : 'Eval suite failed'
    toast.error(message)
  } finally {
    setEvalBusy(null)
  }
}, [loadQualityData, selectedAgentId])
|
|
221
|
+
|
|
222
|
+
/**
 * Grants or denies a pending approval request.
 * The request id is held in `approvalBusy` while the call is in flight; the
 * outcome is shown as a toast and the workspace data is silently reloaded on success.
 */
const actOnApproval = useCallback(async (approval: ApprovalRequest, approved: boolean) => {
  const { id } = approval
  setApprovalBusy(id)
  try {
    await api('POST', '/approvals', { id, approved })
    const confirmation = approved ? 'Approval granted' : 'Approval denied'
    toast.success(confirmation)
    await loadQualityData({ silent: true })
  } catch (err) {
    const message = err instanceof Error ? err.message : 'Unable to update approval'
    toast.error(message)
  } finally {
    setApprovalBusy(null)
  }
}, [loadQualityData])
|
|
234
|
+
|
|
235
|
+
if (loading) {
|
|
236
|
+
return (
|
|
237
|
+
<MainContent>
|
|
238
|
+
<PageLoader label="Loading quality center..." />
|
|
239
|
+
</MainContent>
|
|
240
|
+
)
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
return (
|
|
244
|
+
<MainContent>
|
|
245
|
+
<div className="flex-1 overflow-y-auto">
|
|
246
|
+
<div className="mx-auto flex w-full max-w-[1280px] flex-col gap-6 px-4 py-5 sm:px-6 lg:px-8">
|
|
247
|
+
<div className="flex flex-col gap-4 lg:flex-row lg:items-start lg:justify-between">
|
|
248
|
+
<div>
|
|
249
|
+
<div className="text-[10px] font-700 uppercase tracking-[0.16em] text-accent-bright/75">Operator Quality Center</div>
|
|
250
|
+
<h1 className="mt-2 font-display text-[28px] font-700 tracking-[-0.03em] text-text">Quality</h1>
|
|
251
|
+
<p className="mt-2 max-w-[720px] text-[13px] leading-relaxed text-text-3/70">
|
|
252
|
+
Evals, approvals, run evidence, and release readiness in one operator workspace.
|
|
253
|
+
</p>
|
|
254
|
+
</div>
|
|
255
|
+
<div className="flex flex-wrap items-center gap-2">
|
|
256
|
+
{refreshing && <span className="text-[11px] text-text-3/60">Refreshing...</span>}
|
|
257
|
+
<button
|
|
258
|
+
type="button"
|
|
259
|
+
onClick={() => void loadQualityData({ silent: true })}
|
|
260
|
+
className="inline-flex items-center gap-2 rounded-[10px] border border-white/[0.08] bg-white/[0.04] px-3 py-2 text-[12px] font-700 text-text-2 transition-colors hover:bg-white/[0.08]"
|
|
261
|
+
>
|
|
262
|
+
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round">
|
|
263
|
+
<path d="M21 12a9 9 0 0 1-15.5 6.2" /><path d="M3 12A9 9 0 0 1 18.5 5.8" /><path d="M3 19v-5h5" /><path d="M21 5v5h-5" />
|
|
264
|
+
</svg>
|
|
265
|
+
Refresh
|
|
266
|
+
</button>
|
|
267
|
+
</div>
|
|
268
|
+
</div>
|
|
269
|
+
|
|
270
|
+
{loadError && (
|
|
271
|
+
<div className="rounded-[12px] border border-rose-500/25 bg-rose-500/[0.06] px-4 py-3 text-[12px] text-rose-200">
|
|
272
|
+
{loadError}
|
|
273
|
+
</div>
|
|
274
|
+
)}
|
|
275
|
+
|
|
276
|
+
<div className="flex gap-1 overflow-x-auto rounded-[12px] border border-white/[0.06] bg-white/[0.025] p-1">
|
|
277
|
+
{TABS.map((tab) => (
|
|
278
|
+
<button
|
|
279
|
+
key={tab.id}
|
|
280
|
+
type="button"
|
|
281
|
+
onClick={() => setActiveTab(tab.id)}
|
|
282
|
+
className={cn(
|
|
283
|
+
'min-w-fit rounded-[9px] px-3 py-2 text-[12px] font-700 transition-colors',
|
|
284
|
+
activeTab === tab.id
|
|
285
|
+
? 'bg-white/[0.1] text-text'
|
|
286
|
+
: 'text-text-3 hover:bg-white/[0.05] hover:text-text-2',
|
|
287
|
+
)}
|
|
288
|
+
>
|
|
289
|
+
{tab.label}
|
|
290
|
+
</button>
|
|
291
|
+
))}
|
|
292
|
+
</div>
|
|
293
|
+
|
|
294
|
+
{activeTab === 'overview' && (
|
|
295
|
+
<div className="flex flex-col gap-6">
|
|
296
|
+
<div className="grid gap-3 md:grid-cols-2 xl:grid-cols-4">
|
|
297
|
+
<StatTile
|
|
298
|
+
label="Needs Attention"
|
|
299
|
+
value={String(overview.needsAttention)}
|
|
300
|
+
hint="Failed runs, failed evals, and pending approvals."
|
|
301
|
+
tone={overview.needsAttention > 0 ? 'danger' : 'good'}
|
|
302
|
+
/>
|
|
303
|
+
<StatTile
|
|
304
|
+
label="Active Runs"
|
|
305
|
+
value={String(overview.activeRuns)}
|
|
306
|
+
hint={`${runHealth.byStatus.running} running, ${runHealth.byStatus.queued} queued.`}
|
|
307
|
+
tone={overview.activeRuns > 0 ? 'warn' : 'default'}
|
|
308
|
+
/>
|
|
309
|
+
<StatTile
|
|
310
|
+
label="Pending Approvals"
|
|
311
|
+
value={String(overview.pendingApprovals)}
|
|
312
|
+
hint={`${approvalGroups.categories.length} approval group${approvalGroups.categories.length === 1 ? '' : 's'}.`}
|
|
313
|
+
tone={overview.pendingApprovals > 0 ? 'warn' : 'good'}
|
|
314
|
+
/>
|
|
315
|
+
<StatTile
|
|
316
|
+
label="Eval Average"
|
|
317
|
+
value={formatPercent(overview.evalAveragePercent)}
|
|
318
|
+
hint={`${evalSummary.completedRuns} completed eval run${evalSummary.completedRuns === 1 ? '' : 's'}.`}
|
|
319
|
+
tone={overview.evalAveragePercent == null || overview.evalAveragePercent >= 80 ? 'good' : 'warn'}
|
|
320
|
+
/>
|
|
321
|
+
</div>
|
|
322
|
+
|
|
323
|
+
<div className="grid gap-4 lg:grid-cols-[1.2fr_0.8fr]">
|
|
324
|
+
<section className="rounded-[16px] border border-white/[0.06] bg-white/[0.025] p-4">
|
|
325
|
+
<div className="mb-4 flex flex-wrap items-center justify-between gap-2">
|
|
326
|
+
<div>
|
|
327
|
+
<h2 className="font-display text-[15px] font-700 text-text">Needs Attention</h2>
|
|
328
|
+
<p className="mt-1 text-[12px] text-text-3/65">Shortest path to unblock operator review.</p>
|
|
329
|
+
</div>
|
|
330
|
+
<div className="flex flex-wrap gap-2">
|
|
331
|
+
<button onClick={() => setActiveTab('evals')} className="rounded-[9px] border border-white/[0.08] px-2.5 py-1.5 text-[11px] font-700 text-text-2 hover:bg-white/[0.05]">Eval Lab</button>
|
|
332
|
+
<button onClick={() => setActiveTab('approvals')} className="rounded-[9px] border border-white/[0.08] px-2.5 py-1.5 text-[11px] font-700 text-text-2 hover:bg-white/[0.05]">Approvals</button>
|
|
333
|
+
<button onClick={() => setActiveTab('runs')} className="rounded-[9px] border border-white/[0.08] px-2.5 py-1.5 text-[11px] font-700 text-text-2 hover:bg-white/[0.05]">Runs</button>
|
|
334
|
+
</div>
|
|
335
|
+
</div>
|
|
336
|
+
{runHealth.recentFailures.length === 0 && approvalGroups.totalPending === 0 && evalSummary.failedRuns === 0 ? (
|
|
337
|
+
<EmptyState title="No quality blockers" description="Recent runs, evals, and approvals do not need immediate operator action." />
|
|
338
|
+
) : (
|
|
339
|
+
<div className="grid gap-2 md:grid-cols-2">
|
|
340
|
+
{runHealth.recentFailures.slice(0, 4).map((run) => (
|
|
341
|
+
<button
|
|
342
|
+
key={run.id}
|
|
343
|
+
onClick={() => setActiveTab('runs')}
|
|
344
|
+
className="rounded-[12px] border border-rose-500/20 bg-rose-500/[0.04] px-3 py-3 text-left transition-colors hover:bg-rose-500/[0.07]"
|
|
345
|
+
>
|
|
346
|
+
<div className="text-[11px] font-700 uppercase tracking-[0.1em] text-rose-300">Failed Run</div>
|
|
347
|
+
<div className="mt-1 truncate text-[13px] font-600 text-text">{run.messagePreview || run.id}</div>
|
|
348
|
+
<div className="mt-1 text-[11px] text-text-3/60">{run.source} - {formatTimestamp(run.endedAt ?? run.queuedAt)}</div>
|
|
349
|
+
</button>
|
|
350
|
+
))}
|
|
351
|
+
{approvalGroups.categories.slice(0, 4).map((group) => (
|
|
352
|
+
<button
|
|
353
|
+
key={group.category}
|
|
354
|
+
onClick={() => setActiveTab('approvals')}
|
|
355
|
+
className="rounded-[12px] border border-amber-500/20 bg-amber-500/[0.04] px-3 py-3 text-left transition-colors hover:bg-amber-500/[0.07]"
|
|
356
|
+
>
|
|
357
|
+
<div className="text-[11px] font-700 uppercase tracking-[0.1em] text-amber-300">Approval</div>
|
|
358
|
+
<div className="mt-1 text-[13px] font-600 text-text">{group.count} pending {group.category.replaceAll('_', ' ')}</div>
|
|
359
|
+
<div className="mt-1 text-[11px] text-text-3/60">{group.approvals[0]?.title || 'Review request'}</div>
|
|
360
|
+
</button>
|
|
361
|
+
))}
|
|
362
|
+
</div>
|
|
363
|
+
)}
|
|
364
|
+
</section>
|
|
365
|
+
|
|
366
|
+
<section className="rounded-[16px] border border-white/[0.06] bg-white/[0.025] p-4">
|
|
367
|
+
<h2 className="font-display text-[15px] font-700 text-text">Latest Eval Scores</h2>
|
|
368
|
+
<p className="mt-1 text-[12px] text-text-3/65">Most recent scored evidence across agents.</p>
|
|
369
|
+
<div className="mt-4 flex flex-col gap-2">
|
|
370
|
+
{evalRuns.slice(0, 5).length === 0 ? (
|
|
371
|
+
<EmptyState title="No eval history" description="Run a scenario or suite to start building score history." />
|
|
372
|
+
) : (
|
|
373
|
+
evalRuns.slice(0, 5).map((run) => {
|
|
374
|
+
const percent = scorePercent(run.score, run.maxScore)
|
|
375
|
+
return (
|
|
376
|
+
<div key={run.id} className="rounded-[12px] border border-white/[0.06] bg-white/[0.025] px-3 py-3">
|
|
377
|
+
<div className="flex items-center justify-between gap-3">
|
|
378
|
+
<div className="min-w-0">
|
|
379
|
+
<div className="truncate text-[13px] font-700 text-text">{scenarioById.get(run.scenarioId)?.name || run.scenarioId}</div>
|
|
380
|
+
<div className="mt-1 text-[11px] text-text-3/60">{agentLabel(agents[run.agentId], run.agentId)}</div>
|
|
381
|
+
</div>
|
|
382
|
+
<div className={cn('shrink-0 text-[16px] font-display font-700', percent == null || percent >= 80 ? 'text-emerald-300' : 'text-amber-300')}>
|
|
383
|
+
{formatPercent(percent)}
|
|
384
|
+
</div>
|
|
385
|
+
</div>
|
|
386
|
+
</div>
|
|
387
|
+
)
|
|
388
|
+
})
|
|
389
|
+
)}
|
|
390
|
+
</div>
|
|
391
|
+
</section>
|
|
392
|
+
</div>
|
|
393
|
+
</div>
|
|
394
|
+
)}
|
|
395
|
+
|
|
396
|
+
{activeTab === 'evals' && (
|
|
397
|
+
<div className="grid gap-5 xl:grid-cols-[360px_1fr]">
|
|
398
|
+
<section className="rounded-[16px] border border-white/[0.06] bg-white/[0.025] p-4">
|
|
399
|
+
<h2 className="font-display text-[15px] font-700 text-text">Eval Lab</h2>
|
|
400
|
+
<p className="mt-1 text-[12px] leading-relaxed text-text-3/65">Run focused scenarios or complete suites against one agent.</p>
|
|
401
|
+
<div className="mt-4 flex flex-col gap-3">
|
|
402
|
+
<label className="flex flex-col gap-1.5">
|
|
403
|
+
<span className="text-[10px] font-700 uppercase tracking-[0.12em] text-text-3/55">Agent</span>
|
|
404
|
+
<select
|
|
405
|
+
value={selectedAgentId}
|
|
406
|
+
onChange={(event) => setSelectedAgentId(event.target.value)}
|
|
407
|
+
className="rounded-[10px] border border-white/[0.08] bg-white/[0.04] px-3 py-2 text-[12px] text-text outline-none"
|
|
408
|
+
>
|
|
409
|
+
{agentOptions.length === 0 && <option value="">No agents available</option>}
|
|
410
|
+
{agentOptions.map((agent) => (
|
|
411
|
+
<option key={agent.id} value={agent.id}>{agent.name}</option>
|
|
412
|
+
))}
|
|
413
|
+
</select>
|
|
414
|
+
</label>
|
|
415
|
+
<label className="flex flex-col gap-1.5">
|
|
416
|
+
<span className="text-[10px] font-700 uppercase tracking-[0.12em] text-text-3/55">Scenario</span>
|
|
417
|
+
<select
|
|
418
|
+
value={selectedScenarioId}
|
|
419
|
+
onChange={(event) => setSelectedScenarioId(event.target.value)}
|
|
420
|
+
className="rounded-[10px] border border-white/[0.08] bg-white/[0.04] px-3 py-2 text-[12px] text-text outline-none"
|
|
421
|
+
>
|
|
422
|
+
{scenarios.map((scenario) => (
|
|
423
|
+
<option key={scenario.id} value={scenario.id}>{scenario.name}</option>
|
|
424
|
+
))}
|
|
425
|
+
</select>
|
|
426
|
+
</label>
|
|
427
|
+
{selectedScenarioId && scenarioById.get(selectedScenarioId) && (
|
|
428
|
+
<div className="rounded-[12px] border border-white/[0.06] bg-white/[0.025] px-3 py-3">
|
|
429
|
+
<div className="text-[13px] font-700 text-text">{scenarioById.get(selectedScenarioId)!.name}</div>
|
|
430
|
+
<p className="mt-1 text-[12px] leading-relaxed text-text-3/65">{scenarioById.get(selectedScenarioId)!.description}</p>
|
|
431
|
+
<div className="mt-2 flex flex-wrap gap-1.5">
|
|
432
|
+
<span className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">{scenarioById.get(selectedScenarioId)!.category}</span>
|
|
433
|
+
<span className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">{scenarioById.get(selectedScenarioId)!.criteriaCount} criteria</span>
|
|
434
|
+
<span className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">{formatDuration(scenarioById.get(selectedScenarioId)!.timeoutMs)}</span>
|
|
435
|
+
</div>
|
|
436
|
+
</div>
|
|
437
|
+
)}
|
|
438
|
+
<button
|
|
439
|
+
type="button"
|
|
440
|
+
disabled={!selectedAgentId || !selectedScenarioId || !!evalBusy}
|
|
441
|
+
onClick={() => void runScenario()}
|
|
442
|
+
className="inline-flex items-center justify-center gap-2 rounded-[10px] bg-accent-bright px-3 py-2.5 text-[12px] font-800 text-black transition-opacity hover:opacity-90 disabled:cursor-not-allowed disabled:opacity-40"
|
|
443
|
+
>
|
|
444
|
+
<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor">
|
|
445
|
+
<path d="M8 5v14l11-7z" />
|
|
446
|
+
</svg>
|
|
447
|
+
{evalBusy?.startsWith('scenario:') ? 'Running Scenario' : 'Run Scenario'}
|
|
448
|
+
</button>
|
|
449
|
+
</div>
|
|
450
|
+
</section>
|
|
451
|
+
|
|
452
|
+
<div className="flex flex-col gap-5">
|
|
453
|
+
<section className="rounded-[16px] border border-white/[0.06] bg-white/[0.025] p-4">
|
|
454
|
+
<div className="flex flex-wrap items-center justify-between gap-3">
|
|
455
|
+
<div>
|
|
456
|
+
<h2 className="font-display text-[15px] font-700 text-text">Suites</h2>
|
|
457
|
+
<p className="mt-1 text-[12px] text-text-3/65">Release-oriented eval suites available through the existing eval API.</p>
|
|
458
|
+
</div>
|
|
459
|
+
<select
|
|
460
|
+
value={selectedSuite}
|
|
461
|
+
onChange={(event) => setSelectedSuite(event.target.value)}
|
|
462
|
+
className="rounded-[10px] border border-white/[0.08] bg-white/[0.04] px-3 py-2 text-[12px] text-text outline-none"
|
|
463
|
+
>
|
|
464
|
+
{suites.map((suite) => (
|
|
465
|
+
<option key={suite.name} value={suite.name}>{suite.name}</option>
|
|
466
|
+
))}
|
|
467
|
+
</select>
|
|
468
|
+
</div>
|
|
469
|
+
<div className="mt-4 grid gap-3 md:grid-cols-2 xl:grid-cols-3">
|
|
470
|
+
{suites.map((suite) => (
|
|
471
|
+
<div key={suite.name} className="rounded-[14px] border border-white/[0.06] bg-white/[0.025] p-3">
|
|
472
|
+
<div className="flex items-start justify-between gap-3">
|
|
473
|
+
<div>
|
|
474
|
+
<div className="text-[13px] font-800 text-text">{suite.name}</div>
|
|
475
|
+
<div className="mt-1 text-[11px] text-text-3/65">{suite.count} scenarios - {suite.maxScore} max score</div>
|
|
476
|
+
</div>
|
|
477
|
+
<button
|
|
478
|
+
type="button"
|
|
479
|
+
disabled={!selectedAgentId || !!evalBusy}
|
|
480
|
+
onClick={() => void runSuite(suite.name)}
|
|
481
|
+
className="rounded-[8px] border border-white/[0.08] px-2 py-1 text-[11px] font-700 text-text-2 transition-colors hover:bg-white/[0.06] disabled:cursor-not-allowed disabled:opacity-40"
|
|
482
|
+
>
|
|
483
|
+
{evalBusy === `suite:${suite.name}` ? 'Running' : 'Run'}
|
|
484
|
+
</button>
|
|
485
|
+
</div>
|
|
486
|
+
<div className="mt-3 flex flex-wrap gap-1.5">
|
|
487
|
+
{suite.categories.map((category) => (
|
|
488
|
+
<span key={category} className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">{category}</span>
|
|
489
|
+
))}
|
|
490
|
+
</div>
|
|
491
|
+
</div>
|
|
492
|
+
))}
|
|
493
|
+
</div>
|
|
494
|
+
<div className="mt-4 text-[11px] text-text-3/60">
|
|
495
|
+
{selectedSuiteScenarios.length} scenario{selectedSuiteScenarios.length === 1 ? '' : 's'} selected in {selectedSuite}.
|
|
496
|
+
</div>
|
|
497
|
+
</section>
|
|
498
|
+
|
|
499
|
+
<section className="rounded-[16px] border border-white/[0.06] bg-white/[0.025] p-4">
|
|
500
|
+
<h2 className="font-display text-[15px] font-700 text-text">Score History</h2>
|
|
501
|
+
<div className="mt-4 grid gap-3 lg:grid-cols-2">
|
|
502
|
+
{evalRuns.length === 0 ? (
|
|
503
|
+
<EmptyState title="No eval results yet" description="Run a scenario or suite to see criteria scores and evidence." />
|
|
504
|
+
) : (
|
|
505
|
+
evalRuns.slice(0, 12).map((run) => {
|
|
506
|
+
const percent = scorePercent(run.score, run.maxScore)
|
|
507
|
+
const scenario = scenarioById.get(run.scenarioId)
|
|
508
|
+
return (
|
|
509
|
+
<div key={run.id} className="rounded-[14px] border border-white/[0.06] bg-white/[0.025] p-3">
|
|
510
|
+
<div className="flex items-start justify-between gap-3">
|
|
511
|
+
<div className="min-w-0">
|
|
512
|
+
<div className="truncate text-[13px] font-800 text-text">{scenario?.name || run.scenarioId}</div>
|
|
513
|
+
<div className="mt-1 text-[11px] text-text-3/60">{agentLabel(agents[run.agentId], run.agentId)}</div>
|
|
514
|
+
<div className="mt-1 text-[10px] text-text-3/50">{formatTimestamp(run.endedAt ?? run.startedAt)}</div>
|
|
515
|
+
</div>
|
|
516
|
+
<div className={cn('rounded-[8px] px-2 py-1 text-[13px] font-800', percent == null || percent >= 80 ? 'bg-emerald-500/10 text-emerald-300' : 'bg-amber-500/10 text-amber-300')}>
|
|
517
|
+
{formatPercent(percent)}
|
|
518
|
+
</div>
|
|
519
|
+
</div>
|
|
520
|
+
<div className="mt-3 flex flex-col gap-2">
|
|
521
|
+
{run.details.slice(0, 3).map((detail) => (
|
|
522
|
+
<div key={detail.criterion} className="rounded-[10px] bg-white/[0.025] px-3 py-2">
|
|
523
|
+
<div className="flex items-center justify-between gap-3">
|
|
524
|
+
<div className="text-[11px] font-700 text-text-2">{detail.criterion}</div>
|
|
525
|
+
<div className="text-[10px] text-text-3/70">{detail.score}/{detail.maxScore}</div>
|
|
526
|
+
</div>
|
|
527
|
+
{detail.evidence && <p className="mt-1 line-clamp-2 text-[11px] leading-relaxed text-text-3/65">{detail.evidence}</p>}
|
|
528
|
+
</div>
|
|
529
|
+
))}
|
|
530
|
+
{run.details.length > 3 && (
|
|
531
|
+
<div className="text-[10px] text-text-3/50">+{run.details.length - 3} more criteria</div>
|
|
532
|
+
)}
|
|
533
|
+
{run.error && <div className="rounded-[10px] bg-rose-500/[0.06] px-3 py-2 text-[11px] text-rose-200">{run.error}</div>}
|
|
534
|
+
</div>
|
|
535
|
+
</div>
|
|
536
|
+
)
|
|
537
|
+
})
|
|
538
|
+
)}
|
|
539
|
+
</div>
|
|
540
|
+
</section>
|
|
541
|
+
</div>
|
|
542
|
+
</div>
|
|
543
|
+
)}
|
|
544
|
+
|
|
545
|
+
{activeTab === 'approvals' && (
|
|
546
|
+
<section className="rounded-[16px] border border-white/[0.06] bg-white/[0.025] p-4">
|
|
547
|
+
<div className="flex flex-wrap items-center justify-between gap-3">
|
|
548
|
+
<div>
|
|
549
|
+
<h2 className="font-display text-[15px] font-700 text-text">Approval Desk</h2>
|
|
550
|
+
<p className="mt-1 text-[12px] text-text-3/65">Pending human-loop, tool, connector, skill, agent, and budget requests.</p>
|
|
551
|
+
</div>
|
|
552
|
+
<div className="rounded-full border border-white/[0.08] bg-white/[0.04] px-3 py-1 text-[11px] font-700 text-text-3">
|
|
553
|
+
{approvalGroups.totalPending} pending
|
|
554
|
+
</div>
|
|
555
|
+
</div>
|
|
556
|
+
<div className="mt-4 flex flex-col gap-4">
|
|
557
|
+
{approvalGroups.totalPending === 0 ? (
|
|
558
|
+
<EmptyState title="No pending approvals" description="The approval queue is clear." />
|
|
559
|
+
) : (
|
|
560
|
+
approvalGroups.categories.map((group) => (
|
|
561
|
+
<div key={group.category} className="rounded-[14px] border border-white/[0.06] bg-white/[0.02] p-3">
|
|
562
|
+
<div className="mb-3 flex items-center justify-between gap-3">
|
|
563
|
+
<div className="text-[12px] font-800 uppercase tracking-[0.1em] text-text-2">{group.category.replaceAll('_', ' ')}</div>
|
|
564
|
+
<div className="text-[11px] font-700 text-text-3/65">{group.count} request{group.count === 1 ? '' : 's'}</div>
|
|
565
|
+
</div>
|
|
566
|
+
<div className="grid gap-2 lg:grid-cols-2">
|
|
567
|
+
{group.approvals.map((approval) => (
|
|
568
|
+
<div key={approval.id} className="rounded-[12px] border border-white/[0.06] bg-surface px-3 py-3">
|
|
569
|
+
<div className="text-[13px] font-800 text-text">{approval.title}</div>
|
|
570
|
+
{approval.description && <p className="mt-1 text-[12px] leading-relaxed text-text-3/65">{approval.description}</p>}
|
|
571
|
+
<div className="mt-2 flex flex-wrap gap-x-3 gap-y-1 text-[10px] text-text-3/55">
|
|
572
|
+
<span>{formatTimestamp(approval.createdAt)}</span>
|
|
573
|
+
{approval.agentId && <span>agent {agents[approval.agentId]?.name || approval.agentId}</span>}
|
|
574
|
+
{approval.sessionId && <span>session {approval.sessionId.slice(0, 8)}</span>}
|
|
575
|
+
</div>
|
|
576
|
+
<div className="mt-3 flex gap-2">
|
|
577
|
+
<button
|
|
578
|
+
type="button"
|
|
579
|
+
disabled={approvalBusy === approval.id}
|
|
580
|
+
onClick={() => void actOnApproval(approval, true)}
|
|
581
|
+
className="inline-flex items-center gap-1.5 rounded-[9px] bg-emerald-400 px-3 py-1.5 text-[11px] font-800 text-black transition-opacity hover:opacity-90 disabled:opacity-40"
|
|
582
|
+
>
|
|
583
|
+
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="3" strokeLinecap="round">
|
|
584
|
+
<path d="M20 6L9 17l-5-5" />
|
|
585
|
+
</svg>
|
|
586
|
+
Approve
|
|
587
|
+
</button>
|
|
588
|
+
<button
|
|
589
|
+
type="button"
|
|
590
|
+
disabled={approvalBusy === approval.id}
|
|
591
|
+
onClick={() => void actOnApproval(approval, false)}
|
|
592
|
+
className="inline-flex items-center gap-1.5 rounded-[9px] border border-rose-400/25 bg-rose-500/[0.06] px-3 py-1.5 text-[11px] font-800 text-rose-200 transition-colors hover:bg-rose-500/[0.1] disabled:opacity-40"
|
|
593
|
+
>
|
|
594
|
+
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="3" strokeLinecap="round">
|
|
595
|
+
<path d="M18 6L6 18" /><path d="M6 6l12 12" />
|
|
596
|
+
</svg>
|
|
597
|
+
Deny
|
|
598
|
+
</button>
|
|
599
|
+
</div>
|
|
600
|
+
</div>
|
|
601
|
+
))}
|
|
602
|
+
</div>
|
|
603
|
+
</div>
|
|
604
|
+
))
|
|
605
|
+
)}
|
|
606
|
+
</div>
|
|
607
|
+
</section>
|
|
608
|
+
)}
|
|
609
|
+
|
|
610
|
+
{activeTab === 'runs' && (
|
|
611
|
+
<div className="flex min-h-[680px] flex-col rounded-[16px] border border-white/[0.06] bg-white/[0.025]">
|
|
612
|
+
<div className="border-b border-white/[0.06] px-5 py-4">
|
|
613
|
+
<div className="flex flex-col gap-3 lg:flex-row lg:items-center lg:justify-between">
|
|
614
|
+
<div>
|
|
615
|
+
<h2 className="font-display text-[15px] font-700 text-text">Run Review</h2>
|
|
616
|
+
<p className="mt-1 text-[12px] text-text-3/65">Filter recent runs and open replay evidence from the detail sheet.</p>
|
|
617
|
+
</div>
|
|
618
|
+
<div className="flex flex-wrap gap-2">
|
|
619
|
+
<span className="rounded-full bg-rose-500/[0.08] px-2.5 py-1 text-[11px] font-700 text-rose-300">{runHealth.byStatus.failed} failed</span>
|
|
620
|
+
<span className="rounded-full bg-blue-500/[0.08] px-2.5 py-1 text-[11px] font-700 text-blue-300">{runHealth.byStatus.running} running</span>
|
|
621
|
+
<span className="rounded-full bg-emerald-500/[0.08] px-2.5 py-1 text-[11px] font-700 text-emerald-300">{runHealth.byStatus.completed} completed</span>
|
|
622
|
+
</div>
|
|
623
|
+
</div>
|
|
624
|
+
</div>
|
|
625
|
+
<RunList />
|
|
626
|
+
</div>
|
|
627
|
+
)}
|
|
628
|
+
</div>
|
|
629
|
+
</div>
|
|
630
|
+
</MainContent>
|
|
631
|
+
)
|
|
632
|
+
}
|