@swarmclawai/swarmclaw 1.9.4 → 1.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -0
- package/package.json +2 -2
- package/src/app/api/eval/environments/route.ts +59 -0
- package/src/app/api/eval/run/route.ts +8 -1
- package/src/app/api/eval/suite/route.ts +6 -0
- package/src/app/api/portability/export/route.test.ts +225 -0
- package/src/app/api/portability/export/route.ts +18 -9
- package/src/app/api/portability/import/route.test.ts +232 -31
- package/src/app/api/portability/import/route.ts +2 -2
- package/src/cli/index.js +2 -0
- package/src/components/quality/quality-workspace.tsx +149 -5
- package/src/lib/server/eval/environment-plan.test.ts +221 -0
- package/src/lib/server/eval/environment-plan.ts +498 -0
- package/src/lib/server/eval/runner.ts +53 -3
- package/src/lib/server/eval/scenarios.ts +18 -0
- package/src/lib/server/eval/types.ts +55 -0
- package/src/lib/server/portability/export.ts +244 -38
- package/src/lib/server/portability/import.ts +148 -98
- package/src/lib/validation/schemas.ts +54 -1
|
@@ -1,39 +1,18 @@
|
|
|
1
1
|
import assert from 'node:assert/strict'
|
|
2
|
-
import fs from 'node:fs'
|
|
3
|
-
import os from 'node:os'
|
|
4
|
-
import path from 'node:path'
|
|
5
|
-
import { spawnSync } from 'node:child_process'
|
|
6
2
|
import test from 'node:test'
|
|
7
3
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
function runWithTempDataDir(script: string) {
|
|
11
|
-
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'swarmclaw-portability-import-'))
|
|
12
|
-
try {
|
|
13
|
-
const result = spawnSync(process.execPath, ['--import', 'tsx', '--input-type=module', '--eval', script], {
|
|
14
|
-
cwd: repoRoot,
|
|
15
|
-
env: {
|
|
16
|
-
...process.env,
|
|
17
|
-
DATA_DIR: path.join(tempDir, 'data'),
|
|
18
|
-
WORKSPACE_DIR: path.join(tempDir, 'workspace'),
|
|
19
|
-
},
|
|
20
|
-
encoding: 'utf-8',
|
|
21
|
-
})
|
|
22
|
-
assert.equal(result.status, 0, result.stderr || result.stdout || 'subprocess failed')
|
|
23
|
-
const lines = (result.stdout || '')
|
|
24
|
-
.trim()
|
|
25
|
-
.split('\n')
|
|
26
|
-
.map((line) => line.trim())
|
|
27
|
-
.filter(Boolean)
|
|
28
|
-
const jsonLine = [...lines].reverse().find((line) => line.startsWith('{'))
|
|
29
|
-
return JSON.parse(jsonLine || '{}')
|
|
30
|
-
} finally {
|
|
31
|
-
fs.rmSync(tempDir, { recursive: true, force: true })
|
|
32
|
-
}
|
|
33
|
-
}
|
|
4
|
+
import { runWithTempDataDir } from '@/lib/server/test-utils/run-with-temp-data-dir'
|
|
34
5
|
|
|
35
6
|
test('POST /api/portability/import validates manifest arrays before importing', () => {
|
|
36
|
-
const output = runWithTempDataDir(`
|
|
7
|
+
const output = runWithTempDataDir<{
|
|
8
|
+
invalidStatus: number
|
|
9
|
+
invalidError: string | null
|
|
10
|
+
invalidPaths: string[]
|
|
11
|
+
validStatus: number
|
|
12
|
+
validAgentsCreated: number | null
|
|
13
|
+
validSkillsCreated: number | null
|
|
14
|
+
validSchedulesCreated: number | null
|
|
15
|
+
}>(`
|
|
37
16
|
const routeMod = await import('./src/app/api/portability/import/route')
|
|
38
17
|
const route = routeMod.default || routeMod
|
|
39
18
|
|
|
@@ -78,3 +57,225 @@ test('POST /api/portability/import validates manifest arrays before importing',
|
|
|
78
57
|
assert.equal(output.validSkillsCreated, 0)
|
|
79
58
|
assert.equal(output.validSchedulesCreated, 0)
|
|
80
59
|
})
|
|
60
|
+
|
|
61
|
+
test('POST /api/portability/import preserves v2 bundle resources after validation', () => {
|
|
62
|
+
const output = runWithTempDataDir<{
|
|
63
|
+
status: number
|
|
64
|
+
created: Record<string, number>
|
|
65
|
+
projectId: string | null
|
|
66
|
+
agentId: string | null
|
|
67
|
+
agentProjectId: string | null
|
|
68
|
+
agentSkillIds: string[]
|
|
69
|
+
agentMcpServerIds: string[]
|
|
70
|
+
agentGoalId: string | null
|
|
71
|
+
skillId: string | null
|
|
72
|
+
skillProjectId: string | null
|
|
73
|
+
skillAgentIds: string[]
|
|
74
|
+
scheduleProjectId: string | null
|
|
75
|
+
scheduleParticipantIds: string[]
|
|
76
|
+
scheduleFacilitatorId: string | null
|
|
77
|
+
scheduleObserverIds: string[]
|
|
78
|
+
chatroomId: string | null
|
|
79
|
+
chatroomAgentIds: string[]
|
|
80
|
+
connectorAgentId: string | null
|
|
81
|
+
connectorChatroomId: string | null
|
|
82
|
+
connectorEnabled: boolean | null
|
|
83
|
+
mcpId: string | null
|
|
84
|
+
mcpEnvKeys: string[]
|
|
85
|
+
goalId: string | null
|
|
86
|
+
goalProjectId: string | null
|
|
87
|
+
goalAgentId: string | null
|
|
88
|
+
needsCredentials: string[]
|
|
89
|
+
}>(`
|
|
90
|
+
const routeMod = await import('./src/app/api/portability/import/route')
|
|
91
|
+
const storageMod = await import('./src/lib/server/storage')
|
|
92
|
+
const agentRepoMod = await import('./src/lib/server/agents/agent-repository')
|
|
93
|
+
const skillRepoMod = await import('./src/lib/server/skills/skill-repository')
|
|
94
|
+
const scheduleRepoMod = await import('./src/lib/server/schedules/schedule-repository')
|
|
95
|
+
const chatroomRepoMod = await import('./src/lib/server/chatrooms/chatroom-repository')
|
|
96
|
+
const connectorRepoMod = await import('./src/lib/server/connectors/connector-repository')
|
|
97
|
+
const route = routeMod.default || routeMod
|
|
98
|
+
const storage = storageMod.default || storageMod
|
|
99
|
+
const agentRepo = agentRepoMod.default || agentRepoMod
|
|
100
|
+
const skillRepo = skillRepoMod.default || skillRepoMod
|
|
101
|
+
const scheduleRepo = scheduleRepoMod.default || scheduleRepoMod
|
|
102
|
+
const chatroomRepo = chatroomRepoMod.default || chatroomRepoMod
|
|
103
|
+
const connectorRepo = connectorRepoMod.default || connectorRepoMod
|
|
104
|
+
const { loadProjects, loadMcpServers, loadGoals } = storage
|
|
105
|
+
const { loadAgents } = agentRepo
|
|
106
|
+
const { loadSkills } = skillRepo
|
|
107
|
+
const { loadSchedules } = scheduleRepo
|
|
108
|
+
const { loadChatrooms } = chatroomRepo
|
|
109
|
+
const { loadConnectors } = connectorRepo
|
|
110
|
+
|
|
111
|
+
const response = await route.POST(new Request('http://local/api/portability/import', {
|
|
112
|
+
method: 'POST',
|
|
113
|
+
headers: { 'content-type': 'application/json' },
|
|
114
|
+
body: JSON.stringify({
|
|
115
|
+
formatVersion: 2,
|
|
116
|
+
exportedAt: '2026-05-05T00:00:00.000Z',
|
|
117
|
+
scope: { kind: 'project', originalProjectId: 'project-1', projectName: 'Launch Room' },
|
|
118
|
+
projects: [{
|
|
119
|
+
originalId: 'project-1',
|
|
120
|
+
name: 'Launch Room',
|
|
121
|
+
description: 'Shipping workspace',
|
|
122
|
+
objective: 'Ship the fix',
|
|
123
|
+
}],
|
|
124
|
+
skills: [{
|
|
125
|
+
originalId: 'skill-1',
|
|
126
|
+
originalProjectId: 'project-1',
|
|
127
|
+
originalAgentIds: ['agent-1'],
|
|
128
|
+
name: 'Release Skill',
|
|
129
|
+
content: 'Ship carefully',
|
|
130
|
+
scope: 'agent',
|
|
131
|
+
}],
|
|
132
|
+
mcpServers: [{
|
|
133
|
+
originalId: 'mcp-1',
|
|
134
|
+
name: 'Local Tools',
|
|
135
|
+
transport: 'stdio',
|
|
136
|
+
command: 'node',
|
|
137
|
+
args: ['tool.js'],
|
|
138
|
+
envKeys: ['API_TOKEN'],
|
|
139
|
+
credentialsScrubbed: true,
|
|
140
|
+
}],
|
|
141
|
+
agents: [{
|
|
142
|
+
originalId: 'agent-1',
|
|
143
|
+
name: 'Release Lead',
|
|
144
|
+
description: 'Owns launch execution',
|
|
145
|
+
systemPrompt: 'Ship safely',
|
|
146
|
+
provider: 'openai',
|
|
147
|
+
model: 'gpt-4o-mini',
|
|
148
|
+
projectId: 'project-1',
|
|
149
|
+
skillIds: ['skill-1'],
|
|
150
|
+
mcpServerIds: ['mcp-1'],
|
|
151
|
+
goalId: 'goal-1',
|
|
152
|
+
}],
|
|
153
|
+
schedules: [{
|
|
154
|
+
originalId: 'schedule-1',
|
|
155
|
+
originalAgentId: 'agent-1',
|
|
156
|
+
name: 'Launch Check',
|
|
157
|
+
projectId: 'project-1',
|
|
158
|
+
taskPrompt: 'Check release readiness',
|
|
159
|
+
taskMode: 'protocol',
|
|
160
|
+
protocolTemplateId: 'template-1',
|
|
161
|
+
protocolParticipantAgentIds: ['agent-1'],
|
|
162
|
+
protocolFacilitatorAgentId: 'agent-1',
|
|
163
|
+
protocolObserverAgentIds: ['agent-1'],
|
|
164
|
+
protocolConfig: { phase: 'ship' },
|
|
165
|
+
scheduleType: 'interval',
|
|
166
|
+
intervalMs: 60000,
|
|
167
|
+
}],
|
|
168
|
+
chatrooms: [{
|
|
169
|
+
originalId: 'room-1',
|
|
170
|
+
originalAgentIds: ['agent-1'],
|
|
171
|
+
name: 'Launch Room Chat',
|
|
172
|
+
chatMode: 'parallel',
|
|
173
|
+
autoAddress: true,
|
|
174
|
+
routingRules: [{
|
|
175
|
+
type: 'keyword',
|
|
176
|
+
keywords: ['release'],
|
|
177
|
+
originalAgentId: 'agent-1',
|
|
178
|
+
priority: 1,
|
|
179
|
+
}],
|
|
180
|
+
}],
|
|
181
|
+
connectors: [{
|
|
182
|
+
originalId: 'connector-1',
|
|
183
|
+
originalAgentId: 'agent-1',
|
|
184
|
+
originalChatroomId: 'room-1',
|
|
185
|
+
name: 'Launch Slack',
|
|
186
|
+
platform: 'slack',
|
|
187
|
+
isEnabled: false,
|
|
188
|
+
config: { channel: 'launch' },
|
|
189
|
+
credentialsScrubbed: true,
|
|
190
|
+
}],
|
|
191
|
+
goals: [{
|
|
192
|
+
originalId: 'goal-1',
|
|
193
|
+
originalProjectId: 'project-1',
|
|
194
|
+
originalAgentId: 'agent-1',
|
|
195
|
+
title: 'Ship fix',
|
|
196
|
+
level: 'project',
|
|
197
|
+
objective: 'Release the portability fix',
|
|
198
|
+
status: 'active',
|
|
199
|
+
}],
|
|
200
|
+
extensions: [{ name: 'builtin-checks' }],
|
|
201
|
+
}),
|
|
202
|
+
}))
|
|
203
|
+
const payload = await response.json()
|
|
204
|
+
const project = Object.values(loadProjects()).find((item) => item.name === 'Launch Room')
|
|
205
|
+
const agent = Object.values(loadAgents()).find((item) => item.name === 'Release Lead')
|
|
206
|
+
const skill = Object.values(loadSkills()).find((item) => item.name === 'Release Skill')
|
|
207
|
+
const schedule = Object.values(loadSchedules()).find((item) => item.name === 'Launch Check')
|
|
208
|
+
const chatroom = Object.values(loadChatrooms()).find((item) => item.name === 'Launch Room Chat')
|
|
209
|
+
const connector = Object.values(loadConnectors()).find((item) => item.name === 'Launch Slack')
|
|
210
|
+
const mcp = Object.values(loadMcpServers()).find((item) => item.name === 'Local Tools')
|
|
211
|
+
const goal = Object.values(loadGoals()).find((item) => item.title === 'Ship fix')
|
|
212
|
+
|
|
213
|
+
console.log(JSON.stringify({
|
|
214
|
+
status: response.status,
|
|
215
|
+
created: {
|
|
216
|
+
agents: payload.agents.created,
|
|
217
|
+
skills: payload.skills.created,
|
|
218
|
+
schedules: payload.schedules.created,
|
|
219
|
+
connectors: payload.connectors.created,
|
|
220
|
+
chatrooms: payload.chatrooms.created,
|
|
221
|
+
mcpServers: payload.mcpServers.created,
|
|
222
|
+
projects: payload.projects.created,
|
|
223
|
+
goals: payload.goals.created,
|
|
224
|
+
},
|
|
225
|
+
projectId: project?.id || null,
|
|
226
|
+
agentId: agent?.id || null,
|
|
227
|
+
agentProjectId: agent?.projectId || null,
|
|
228
|
+
agentSkillIds: agent?.skillIds || [],
|
|
229
|
+
agentMcpServerIds: agent?.mcpServerIds || [],
|
|
230
|
+
agentGoalId: agent?.goalId || null,
|
|
231
|
+
skillId: skill?.id || null,
|
|
232
|
+
skillProjectId: skill?.projectId || null,
|
|
233
|
+
skillAgentIds: skill?.agentIds || [],
|
|
234
|
+
scheduleProjectId: schedule?.projectId || null,
|
|
235
|
+
scheduleParticipantIds: schedule?.protocolParticipantAgentIds || [],
|
|
236
|
+
scheduleFacilitatorId: schedule?.protocolFacilitatorAgentId || null,
|
|
237
|
+
scheduleObserverIds: schedule?.protocolObserverAgentIds || [],
|
|
238
|
+
chatroomId: chatroom?.id || null,
|
|
239
|
+
chatroomAgentIds: chatroom?.agentIds || [],
|
|
240
|
+
connectorAgentId: connector?.agentId || null,
|
|
241
|
+
connectorChatroomId: connector?.chatroomId || null,
|
|
242
|
+
connectorEnabled: connector?.isEnabled ?? null,
|
|
243
|
+
mcpId: mcp?.id || null,
|
|
244
|
+
mcpEnvKeys: Object.keys(mcp?.env || {}),
|
|
245
|
+
goalId: goal?.id || null,
|
|
246
|
+
goalProjectId: goal?.projectId || null,
|
|
247
|
+
goalAgentId: goal?.agentId || null,
|
|
248
|
+
needsCredentials: payload.mcpServers.needsCredentials,
|
|
249
|
+
}))
|
|
250
|
+
`)
|
|
251
|
+
|
|
252
|
+
assert.equal(output.status, 200)
|
|
253
|
+
assert.deepEqual(output.created, {
|
|
254
|
+
agents: 1,
|
|
255
|
+
skills: 1,
|
|
256
|
+
schedules: 1,
|
|
257
|
+
connectors: 1,
|
|
258
|
+
chatrooms: 1,
|
|
259
|
+
mcpServers: 1,
|
|
260
|
+
projects: 1,
|
|
261
|
+
goals: 1,
|
|
262
|
+
})
|
|
263
|
+
assert.equal(output.agentProjectId, output.projectId)
|
|
264
|
+
assert.deepEqual(output.agentSkillIds, [output.skillId])
|
|
265
|
+
assert.deepEqual(output.agentMcpServerIds, [output.mcpId])
|
|
266
|
+
assert.equal(output.agentGoalId, output.goalId)
|
|
267
|
+
assert.equal(output.skillProjectId, output.projectId)
|
|
268
|
+
assert.deepEqual(output.skillAgentIds, [output.agentId])
|
|
269
|
+
assert.equal(output.scheduleProjectId, output.projectId)
|
|
270
|
+
assert.deepEqual(output.scheduleParticipantIds, [output.agentId])
|
|
271
|
+
assert.equal(output.scheduleFacilitatorId, output.agentId)
|
|
272
|
+
assert.deepEqual(output.scheduleObserverIds, [output.agentId])
|
|
273
|
+
assert.deepEqual(output.chatroomAgentIds, [output.agentId])
|
|
274
|
+
assert.equal(output.connectorAgentId, output.agentId)
|
|
275
|
+
assert.equal(output.connectorChatroomId, output.chatroomId)
|
|
276
|
+
assert.equal(output.connectorEnabled, false)
|
|
277
|
+
assert.deepEqual(output.mcpEnvKeys, ['API_TOKEN'])
|
|
278
|
+
assert.equal(output.goalProjectId, output.projectId)
|
|
279
|
+
assert.equal(output.goalAgentId, output.agentId)
|
|
280
|
+
assert.deepEqual(output.needsCredentials, ['Local Tools'])
|
|
281
|
+
})
|
|
@@ -16,11 +16,11 @@ export async function POST(req: Request) {
|
|
|
16
16
|
}
|
|
17
17
|
|
|
18
18
|
try {
|
|
19
|
-
const result = importConfig(parsed.data as PortableManifest)
|
|
19
|
+
const result = importConfig(parsed.data as unknown as PortableManifest)
|
|
20
20
|
return NextResponse.json(result)
|
|
21
21
|
} catch (err) {
|
|
22
22
|
const message = err instanceof Error ? err.message : 'Failed to import manifest'
|
|
23
|
-
if (
|
|
23
|
+
if (message.startsWith('Unsupported format version ')) {
|
|
24
24
|
return NextResponse.json({ error: message }, { status: 400 })
|
|
25
25
|
}
|
|
26
26
|
return NextResponse.json({ error: message }, { status: 500 })
|
package/src/cli/index.js
CHANGED
|
@@ -231,8 +231,10 @@ const COMMAND_GROUPS = [
|
|
|
231
231
|
cmd('scenarios', 'GET', '/eval/scenarios', 'List available eval scenarios'),
|
|
232
232
|
cmd('suites', 'GET', '/eval/suites', 'List available eval suites (core, swe-bench-lite, gaia-l1, ...)'),
|
|
233
233
|
cmd('status', 'GET', '/eval/run', 'Get eval run status'),
|
|
234
|
+
cmd('environment', 'GET', '/eval/environments', 'Preview validation environment readiness for an eval'),
|
|
234
235
|
cmd('run', 'POST', '/eval/run', 'Run an eval scenario against an agent', { expectsJsonBody: true }),
|
|
235
236
|
cmd('suite', 'POST', '/eval/suite', 'Run a full eval suite against an agent (pass { suite: "swe-bench-lite" } in body)', { expectsJsonBody: true }),
|
|
237
|
+
cmd('environment-prepare', 'POST', '/eval/environments', 'Prepare validation environment readiness for an eval', { expectsJsonBody: true }),
|
|
236
238
|
],
|
|
237
239
|
},
|
|
238
240
|
{
|
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
} from '@/lib/quality/quality-summary'
|
|
18
18
|
import { cn } from '@/lib/utils'
|
|
19
19
|
import { useAppStore } from '@/stores/use-app-store'
|
|
20
|
-
import type { EvalRun, EvalSuiteResult } from '@/lib/server/eval/types'
|
|
20
|
+
import type { EvalEnvironmentPlan, EvalRun, EvalSuiteResult } from '@/lib/server/eval/types'
|
|
21
21
|
import type { Agent, ApprovalRequest, SessionRunRecord } from '@/types'
|
|
22
22
|
|
|
23
23
|
type QualityTab = 'overview' | 'evals' | 'approvals' | 'runs'
|
|
@@ -105,6 +105,96 @@ function EmptyState({ title, description }: { title: string; description: string
|
|
|
105
105
|
)
|
|
106
106
|
}
|
|
107
107
|
|
|
108
|
+
function environmentStatusClass(status: EvalEnvironmentPlan['status']): string {
|
|
109
|
+
if (status === 'ready') return 'border-emerald-500/25 bg-emerald-500/10 text-emerald-200'
|
|
110
|
+
if (status === 'warning') return 'border-amber-500/25 bg-amber-500/10 text-amber-200'
|
|
111
|
+
return 'border-rose-500/25 bg-rose-500/10 text-rose-200'
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
function checkClass(level: 'info' | 'warn' | 'error'): string {
|
|
115
|
+
if (level === 'error') return 'border-rose-500/20 bg-rose-500/[0.05] text-rose-200'
|
|
116
|
+
if (level === 'warn') return 'border-amber-500/20 bg-amber-500/[0.05] text-amber-200'
|
|
117
|
+
return 'border-white/[0.06] bg-white/[0.025] text-text-3'
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
function EvalEnvironmentPanel({ plan, loading, onRefresh }: {
|
|
121
|
+
plan: EvalEnvironmentPlan | null
|
|
122
|
+
loading: boolean
|
|
123
|
+
onRefresh: () => void
|
|
124
|
+
}) {
|
|
125
|
+
return (
|
|
126
|
+
<div className="rounded-[12px] border border-white/[0.06] bg-white/[0.025] px-3 py-3">
|
|
127
|
+
<div className="flex items-start justify-between gap-3">
|
|
128
|
+
<div>
|
|
129
|
+
<div className="text-[13px] font-800 text-text">Validation environment</div>
|
|
130
|
+
<p className="mt-1 text-[11px] leading-relaxed text-text-3/65">
|
|
131
|
+
Preflight checks, workspace context, and generated files for the selected eval.
|
|
132
|
+
</p>
|
|
133
|
+
</div>
|
|
134
|
+
<button
|
|
135
|
+
type="button"
|
|
136
|
+
onClick={onRefresh}
|
|
137
|
+
disabled={loading}
|
|
138
|
+
className="shrink-0 rounded-[8px] border border-white/[0.08] px-2 py-1 text-[10px] font-800 text-text-2 transition-colors hover:bg-white/[0.06] disabled:opacity-40"
|
|
139
|
+
>
|
|
140
|
+
{loading ? 'Checking' : 'Refresh'}
|
|
141
|
+
</button>
|
|
142
|
+
</div>
|
|
143
|
+
{!plan ? (
|
|
144
|
+
<div className="mt-3 text-[11px] text-text-3/60">{loading ? 'Checking readiness...' : 'Choose an agent and scenario.'}</div>
|
|
145
|
+
) : (
|
|
146
|
+
<div className="mt-3 flex flex-col gap-3">
|
|
147
|
+
<div className="flex flex-wrap items-center gap-2">
|
|
148
|
+
<span className={cn('rounded-full border px-2 py-1 text-[10px] font-800 uppercase tracking-[0.08em]', environmentStatusClass(plan.status))}>
|
|
149
|
+
{plan.status}
|
|
150
|
+
</span>
|
|
151
|
+
{plan.target && (
|
|
152
|
+
<span className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">
|
|
153
|
+
{plan.target.kind} - {plan.target.label}
|
|
154
|
+
</span>
|
|
155
|
+
)}
|
|
156
|
+
<span className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">
|
|
157
|
+
{plan.requiredTools.length} tool{plan.requiredTools.length === 1 ? '' : 's'}
|
|
158
|
+
</span>
|
|
159
|
+
<span className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">
|
|
160
|
+
{plan.generatedFiles.length} file{plan.generatedFiles.length === 1 ? '' : 's'}
|
|
161
|
+
</span>
|
|
162
|
+
</div>
|
|
163
|
+
{plan.target?.environmentLabel && (
|
|
164
|
+
<div className="rounded-[10px] border border-white/[0.06] bg-white/[0.02] px-3 py-2 text-[11px] text-text-3/70">
|
|
165
|
+
Environment: <span className="font-700 text-text-2">{plan.target.environmentLabel}</span>
|
|
166
|
+
{plan.target.environmentStatus ? ` (${plan.target.environmentStatus})` : ''}
|
|
167
|
+
</div>
|
|
168
|
+
)}
|
|
169
|
+
<div className="flex flex-col gap-1.5">
|
|
170
|
+
{plan.checks.slice(0, 4).map((check) => (
|
|
171
|
+
<div key={`${check.code}:${check.message}`} className={cn('rounded-[9px] border px-2.5 py-2 text-[11px] leading-relaxed', checkClass(check.level))}>
|
|
172
|
+
<span className="font-800 uppercase tracking-[0.08em]">{check.level}</span>
|
|
173
|
+
<span className="ml-2">{check.message}</span>
|
|
174
|
+
</div>
|
|
175
|
+
))}
|
|
176
|
+
{plan.checks.length > 4 && (
|
|
177
|
+
<div className="text-[10px] text-text-3/55">+{plan.checks.length - 4} more check{plan.checks.length - 4 === 1 ? '' : 's'}</div>
|
|
178
|
+
)}
|
|
179
|
+
</div>
|
|
180
|
+
<div className="flex flex-wrap gap-1.5">
|
|
181
|
+
{plan.generatedFiles.slice(0, 5).map((file) => (
|
|
182
|
+
<span key={`${file.kind}:${file.path}`} className="rounded-full bg-white/[0.04] px-2 py-1 text-[10px] font-700 text-text-3">
|
|
183
|
+
{file.path}
|
|
184
|
+
</span>
|
|
185
|
+
))}
|
|
186
|
+
{plan.generatedFiles.length > 5 && (
|
|
187
|
+
<span className="rounded-full bg-white/[0.04] px-2 py-1 text-[10px] font-700 text-text-3">
|
|
188
|
+
+{plan.generatedFiles.length - 5}
|
|
189
|
+
</span>
|
|
190
|
+
)}
|
|
191
|
+
</div>
|
|
192
|
+
</div>
|
|
193
|
+
)}
|
|
194
|
+
</div>
|
|
195
|
+
)
|
|
196
|
+
}
|
|
197
|
+
|
|
108
198
|
export function QualityWorkspace() {
|
|
109
199
|
const router = useRouter()
|
|
110
200
|
const searchParams = useSearchParams()
|
|
@@ -127,6 +217,8 @@ export function QualityWorkspace() {
|
|
|
127
217
|
const [selectedSuite, setSelectedSuite] = useState('core')
|
|
128
218
|
const [selectedScenarioId, setSelectedScenarioId] = useState('')
|
|
129
219
|
const [evalBusy, setEvalBusy] = useState<string | null>(null)
|
|
220
|
+
const [evalEnvironmentPlan, setEvalEnvironmentPlan] = useState<EvalEnvironmentPlan | null>(null)
|
|
221
|
+
const [evalEnvironmentLoading, setEvalEnvironmentLoading] = useState(false)
|
|
130
222
|
const [approvalBusy, setApprovalBusy] = useState<string | null>(null)
|
|
131
223
|
|
|
132
224
|
useEffect(() => {
|
|
@@ -170,6 +262,27 @@ export function QualityWorkspace() {
|
|
|
170
262
|
}
|
|
171
263
|
}, [])
|
|
172
264
|
|
|
265
|
+
const loadEvalEnvironmentPlan = useCallback(async (opts: { refreshGateway?: boolean } = {}) => {
|
|
266
|
+
if (!selectedAgentId) {
|
|
267
|
+
setEvalEnvironmentPlan(null)
|
|
268
|
+
return
|
|
269
|
+
}
|
|
270
|
+
const params = new URLSearchParams({ agentId: selectedAgentId })
|
|
271
|
+
if (selectedScenarioId) params.set('scenarioId', selectedScenarioId)
|
|
272
|
+
else if (selectedSuite) params.set('suite', selectedSuite)
|
|
273
|
+
if (opts.refreshGateway) params.set('refreshGateway', 'true')
|
|
274
|
+
setEvalEnvironmentLoading(true)
|
|
275
|
+
try {
|
|
276
|
+
const plan = await api<EvalEnvironmentPlan>('GET', `/eval/environments?${params.toString()}`, undefined, { timeoutMs: opts.refreshGateway ? 20_000 : 8_000 })
|
|
277
|
+
setEvalEnvironmentPlan(plan)
|
|
278
|
+
} catch (err) {
|
|
279
|
+
setEvalEnvironmentPlan(null)
|
|
280
|
+
toast.error(err instanceof Error ? err.message : 'Unable to validate eval environment')
|
|
281
|
+
} finally {
|
|
282
|
+
setEvalEnvironmentLoading(false)
|
|
283
|
+
}
|
|
284
|
+
}, [selectedAgentId, selectedScenarioId, selectedSuite])
|
|
285
|
+
|
|
173
286
|
useEffect(() => {
|
|
174
287
|
void loadQualityData()
|
|
175
288
|
}, [loadQualityData])
|
|
@@ -184,6 +297,10 @@ export function QualityWorkspace() {
|
|
|
184
297
|
if (!selectedScenarioId && scenarios[0]) setSelectedScenarioId(scenarios[0].id)
|
|
185
298
|
}, [scenarios, selectedScenarioId])
|
|
186
299
|
|
|
300
|
+
useEffect(() => {
|
|
301
|
+
void loadEvalEnvironmentPlan()
|
|
302
|
+
}, [loadEvalEnvironmentPlan])
|
|
303
|
+
|
|
187
304
|
useEffect(() => {
|
|
188
305
|
if (!suites.some((suite) => suite.name === selectedSuite) && suites[0]) {
|
|
189
306
|
setSelectedSuite(suites[0].name)
|
|
@@ -208,34 +325,56 @@ export function QualityWorkspace() {
|
|
|
208
325
|
toast.error('Choose an agent and scenario first')
|
|
209
326
|
return
|
|
210
327
|
}
|
|
328
|
+
if (evalEnvironmentPlan?.status === 'blocked') {
|
|
329
|
+
toast.error('Fix the validation environment before running this eval')
|
|
330
|
+
return
|
|
331
|
+
}
|
|
211
332
|
setEvalBusy(`scenario:${selectedScenarioId}`)
|
|
212
333
|
try {
|
|
213
|
-
await api<EvalRun>('POST', '/eval/run', {
|
|
334
|
+
await api<EvalRun>('POST', '/eval/run', {
|
|
335
|
+
agentId: selectedAgentId,
|
|
336
|
+
scenarioId: selectedScenarioId,
|
|
337
|
+
gatewayProfileId: evalEnvironmentPlan?.target?.gatewayProfileId || null,
|
|
338
|
+
environmentId: evalEnvironmentPlan?.target?.environmentId || null,
|
|
339
|
+
refreshGateway: evalEnvironmentPlan?.target?.kind === 'gateway',
|
|
340
|
+
}, { timeoutMs: 180_000 })
|
|
214
341
|
toast.success('Eval scenario completed')
|
|
215
342
|
await loadQualityData({ silent: true })
|
|
343
|
+
await loadEvalEnvironmentPlan()
|
|
216
344
|
} catch (err) {
|
|
217
345
|
toast.error(err instanceof Error ? err.message : 'Eval scenario failed')
|
|
218
346
|
} finally {
|
|
219
347
|
setEvalBusy(null)
|
|
220
348
|
}
|
|
221
|
-
}, [loadQualityData, selectedAgentId, selectedScenarioId])
|
|
349
|
+
}, [evalEnvironmentPlan, loadEvalEnvironmentPlan, loadQualityData, selectedAgentId, selectedScenarioId])
|
|
222
350
|
|
|
223
351
|
const runSuite = useCallback(async (suiteName: string) => {
|
|
224
352
|
if (!selectedAgentId) {
|
|
225
353
|
toast.error('Choose an agent first')
|
|
226
354
|
return
|
|
227
355
|
}
|
|
356
|
+
if (evalEnvironmentPlan?.status === 'blocked') {
|
|
357
|
+
toast.error('Fix the validation environment before running this suite')
|
|
358
|
+
return
|
|
359
|
+
}
|
|
228
360
|
setEvalBusy(`suite:${suiteName}`)
|
|
229
361
|
try {
|
|
230
|
-
const result = await api<EvalSuiteResult>('POST', '/eval/suite', {
|
|
362
|
+
const result = await api<EvalSuiteResult>('POST', '/eval/suite', {
|
|
363
|
+
agentId: selectedAgentId,
|
|
364
|
+
suite: suiteName,
|
|
365
|
+
gatewayProfileId: evalEnvironmentPlan?.target?.gatewayProfileId || null,
|
|
366
|
+
environmentId: evalEnvironmentPlan?.target?.environmentId || null,
|
|
367
|
+
refreshGateway: evalEnvironmentPlan?.target?.kind === 'gateway',
|
|
368
|
+
}, { timeoutMs: 300_000 })
|
|
231
369
|
toast.success(`Suite completed at ${Math.round(result.percentage)}%`)
|
|
232
370
|
await loadQualityData({ silent: true })
|
|
371
|
+
await loadEvalEnvironmentPlan()
|
|
233
372
|
} catch (err) {
|
|
234
373
|
toast.error(err instanceof Error ? err.message : 'Eval suite failed')
|
|
235
374
|
} finally {
|
|
236
375
|
setEvalBusy(null)
|
|
237
376
|
}
|
|
238
|
-
}, [loadQualityData, selectedAgentId])
|
|
377
|
+
}, [evalEnvironmentPlan, loadEvalEnvironmentPlan, loadQualityData, selectedAgentId])
|
|
239
378
|
|
|
240
379
|
const actOnApproval = useCallback(async (approval: ApprovalRequest, approved: boolean) => {
|
|
241
380
|
setApprovalBusy(approval.id)
|
|
@@ -456,6 +595,11 @@ export function QualityWorkspace() {
|
|
|
456
595
|
</div>
|
|
457
596
|
</div>
|
|
458
597
|
)}
|
|
598
|
+
<EvalEnvironmentPanel
|
|
599
|
+
plan={evalEnvironmentPlan}
|
|
600
|
+
loading={evalEnvironmentLoading}
|
|
601
|
+
onRefresh={() => void loadEvalEnvironmentPlan({ refreshGateway: true })}
|
|
602
|
+
/>
|
|
459
603
|
<button
|
|
460
604
|
type="button"
|
|
461
605
|
onClick={() => openMissionTemplate('release-candidate-qa')}
|