@swarmclawai/swarmclaw 1.9.4 → 1.9.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,39 +1,18 @@
1
1
  import assert from 'node:assert/strict'
2
- import fs from 'node:fs'
3
- import os from 'node:os'
4
- import path from 'node:path'
5
- import { spawnSync } from 'node:child_process'
6
2
  import test from 'node:test'
7
3
 
8
- const repoRoot = path.resolve(path.dirname(new URL(import.meta.url).pathname), '../../../../..')
9
-
10
- function runWithTempDataDir(script: string) {
11
- const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'swarmclaw-portability-import-'))
12
- try {
13
- const result = spawnSync(process.execPath, ['--import', 'tsx', '--input-type=module', '--eval', script], {
14
- cwd: repoRoot,
15
- env: {
16
- ...process.env,
17
- DATA_DIR: path.join(tempDir, 'data'),
18
- WORKSPACE_DIR: path.join(tempDir, 'workspace'),
19
- },
20
- encoding: 'utf-8',
21
- })
22
- assert.equal(result.status, 0, result.stderr || result.stdout || 'subprocess failed')
23
- const lines = (result.stdout || '')
24
- .trim()
25
- .split('\n')
26
- .map((line) => line.trim())
27
- .filter(Boolean)
28
- const jsonLine = [...lines].reverse().find((line) => line.startsWith('{'))
29
- return JSON.parse(jsonLine || '{}')
30
- } finally {
31
- fs.rmSync(tempDir, { recursive: true, force: true })
32
- }
33
- }
4
+ import { runWithTempDataDir } from '@/lib/server/test-utils/run-with-temp-data-dir'
34
5
 
35
6
  test('POST /api/portability/import validates manifest arrays before importing', () => {
36
- const output = runWithTempDataDir(`
7
+ const output = runWithTempDataDir<{
8
+ invalidStatus: number
9
+ invalidError: string | null
10
+ invalidPaths: string[]
11
+ validStatus: number
12
+ validAgentsCreated: number | null
13
+ validSkillsCreated: number | null
14
+ validSchedulesCreated: number | null
15
+ }>(`
37
16
  const routeMod = await import('./src/app/api/portability/import/route')
38
17
  const route = routeMod.default || routeMod
39
18
 
@@ -78,3 +57,225 @@ test('POST /api/portability/import validates manifest arrays before importing',
78
57
  assert.equal(output.validSkillsCreated, 0)
79
58
  assert.equal(output.validSchedulesCreated, 0)
80
59
  })
60
+
61
+ test('POST /api/portability/import preserves v2 bundle resources after validation', () => {
62
+ const output = runWithTempDataDir<{
63
+ status: number
64
+ created: Record<string, number>
65
+ projectId: string | null
66
+ agentId: string | null
67
+ agentProjectId: string | null
68
+ agentSkillIds: string[]
69
+ agentMcpServerIds: string[]
70
+ agentGoalId: string | null
71
+ skillId: string | null
72
+ skillProjectId: string | null
73
+ skillAgentIds: string[]
74
+ scheduleProjectId: string | null
75
+ scheduleParticipantIds: string[]
76
+ scheduleFacilitatorId: string | null
77
+ scheduleObserverIds: string[]
78
+ chatroomId: string | null
79
+ chatroomAgentIds: string[]
80
+ connectorAgentId: string | null
81
+ connectorChatroomId: string | null
82
+ connectorEnabled: boolean | null
83
+ mcpId: string | null
84
+ mcpEnvKeys: string[]
85
+ goalId: string | null
86
+ goalProjectId: string | null
87
+ goalAgentId: string | null
88
+ needsCredentials: string[]
89
+ }>(`
90
+ const routeMod = await import('./src/app/api/portability/import/route')
91
+ const storageMod = await import('./src/lib/server/storage')
92
+ const agentRepoMod = await import('./src/lib/server/agents/agent-repository')
93
+ const skillRepoMod = await import('./src/lib/server/skills/skill-repository')
94
+ const scheduleRepoMod = await import('./src/lib/server/schedules/schedule-repository')
95
+ const chatroomRepoMod = await import('./src/lib/server/chatrooms/chatroom-repository')
96
+ const connectorRepoMod = await import('./src/lib/server/connectors/connector-repository')
97
+ const route = routeMod.default || routeMod
98
+ const storage = storageMod.default || storageMod
99
+ const agentRepo = agentRepoMod.default || agentRepoMod
100
+ const skillRepo = skillRepoMod.default || skillRepoMod
101
+ const scheduleRepo = scheduleRepoMod.default || scheduleRepoMod
102
+ const chatroomRepo = chatroomRepoMod.default || chatroomRepoMod
103
+ const connectorRepo = connectorRepoMod.default || connectorRepoMod
104
+ const { loadProjects, loadMcpServers, loadGoals } = storage
105
+ const { loadAgents } = agentRepo
106
+ const { loadSkills } = skillRepo
107
+ const { loadSchedules } = scheduleRepo
108
+ const { loadChatrooms } = chatroomRepo
109
+ const { loadConnectors } = connectorRepo
110
+
111
+ const response = await route.POST(new Request('http://local/api/portability/import', {
112
+ method: 'POST',
113
+ headers: { 'content-type': 'application/json' },
114
+ body: JSON.stringify({
115
+ formatVersion: 2,
116
+ exportedAt: '2026-05-05T00:00:00.000Z',
117
+ scope: { kind: 'project', originalProjectId: 'project-1', projectName: 'Launch Room' },
118
+ projects: [{
119
+ originalId: 'project-1',
120
+ name: 'Launch Room',
121
+ description: 'Shipping workspace',
122
+ objective: 'Ship the fix',
123
+ }],
124
+ skills: [{
125
+ originalId: 'skill-1',
126
+ originalProjectId: 'project-1',
127
+ originalAgentIds: ['agent-1'],
128
+ name: 'Release Skill',
129
+ content: 'Ship carefully',
130
+ scope: 'agent',
131
+ }],
132
+ mcpServers: [{
133
+ originalId: 'mcp-1',
134
+ name: 'Local Tools',
135
+ transport: 'stdio',
136
+ command: 'node',
137
+ args: ['tool.js'],
138
+ envKeys: ['API_TOKEN'],
139
+ credentialsScrubbed: true,
140
+ }],
141
+ agents: [{
142
+ originalId: 'agent-1',
143
+ name: 'Release Lead',
144
+ description: 'Owns launch execution',
145
+ systemPrompt: 'Ship safely',
146
+ provider: 'openai',
147
+ model: 'gpt-4o-mini',
148
+ projectId: 'project-1',
149
+ skillIds: ['skill-1'],
150
+ mcpServerIds: ['mcp-1'],
151
+ goalId: 'goal-1',
152
+ }],
153
+ schedules: [{
154
+ originalId: 'schedule-1',
155
+ originalAgentId: 'agent-1',
156
+ name: 'Launch Check',
157
+ projectId: 'project-1',
158
+ taskPrompt: 'Check release readiness',
159
+ taskMode: 'protocol',
160
+ protocolTemplateId: 'template-1',
161
+ protocolParticipantAgentIds: ['agent-1'],
162
+ protocolFacilitatorAgentId: 'agent-1',
163
+ protocolObserverAgentIds: ['agent-1'],
164
+ protocolConfig: { phase: 'ship' },
165
+ scheduleType: 'interval',
166
+ intervalMs: 60000,
167
+ }],
168
+ chatrooms: [{
169
+ originalId: 'room-1',
170
+ originalAgentIds: ['agent-1'],
171
+ name: 'Launch Room Chat',
172
+ chatMode: 'parallel',
173
+ autoAddress: true,
174
+ routingRules: [{
175
+ type: 'keyword',
176
+ keywords: ['release'],
177
+ originalAgentId: 'agent-1',
178
+ priority: 1,
179
+ }],
180
+ }],
181
+ connectors: [{
182
+ originalId: 'connector-1',
183
+ originalAgentId: 'agent-1',
184
+ originalChatroomId: 'room-1',
185
+ name: 'Launch Slack',
186
+ platform: 'slack',
187
+ isEnabled: false,
188
+ config: { channel: 'launch' },
189
+ credentialsScrubbed: true,
190
+ }],
191
+ goals: [{
192
+ originalId: 'goal-1',
193
+ originalProjectId: 'project-1',
194
+ originalAgentId: 'agent-1',
195
+ title: 'Ship fix',
196
+ level: 'project',
197
+ objective: 'Release the portability fix',
198
+ status: 'active',
199
+ }],
200
+ extensions: [{ name: 'builtin-checks' }],
201
+ }),
202
+ }))
203
+ const payload = await response.json()
204
+ const project = Object.values(loadProjects()).find((item) => item.name === 'Launch Room')
205
+ const agent = Object.values(loadAgents()).find((item) => item.name === 'Release Lead')
206
+ const skill = Object.values(loadSkills()).find((item) => item.name === 'Release Skill')
207
+ const schedule = Object.values(loadSchedules()).find((item) => item.name === 'Launch Check')
208
+ const chatroom = Object.values(loadChatrooms()).find((item) => item.name === 'Launch Room Chat')
209
+ const connector = Object.values(loadConnectors()).find((item) => item.name === 'Launch Slack')
210
+ const mcp = Object.values(loadMcpServers()).find((item) => item.name === 'Local Tools')
211
+ const goal = Object.values(loadGoals()).find((item) => item.title === 'Ship fix')
212
+
213
+ console.log(JSON.stringify({
214
+ status: response.status,
215
+ created: {
216
+ agents: payload.agents.created,
217
+ skills: payload.skills.created,
218
+ schedules: payload.schedules.created,
219
+ connectors: payload.connectors.created,
220
+ chatrooms: payload.chatrooms.created,
221
+ mcpServers: payload.mcpServers.created,
222
+ projects: payload.projects.created,
223
+ goals: payload.goals.created,
224
+ },
225
+ projectId: project?.id || null,
226
+ agentId: agent?.id || null,
227
+ agentProjectId: agent?.projectId || null,
228
+ agentSkillIds: agent?.skillIds || [],
229
+ agentMcpServerIds: agent?.mcpServerIds || [],
230
+ agentGoalId: agent?.goalId || null,
231
+ skillId: skill?.id || null,
232
+ skillProjectId: skill?.projectId || null,
233
+ skillAgentIds: skill?.agentIds || [],
234
+ scheduleProjectId: schedule?.projectId || null,
235
+ scheduleParticipantIds: schedule?.protocolParticipantAgentIds || [],
236
+ scheduleFacilitatorId: schedule?.protocolFacilitatorAgentId || null,
237
+ scheduleObserverIds: schedule?.protocolObserverAgentIds || [],
238
+ chatroomId: chatroom?.id || null,
239
+ chatroomAgentIds: chatroom?.agentIds || [],
240
+ connectorAgentId: connector?.agentId || null,
241
+ connectorChatroomId: connector?.chatroomId || null,
242
+ connectorEnabled: connector?.isEnabled ?? null,
243
+ mcpId: mcp?.id || null,
244
+ mcpEnvKeys: Object.keys(mcp?.env || {}),
245
+ goalId: goal?.id || null,
246
+ goalProjectId: goal?.projectId || null,
247
+ goalAgentId: goal?.agentId || null,
248
+ needsCredentials: payload.mcpServers.needsCredentials,
249
+ }))
250
+ `)
251
+
252
+ assert.equal(output.status, 200)
253
+ assert.deepEqual(output.created, {
254
+ agents: 1,
255
+ skills: 1,
256
+ schedules: 1,
257
+ connectors: 1,
258
+ chatrooms: 1,
259
+ mcpServers: 1,
260
+ projects: 1,
261
+ goals: 1,
262
+ })
263
+ assert.equal(output.agentProjectId, output.projectId)
264
+ assert.deepEqual(output.agentSkillIds, [output.skillId])
265
+ assert.deepEqual(output.agentMcpServerIds, [output.mcpId])
266
+ assert.equal(output.agentGoalId, output.goalId)
267
+ assert.equal(output.skillProjectId, output.projectId)
268
+ assert.deepEqual(output.skillAgentIds, [output.agentId])
269
+ assert.equal(output.scheduleProjectId, output.projectId)
270
+ assert.deepEqual(output.scheduleParticipantIds, [output.agentId])
271
+ assert.equal(output.scheduleFacilitatorId, output.agentId)
272
+ assert.deepEqual(output.scheduleObserverIds, [output.agentId])
273
+ assert.deepEqual(output.chatroomAgentIds, [output.agentId])
274
+ assert.equal(output.connectorAgentId, output.agentId)
275
+ assert.equal(output.connectorChatroomId, output.chatroomId)
276
+ assert.equal(output.connectorEnabled, false)
277
+ assert.deepEqual(output.mcpEnvKeys, ['API_TOKEN'])
278
+ assert.equal(output.goalProjectId, output.projectId)
279
+ assert.equal(output.goalAgentId, output.agentId)
280
+ assert.deepEqual(output.needsCredentials, ['Local Tools'])
281
+ })
@@ -16,11 +16,11 @@ export async function POST(req: Request) {
16
16
  }
17
17
 
18
18
  try {
19
- const result = importConfig(parsed.data as PortableManifest)
19
+ const result = importConfig(parsed.data as unknown as PortableManifest)
20
20
  return NextResponse.json(result)
21
21
  } catch (err) {
22
22
  const message = err instanceof Error ? err.message : 'Failed to import manifest'
23
- if (/^Unsupported format version /i.test(message)) {
23
+ if (message.startsWith('Unsupported format version ')) {
24
24
  return NextResponse.json({ error: message }, { status: 400 })
25
25
  }
26
26
  return NextResponse.json({ error: message }, { status: 500 })
package/src/cli/index.js CHANGED
@@ -231,8 +231,10 @@ const COMMAND_GROUPS = [
231
231
  cmd('scenarios', 'GET', '/eval/scenarios', 'List available eval scenarios'),
232
232
  cmd('suites', 'GET', '/eval/suites', 'List available eval suites (core, swe-bench-lite, gaia-l1, ...)'),
233
233
  cmd('status', 'GET', '/eval/run', 'Get eval run status'),
234
+ cmd('environment', 'GET', '/eval/environments', 'Preview validation environment readiness for an eval'),
234
235
  cmd('run', 'POST', '/eval/run', 'Run an eval scenario against an agent', { expectsJsonBody: true }),
235
236
  cmd('suite', 'POST', '/eval/suite', 'Run a full eval suite against an agent (pass { suite: "swe-bench-lite" } in body)', { expectsJsonBody: true }),
237
+ cmd('environment-prepare', 'POST', '/eval/environments', 'Prepare validation environment readiness for an eval', { expectsJsonBody: true }),
236
238
  ],
237
239
  },
238
240
  {
@@ -17,7 +17,7 @@ import {
17
17
  } from '@/lib/quality/quality-summary'
18
18
  import { cn } from '@/lib/utils'
19
19
  import { useAppStore } from '@/stores/use-app-store'
20
- import type { EvalRun, EvalSuiteResult } from '@/lib/server/eval/types'
20
+ import type { EvalEnvironmentPlan, EvalRun, EvalSuiteResult } from '@/lib/server/eval/types'
21
21
  import type { Agent, ApprovalRequest, SessionRunRecord } from '@/types'
22
22
 
23
23
  type QualityTab = 'overview' | 'evals' | 'approvals' | 'runs'
@@ -105,6 +105,96 @@ function EmptyState({ title, description }: { title: string; description: string
105
105
  )
106
106
  }
107
107
 
108
+ function environmentStatusClass(status: EvalEnvironmentPlan['status']): string {
109
+ if (status === 'ready') return 'border-emerald-500/25 bg-emerald-500/10 text-emerald-200'
110
+ if (status === 'warning') return 'border-amber-500/25 bg-amber-500/10 text-amber-200'
111
+ return 'border-rose-500/25 bg-rose-500/10 text-rose-200'
112
+ }
113
+
114
+ function checkClass(level: 'info' | 'warn' | 'error'): string {
115
+ if (level === 'error') return 'border-rose-500/20 bg-rose-500/[0.05] text-rose-200'
116
+ if (level === 'warn') return 'border-amber-500/20 bg-amber-500/[0.05] text-amber-200'
117
+ return 'border-white/[0.06] bg-white/[0.025] text-text-3'
118
+ }
119
+
120
+ function EvalEnvironmentPanel({ plan, loading, onRefresh }: {
121
+ plan: EvalEnvironmentPlan | null
122
+ loading: boolean
123
+ onRefresh: () => void
124
+ }) {
125
+ return (
126
+ <div className="rounded-[12px] border border-white/[0.06] bg-white/[0.025] px-3 py-3">
127
+ <div className="flex items-start justify-between gap-3">
128
+ <div>
129
+ <div className="text-[13px] font-800 text-text">Validation environment</div>
130
+ <p className="mt-1 text-[11px] leading-relaxed text-text-3/65">
131
+ Preflight checks, workspace context, and generated files for the selected eval.
132
+ </p>
133
+ </div>
134
+ <button
135
+ type="button"
136
+ onClick={onRefresh}
137
+ disabled={loading}
138
+ className="shrink-0 rounded-[8px] border border-white/[0.08] px-2 py-1 text-[10px] font-800 text-text-2 transition-colors hover:bg-white/[0.06] disabled:opacity-40"
139
+ >
140
+ {loading ? 'Checking' : 'Refresh'}
141
+ </button>
142
+ </div>
143
+ {!plan ? (
144
+ <div className="mt-3 text-[11px] text-text-3/60">{loading ? 'Checking readiness...' : 'Choose an agent and scenario.'}</div>
145
+ ) : (
146
+ <div className="mt-3 flex flex-col gap-3">
147
+ <div className="flex flex-wrap items-center gap-2">
148
+ <span className={cn('rounded-full border px-2 py-1 text-[10px] font-800 uppercase tracking-[0.08em]', environmentStatusClass(plan.status))}>
149
+ {plan.status}
150
+ </span>
151
+ {plan.target && (
152
+ <span className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">
153
+ {plan.target.kind} - {plan.target.label}
154
+ </span>
155
+ )}
156
+ <span className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">
157
+ {plan.requiredTools.length} tool{plan.requiredTools.length === 1 ? '' : 's'}
158
+ </span>
159
+ <span className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">
160
+ {plan.generatedFiles.length} file{plan.generatedFiles.length === 1 ? '' : 's'}
161
+ </span>
162
+ </div>
163
+ {plan.target?.environmentLabel && (
164
+ <div className="rounded-[10px] border border-white/[0.06] bg-white/[0.02] px-3 py-2 text-[11px] text-text-3/70">
165
+ Environment: <span className="font-700 text-text-2">{plan.target.environmentLabel}</span>
166
+ {plan.target.environmentStatus ? ` (${plan.target.environmentStatus})` : ''}
167
+ </div>
168
+ )}
169
+ <div className="flex flex-col gap-1.5">
170
+ {plan.checks.slice(0, 4).map((check) => (
171
+ <div key={`${check.code}:${check.message}`} className={cn('rounded-[9px] border px-2.5 py-2 text-[11px] leading-relaxed', checkClass(check.level))}>
172
+ <span className="font-800 uppercase tracking-[0.08em]">{check.level}</span>
173
+ <span className="ml-2">{check.message}</span>
174
+ </div>
175
+ ))}
176
+ {plan.checks.length > 4 && (
177
+ <div className="text-[10px] text-text-3/55">+{plan.checks.length - 4} more check{plan.checks.length - 4 === 1 ? '' : 's'}</div>
178
+ )}
179
+ </div>
180
+ <div className="flex flex-wrap gap-1.5">
181
+ {plan.generatedFiles.slice(0, 5).map((file) => (
182
+ <span key={`${file.kind}:${file.path}`} className="rounded-full bg-white/[0.04] px-2 py-1 text-[10px] font-700 text-text-3">
183
+ {file.path}
184
+ </span>
185
+ ))}
186
+ {plan.generatedFiles.length > 5 && (
187
+ <span className="rounded-full bg-white/[0.04] px-2 py-1 text-[10px] font-700 text-text-3">
188
+ +{plan.generatedFiles.length - 5}
189
+ </span>
190
+ )}
191
+ </div>
192
+ </div>
193
+ )}
194
+ </div>
195
+ )
196
+ }
197
+
108
198
  export function QualityWorkspace() {
109
199
  const router = useRouter()
110
200
  const searchParams = useSearchParams()
@@ -127,6 +217,8 @@ export function QualityWorkspace() {
127
217
  const [selectedSuite, setSelectedSuite] = useState('core')
128
218
  const [selectedScenarioId, setSelectedScenarioId] = useState('')
129
219
  const [evalBusy, setEvalBusy] = useState<string | null>(null)
220
+ const [evalEnvironmentPlan, setEvalEnvironmentPlan] = useState<EvalEnvironmentPlan | null>(null)
221
+ const [evalEnvironmentLoading, setEvalEnvironmentLoading] = useState(false)
130
222
  const [approvalBusy, setApprovalBusy] = useState<string | null>(null)
131
223
 
132
224
  useEffect(() => {
@@ -170,6 +262,27 @@ export function QualityWorkspace() {
170
262
  }
171
263
  }, [])
172
264
 
265
+ const loadEvalEnvironmentPlan = useCallback(async (opts: { refreshGateway?: boolean } = {}) => {
266
+ if (!selectedAgentId) {
267
+ setEvalEnvironmentPlan(null)
268
+ return
269
+ }
270
+ const params = new URLSearchParams({ agentId: selectedAgentId })
271
+ if (selectedScenarioId) params.set('scenarioId', selectedScenarioId)
272
+ else if (selectedSuite) params.set('suite', selectedSuite)
273
+ if (opts.refreshGateway) params.set('refreshGateway', 'true')
274
+ setEvalEnvironmentLoading(true)
275
+ try {
276
+ const plan = await api<EvalEnvironmentPlan>('GET', `/eval/environments?${params.toString()}`, undefined, { timeoutMs: opts.refreshGateway ? 20_000 : 8_000 })
277
+ setEvalEnvironmentPlan(plan)
278
+ } catch (err) {
279
+ setEvalEnvironmentPlan(null)
280
+ toast.error(err instanceof Error ? err.message : 'Unable to validate eval environment')
281
+ } finally {
282
+ setEvalEnvironmentLoading(false)
283
+ }
284
+ }, [selectedAgentId, selectedScenarioId, selectedSuite])
285
+
173
286
  useEffect(() => {
174
287
  void loadQualityData()
175
288
  }, [loadQualityData])
@@ -184,6 +297,10 @@ export function QualityWorkspace() {
184
297
  if (!selectedScenarioId && scenarios[0]) setSelectedScenarioId(scenarios[0].id)
185
298
  }, [scenarios, selectedScenarioId])
186
299
 
300
+ useEffect(() => {
301
+ void loadEvalEnvironmentPlan()
302
+ }, [loadEvalEnvironmentPlan])
303
+
187
304
  useEffect(() => {
188
305
  if (!suites.some((suite) => suite.name === selectedSuite) && suites[0]) {
189
306
  setSelectedSuite(suites[0].name)
@@ -208,34 +325,56 @@ export function QualityWorkspace() {
208
325
  toast.error('Choose an agent and scenario first')
209
326
  return
210
327
  }
328
+ if (evalEnvironmentPlan?.status === 'blocked') {
329
+ toast.error('Fix the validation environment before running this eval')
330
+ return
331
+ }
211
332
  setEvalBusy(`scenario:${selectedScenarioId}`)
212
333
  try {
213
- await api<EvalRun>('POST', '/eval/run', { agentId: selectedAgentId, scenarioId: selectedScenarioId }, { timeoutMs: 180_000 })
334
+ await api<EvalRun>('POST', '/eval/run', {
335
+ agentId: selectedAgentId,
336
+ scenarioId: selectedScenarioId,
337
+ gatewayProfileId: evalEnvironmentPlan?.target?.gatewayProfileId || null,
338
+ environmentId: evalEnvironmentPlan?.target?.environmentId || null,
339
+ refreshGateway: evalEnvironmentPlan?.target?.kind === 'gateway',
340
+ }, { timeoutMs: 180_000 })
214
341
  toast.success('Eval scenario completed')
215
342
  await loadQualityData({ silent: true })
343
+ await loadEvalEnvironmentPlan()
216
344
  } catch (err) {
217
345
  toast.error(err instanceof Error ? err.message : 'Eval scenario failed')
218
346
  } finally {
219
347
  setEvalBusy(null)
220
348
  }
221
- }, [loadQualityData, selectedAgentId, selectedScenarioId])
349
+ }, [evalEnvironmentPlan, loadEvalEnvironmentPlan, loadQualityData, selectedAgentId, selectedScenarioId])
222
350
 
223
351
  const runSuite = useCallback(async (suiteName: string) => {
224
352
  if (!selectedAgentId) {
225
353
  toast.error('Choose an agent first')
226
354
  return
227
355
  }
356
+ if (evalEnvironmentPlan?.status === 'blocked') {
357
+ toast.error('Fix the validation environment before running this suite')
358
+ return
359
+ }
228
360
  setEvalBusy(`suite:${suiteName}`)
229
361
  try {
230
- const result = await api<EvalSuiteResult>('POST', '/eval/suite', { agentId: selectedAgentId, suite: suiteName }, { timeoutMs: 300_000 })
362
+ const result = await api<EvalSuiteResult>('POST', '/eval/suite', {
363
+ agentId: selectedAgentId,
364
+ suite: suiteName,
365
+ gatewayProfileId: evalEnvironmentPlan?.target?.gatewayProfileId || null,
366
+ environmentId: evalEnvironmentPlan?.target?.environmentId || null,
367
+ refreshGateway: evalEnvironmentPlan?.target?.kind === 'gateway',
368
+ }, { timeoutMs: 300_000 })
231
369
  toast.success(`Suite completed at ${Math.round(result.percentage)}%`)
232
370
  await loadQualityData({ silent: true })
371
+ await loadEvalEnvironmentPlan()
233
372
  } catch (err) {
234
373
  toast.error(err instanceof Error ? err.message : 'Eval suite failed')
235
374
  } finally {
236
375
  setEvalBusy(null)
237
376
  }
238
- }, [loadQualityData, selectedAgentId])
377
+ }, [evalEnvironmentPlan, loadEvalEnvironmentPlan, loadQualityData, selectedAgentId])
239
378
 
240
379
  const actOnApproval = useCallback(async (approval: ApprovalRequest, approved: boolean) => {
241
380
  setApprovalBusy(approval.id)
@@ -456,6 +595,11 @@ export function QualityWorkspace() {
456
595
  </div>
457
596
  </div>
458
597
  )}
598
+ <EvalEnvironmentPanel
599
+ plan={evalEnvironmentPlan}
600
+ loading={evalEnvironmentLoading}
601
+ onRefresh={() => void loadEvalEnvironmentPlan({ refreshGateway: true })}
602
+ />
459
603
  <button
460
604
  type="button"
461
605
  onClick={() => openMissionTemplate('release-candidate-qa')}