@swarmclawai/swarmclaw 1.9.4 → 1.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -0
- package/package.json +2 -2
- package/src/app/api/eval/environments/route.ts +59 -0
- package/src/app/api/eval/run/route.ts +8 -1
- package/src/app/api/eval/suite/route.ts +6 -0
- package/src/app/api/portability/export/route.test.ts +225 -0
- package/src/app/api/portability/export/route.ts +18 -9
- package/src/app/api/portability/import/route.test.ts +232 -31
- package/src/app/api/portability/import/route.ts +2 -2
- package/src/cli/index.js +2 -0
- package/src/components/quality/quality-workspace.tsx +149 -5
- package/src/lib/server/eval/environment-plan.test.ts +221 -0
- package/src/lib/server/eval/environment-plan.ts +498 -0
- package/src/lib/server/eval/runner.ts +53 -3
- package/src/lib/server/eval/scenarios.ts +18 -0
- package/src/lib/server/eval/types.ts +55 -0
- package/src/lib/server/portability/export.ts +244 -38
- package/src/lib/server/portability/import.ts +148 -98
- package/src/lib/validation/schemas.ts +54 -1
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
import assert from 'node:assert/strict'
|
|
2
|
+
import fs from 'node:fs'
|
|
3
|
+
import os from 'node:os'
|
|
4
|
+
import path from 'node:path'
|
|
5
|
+
import test from 'node:test'
|
|
6
|
+
|
|
7
|
+
import type { Agent, GatewayProfile } from '@/types'
|
|
8
|
+
import { getScenario } from './scenarios'
|
|
9
|
+
import { buildEvalEnvironmentPlan, writeEvalEnvironmentWorkspace } from './environment-plan'
|
|
10
|
+
import type { EvalEnvironmentPlan, EvalScenario } from './types'
|
|
11
|
+
|
|
12
|
+
/**
 * Builds an `Agent` fixture for the eval environment tests.
 *
 * The defaults describe a local Ollama agent with no tools; any field present
 * in `overrides` replaces the corresponding default because it is spread last.
 *
 * @param overrides - Fields to replace on the default fixture.
 * @returns The merged fixture, asserted to `Agent`.
 */
function makeAgent(overrides: Partial<Agent> = {}): Agent {
  const agent = {
    id: 'agent-1',
    name: 'Eval Agent',
    description: 'Validates eval environments.',
    systemPrompt: 'You are an eval agent.',
    provider: 'ollama',
    model: 'llama3',
    ollamaMode: 'local',
    tools: [],
    createdAt: 1,
    updatedAt: 1,
    // Spread last so callers can override any default above.
    ...overrides,
  } as Agent

  return agent
}
|
|
27
|
+
|
|
28
|
+
/**
 * Builds a `GatewayProfile` fixture for the eval environment tests.
 *
 * The defaults describe a healthy OpenClaw gateway on loopback reporting one
 * connected node and one available environment. Fields in `overrides` replace
 * defaults wholesale (a provided `stats` object replaces the default `stats`,
 * it is not merged into it).
 *
 * @param overrides - Fields to replace on the default fixture.
 * @returns The merged fixture, asserted to `GatewayProfile`.
 */
function makeGateway(overrides: Partial<GatewayProfile> = {}): GatewayProfile {
  const gateway = {
    id: 'gateway-1',
    name: 'Gateway 1',
    provider: 'openclaw',
    endpoint: 'http://127.0.0.1:18789/v1',
    wsUrl: 'ws://127.0.0.1:18789',
    credentialId: null,
    status: 'healthy',
    stats: {
      nodeCount: 1,
      connectedNodeCount: 1,
      environmentCount: 1,
      availableEnvironmentCount: 1,
      pendingNodePairings: 0,
      pendingDevicePairings: 0,
      pairedDeviceCount: 0,
      lastTopologyCheckedAt: 2,
      lastTopologyErrorCount: 0,
      lastTopologyError: null,
    },
    createdAt: 1,
    updatedAt: 1,
    // Spread last so callers can override any default above.
    ...overrides,
  } as GatewayProfile

  return gateway
}
|
|
54
|
+
|
|
55
|
+
// A CLI-backed agent whose readiness probe reports `ok: false` must produce a
// blocked plan that still targets the local machine and carries an
// error-level `cli_provider_not_ready` check.
test('eval environment plan blocks missing CLI provider readiness before a run spends tokens', async () => {
  // Factory (not a shared object) so each call yields a fresh agent fixture.
  const loadAgents = () => ({
    'agent-cli': makeAgent({
      id: 'agent-cli',
      provider: 'codex-cli',
      model: 'gpt-5.2',
      ollamaMode: null,
    }),
  })

  const { status, target, checks } = await buildEvalEnvironmentPlan(
    { agentId: 'agent-cli', scenarioId: 'coding-prime' },
    {
      now: () => 123,
      loadAgents,
      listGatewayProfiles: () => [],
      // Stubbed readiness probe reporting the CLI binary as unavailable.
      checkCliProviderReady: () => ({
        ok: false,
        message: 'Codex CLI is not installed.',
        providerId: 'codex-cli',
        displayName: 'Codex CLI',
        binaryName: 'codex',
      }),
    },
  )

  assert.equal(status, 'blocked')
  assert.equal(target?.kind, 'local')

  const hasCliNotReadyError = checks.some(
    (check) => check.code === 'cli_provider_not_ready' && check.level === 'error',
  )
  assert.ok(hasCliNotReadyError)
})
|
|
83
|
+
|
|
84
|
+
// With `refreshGateway: true` and a stubbed environment listing that contains
// one `starting` and one `available` sandbox, the plan must be ready and must
// target the available sandbox.
test('eval environment plan refreshes gateway environments and selects an available target', async () => {
  // Factory (not a shared object) so each call yields a fresh agent fixture.
  const loadAgents = () => ({
    'agent-openclaw': makeAgent({
      id: 'agent-openclaw',
      provider: 'openclaw',
      model: 'default',
      gatewayProfileId: 'gateway-1',
    }),
  })

  const { status, target, checks } = await buildEvalEnvironmentPlan(
    {
      agentId: 'agent-openclaw',
      scenarioId: 'coding-prime',
      refreshGateway: true,
    },
    {
      now: () => 456,
      loadAgents,
      listGatewayProfiles: () => [makeGateway()],
      // Stubbed live refresh: the first environment is still starting, the second is usable.
      listGatewayEnvironments: async () => ({
        profile: makeGateway(),
        connected: true,
        refreshedAt: 789,
        errors: [],
        environments: [
          { id: 'env-busy', type: 'sandbox', label: 'Busy', status: 'starting', capabilities: ['agent.run'] },
          { id: 'env-ready', type: 'sandbox', label: 'Ready', status: 'available', capabilities: ['agent.run', 'workspace'] },
        ],
      }),
    },
  )

  assert.equal(status, 'ready')
  assert.equal(target?.kind, 'gateway')
  assert.equal(target?.environmentId, 'env-ready')
  assert.equal(target?.environmentStatus, 'available')
  assert.deepEqual(target?.capabilities, ['agent.run', 'workspace'])

  const reportsAvailableEnvironment = checks.some((check) => check.code === 'environment_available')
  assert.ok(reportsAvailableEnvironment)
})
|
|
122
|
+
|
|
123
|
+
// A gateway whose stats report two environments but zero available ones must
// produce a blocked plan with a `no_available_gateway_environment` check.
test('eval environment plan blocks gateways with no available execution environments', async () => {
  // Factories (not shared objects) so each call yields fresh fixtures.
  const loadAgents = () => ({
    'agent-openclaw': makeAgent({
      id: 'agent-openclaw',
      provider: 'openclaw',
      model: 'default',
      gatewayProfileId: 'gateway-1',
    }),
  })

  const listGatewayProfiles = () => [
    makeGateway({
      stats: {
        nodeCount: 1,
        connectedNodeCount: 1,
        environmentCount: 2,
        availableEnvironmentCount: 0,
        pendingNodePairings: 0,
        pendingDevicePairings: 0,
        pairedDeviceCount: 0,
      },
    }),
  ]

  const { status, checks } = await buildEvalEnvironmentPlan(
    { agentId: 'agent-openclaw', scenarioId: 'coding-prime' },
    { loadAgents, listGatewayProfiles },
  )

  assert.equal(status, 'blocked')

  const reportsNoEnvironment = checks.some((check) => check.code === 'no_available_gateway_environment')
  assert.ok(reportsNoEnvironment)
})
|
|
154
|
+
|
|
155
|
+
// Writes the eval workspace for the built-in `multi-step-analyze` scenario into
// a throwaway directory and checks that the manifest, env hints, and scenario
// fixtures land on disk with the expected content.
test('eval workspace writer materializes manifests, env hints, and scenario fixtures', async () => {
  const scenario = getScenario('multi-step-analyze')
  assert.ok(scenario)

  const root = fs.mkdtempSync(path.join(os.tmpdir(), 'swarmclaw-eval-env-'))
  try {
    const plan = await buildEvalEnvironmentPlan(
      { agentId: 'agent-1', scenarioId: scenario.id },
      {
        now: () => 999,
        loadAgents: () => ({ 'agent-1': makeAgent() }),
        listGatewayProfiles: () => [],
      },
    )

    const files = writeEvalEnvironmentWorkspace({
      runId: 'run-1',
      workspacePath: root,
      scenario,
      plan,
    })

    // The writer's return value must list the scenario fixture...
    assert.ok(files.some((file) => file.path === 'package.json' && file.kind === 'fixture'))

    // ...and the generated files must exist on disk.
    assert.ok(fs.existsSync(path.join(root, 'README.md')))
    assert.ok(fs.existsSync(path.join(root, 'environment.json')))
    assert.ok(fs.existsSync(path.join(root, '.env.swarmclaw-eval')))

    const fixture = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8')) as { dependencies?: Record<string, string> }
    assert.equal(fixture.dependencies?.zod, '^4.1.13')

    const envHints = fs.readFileSync(path.join(root, '.env.swarmclaw-eval'), 'utf8')
    assert.ok(envHints.includes('SWARMCLAW_EVAL_RUN_ID="run-1"'))
  } finally {
    // Remove the temp workspace even when an assertion fails, so repeated
    // test runs do not accumulate directories under the OS temp dir.
    fs.rmSync(root, { recursive: true, force: true })
  }
})
|
|
183
|
+
|
|
184
|
+
// A scenario fixture whose path climbs out of the workspace (`../outside.txt`)
// must make the writer throw instead of writing outside the workspace.
test('eval workspace writer refuses fixture paths outside the eval workspace', () => {
  // The workspace is nested one level inside a private sandbox directory so
  // that `../outside.txt` resolves inside the sandbox rather than the shared
  // OS temp directory; a regressed writer then cannot litter the host.
  const sandbox = fs.mkdtempSync(path.join(os.tmpdir(), 'swarmclaw-eval-unsafe-'))
  try {
    const root = path.join(sandbox, 'workspace')
    fs.mkdirSync(root)

    const scenario: EvalScenario = {
      id: 'unsafe-fixture',
      name: 'Unsafe Fixture',
      category: 'coding',
      description: 'Unsafe fixture path test',
      userMessage: 'noop',
      expectedBehaviors: [],
      scoringCriteria: [],
      timeoutMs: 1,
      tools: [],
      fixtures: [{ path: '../outside.txt', content: 'nope' }],
    }

    // Hand-built minimal plan: this test exercises only the writer.
    const plan: EvalEnvironmentPlan = {
      generatedAt: 1,
      status: 'ready',
      agentId: 'agent-1',
      agentName: 'Eval Agent',
      scenarioIds: [scenario.id],
      suite: null,
      target: null,
      checks: [],
      requiredTools: [],
      missingTools: [],
      maxScore: 0,
      timeoutMs: 1,
      generatedFiles: [],
      envHints: [],
    }

    assert.throws(() => writeEvalEnvironmentWorkspace({
      runId: 'run-unsafe',
      workspacePath: root,
      scenario,
      plan,
    }), /Unsafe eval fixture path/)

    // Throwing is not enough: the escaping fixture must not have been written.
    assert.equal(fs.existsSync(path.join(sandbox, 'outside.txt')), false)
  } finally {
    // Remove the sandbox even when an assertion fails, so repeated test runs
    // do not accumulate directories under the OS temp dir.
    fs.rmSync(sandbox, { recursive: true, force: true })
  }
})
|