@workbench-ai/workbench-core 0.0.67 → 0.0.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-graph.d.ts +4 -3
- package/dist/execution-graph.d.ts.map +1 -1
- package/dist/execution-graph.js +15 -14
- package/dist/execution-jobs.d.ts +5 -20
- package/dist/execution-jobs.d.ts.map +1 -1
- package/dist/execution-jobs.js +7 -91
- package/dist/execution-outputs.d.ts +2 -2
- package/dist/execution-outputs.d.ts.map +1 -1
- package/dist/execution-outputs.js +10 -10
- package/dist/execution-runtime-types.d.ts +1 -1
- package/dist/execution-runtime-types.d.ts.map +1 -1
- package/dist/execution-scheduler.d.ts.map +1 -1
- package/dist/execution-scheduler.js +4 -1
- package/dist/execution-traces.js +1 -1
- package/dist/generic-spec.d.ts +29 -29
- package/dist/generic-spec.d.ts.map +1 -1
- package/dist/generic-spec.js +94 -92
- package/dist/index.d.ts +325 -220
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5716 -3900
- package/dist/runtime-dockerfile.d.ts +1 -1
- package/dist/runtime-dockerfile.d.ts.map +1 -1
- package/dist/runtime-dockerfile.js +4 -4
- package/dist/runtime-utils.d.ts +1 -1
- package/dist/runtime-utils.d.ts.map +1 -1
- package/dist/runtime-utils.js +3 -3
- package/dist/sandbox-backends/docker.js +7 -5
- package/dist/sandbox-inputs.js +3 -3
- package/dist/sandbox-plane.d.ts.map +1 -1
- package/dist/sandbox-plane.js +13 -9
- package/dist/skill-patch.d.ts +8 -0
- package/dist/skill-patch.d.ts.map +1 -0
- package/dist/{candidate-patch.js → skill-patch.js} +5 -5
- package/package.json +3 -3
- package/worker/sandbox-adapter-runner.cjs +2 -2
- package/dist/candidate-patch.d.ts +0 -8
- package/dist/candidate-patch.d.ts.map +0 -1
- package/dist/execution-evidence.d.ts +0 -22
- package/dist/execution-evidence.d.ts.map +0 -1
- package/dist/execution-evidence.js +0 -302
- package/dist/inspection.d.ts +0 -117
- package/dist/inspection.d.ts.map +0 -1
- package/dist/inspection.js +0 -224
package/dist/generic-spec.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { createHash } from "node:crypto";
|
|
2
2
|
import { isWorkbenchExecutionNetworkEgress, } from "@workbench-ai/workbench-contract";
|
|
3
3
|
import YAML from "yaml";
|
|
4
|
-
export const
|
|
5
|
-
export const
|
|
4
|
+
export const EVAL_SPEC_FILE = "eval.yaml";
|
|
5
|
+
export const SKILL_SPEC_FILE = "skill.yaml";
|
|
6
6
|
export const DEFAULT_EXECUTION_RESOURCES = {
|
|
7
7
|
cpu: 2,
|
|
8
8
|
memoryGb: 4,
|
|
@@ -14,7 +14,7 @@ export function validateWorkbenchResolvedSourceYaml(source) {
|
|
|
14
14
|
const warnings = [];
|
|
15
15
|
const trimmed = source.trim();
|
|
16
16
|
if (!trimmed) {
|
|
17
|
-
errors.push("Resolved Workbench
|
|
17
|
+
errors.push("Resolved Workbench spec cannot be empty.");
|
|
18
18
|
}
|
|
19
19
|
if (trimmed) {
|
|
20
20
|
try {
|
|
@@ -31,25 +31,25 @@ export function validateWorkbenchResolvedSourceYaml(source) {
|
|
|
31
31
|
};
|
|
32
32
|
}
|
|
33
33
|
export function resolveWorkbenchResolvedSourceYaml(source) {
|
|
34
|
-
const parsed = parseYamlRecord(source, "resolved Workbench
|
|
34
|
+
const parsed = parseYamlRecord(source, "resolved Workbench spec");
|
|
35
35
|
const errors = [];
|
|
36
|
-
rejectUnknownKeys(parsed, "resolved Workbench
|
|
36
|
+
rejectUnknownKeys(parsed, "resolved Workbench spec", [
|
|
37
37
|
"version",
|
|
38
|
-
"
|
|
39
|
-
"
|
|
38
|
+
"eval",
|
|
39
|
+
"skill",
|
|
40
40
|
], errors);
|
|
41
41
|
if (parsed.version !== 4) {
|
|
42
|
-
throw new Error("Resolved Workbench
|
|
42
|
+
throw new Error("Resolved Workbench spec version must be 4.");
|
|
43
43
|
}
|
|
44
|
-
const
|
|
45
|
-
const
|
|
44
|
+
const evalSpec = normalizeEvalRecord(readRequiredRecord(parsed.eval, EVAL_SPEC_FILE, errors), EVAL_SPEC_FILE, "resolved", errors);
|
|
45
|
+
const skill = normalizeSkillRecord(readRequiredRecord(parsed.skill, "resolved Workbench spec.skill", errors), "resolved Workbench spec.skill", "resolved", errors);
|
|
46
46
|
if (errors.length > 0) {
|
|
47
47
|
throw new Error(errors.join("\n"));
|
|
48
48
|
}
|
|
49
49
|
return genericSpecFromAuthoredBundle({
|
|
50
50
|
version: 4,
|
|
51
|
-
|
|
52
|
-
|
|
51
|
+
eval: evalSpec,
|
|
52
|
+
skill: skill,
|
|
53
53
|
});
|
|
54
54
|
}
|
|
55
55
|
export function engineResolveBindingForSourceYaml(source) {
|
|
@@ -58,7 +58,7 @@ export function engineResolveBindingForSourceYaml(source) {
|
|
|
58
58
|
export function engineResolveBindingForSpec(spec) {
|
|
59
59
|
const resolver = engineResolveInvocationForSpec(spec);
|
|
60
60
|
return {
|
|
61
|
-
engine: spec.
|
|
61
|
+
engine: spec.eval.engine.use,
|
|
62
62
|
resolver: {
|
|
63
63
|
use: resolver.use,
|
|
64
64
|
withFingerprint: fingerprintJson(resolver.with ?? {}),
|
|
@@ -67,29 +67,29 @@ export function engineResolveBindingForSpec(spec) {
|
|
|
67
67
|
}
|
|
68
68
|
export function resolveWorkbenchSourceFiles(args) {
|
|
69
69
|
return genericSpecFromAuthoredBundle(parseWorkbenchSourceFiles({
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
70
|
+
evalSource: args.evalSource,
|
|
71
|
+
skillSource: args.skillSource,
|
|
72
|
+
selectedAgentId: args.selectedAgentId,
|
|
73
73
|
}));
|
|
74
74
|
}
|
|
75
75
|
export function parseWorkbenchSourceFiles(args) {
|
|
76
76
|
const errors = [];
|
|
77
|
-
const
|
|
78
|
-
const
|
|
77
|
+
const evalSpec = normalizeEvalRecord(parseYamlRecord(args.evalSource, EVAL_SPEC_FILE), EVAL_SPEC_FILE, "authored", errors);
|
|
78
|
+
const skill = normalizeSkillRecord(parseYamlRecord(args.skillSource ?? "", "skill YAML"), "skill YAML", "authored", errors, args.selectedAgentId ?? undefined);
|
|
79
79
|
if (errors.length > 0) {
|
|
80
80
|
throw new Error(errors.join("\n"));
|
|
81
81
|
}
|
|
82
82
|
return {
|
|
83
83
|
version: 4,
|
|
84
|
-
|
|
85
|
-
|
|
84
|
+
eval: evalSpec,
|
|
85
|
+
skill: skill,
|
|
86
86
|
};
|
|
87
87
|
}
|
|
88
88
|
export function serializeWorkbenchResolvedSourceYaml(source) {
|
|
89
89
|
return YAML.stringify(source).trimEnd() + "\n";
|
|
90
90
|
}
|
|
91
|
-
export function
|
|
92
|
-
return /^
|
|
91
|
+
export function isWorkbenchSkillManifestPath(filePath) {
|
|
92
|
+
return /^skills\/[^/]+\/skill\.ya?ml$/iu.test(filePath.replace(/\\/gu, "/").replace(/^\/+/u, "").replace(/^(?:\.\/)+/u, ""));
|
|
93
93
|
}
|
|
94
94
|
export function resolveEngineCaseExecutionConfig(args) {
|
|
95
95
|
return {
|
|
@@ -131,38 +131,38 @@ export function runtimeSandboxRef(runtime) {
|
|
|
131
131
|
return `dockerfile://${runtime.dockerfile}`;
|
|
132
132
|
}
|
|
133
133
|
function genericSpecFromAuthoredBundle(source) {
|
|
134
|
-
const engineRuntime = engineRuntimeFromConfig(source.
|
|
135
|
-
const engineRun = cloneEngineInvocation(source.
|
|
136
|
-
const engineResolve = cloneEngineInvocation(source.
|
|
137
|
-
const
|
|
138
|
-
const
|
|
139
|
-
if (!
|
|
140
|
-
throw new Error(`
|
|
134
|
+
const engineRuntime = engineRuntimeFromConfig(source.eval.engine);
|
|
135
|
+
const engineRun = cloneEngineInvocation(source.eval.engine);
|
|
136
|
+
const engineResolve = cloneEngineInvocation(source.eval.engine);
|
|
137
|
+
const skill = source.skill;
|
|
138
|
+
const selectedAgent = skill.agents[skill.selectedAgentId];
|
|
139
|
+
if (!selectedAgent) {
|
|
140
|
+
throw new Error(`Skill agent not found: ${skill.selectedAgentId}`);
|
|
141
141
|
}
|
|
142
142
|
return {
|
|
143
143
|
version: 4,
|
|
144
|
-
name: source.
|
|
145
|
-
description: source.
|
|
146
|
-
|
|
147
|
-
name: source.
|
|
148
|
-
description: source.
|
|
149
|
-
engine: cloneJson(source.
|
|
144
|
+
name: source.eval.name,
|
|
145
|
+
description: source.eval.description,
|
|
146
|
+
eval: {
|
|
147
|
+
name: source.eval.name,
|
|
148
|
+
description: source.eval.description,
|
|
149
|
+
engine: cloneJson(source.eval.engine),
|
|
150
150
|
},
|
|
151
|
-
|
|
152
|
-
name:
|
|
153
|
-
...(
|
|
154
|
-
files: cloneJson(
|
|
155
|
-
...(
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
...(
|
|
151
|
+
skill: {
|
|
152
|
+
name: skill.name,
|
|
153
|
+
...(skill.description ? { description: skill.description } : {}),
|
|
154
|
+
files: cloneJson(skill.files),
|
|
155
|
+
...(skill.prepare ? { prepare: cloneJson(skill.prepare) } : {}),
|
|
156
|
+
defaultAgent: skill.defaultAgent ?? skill.selectedAgentId,
|
|
157
|
+
selectedAgentId: skill.selectedAgentId,
|
|
158
|
+
selectedAgentName: selectedAgent.name,
|
|
159
|
+
agents: cloneJson(skill.agents),
|
|
160
|
+
...(skill.improve
|
|
161
161
|
? {
|
|
162
162
|
improve: {
|
|
163
|
-
edits: [...
|
|
164
|
-
...(
|
|
165
|
-
...(
|
|
163
|
+
edits: [...skill.improve.edits],
|
|
164
|
+
...(skill.improve.optimizeOn ? { optimizeOn: cloneJson(skill.improve.optimizeOn) } : {}),
|
|
165
|
+
...(skill.improve.selectBy ? { selectBy: cloneJson(skill.improve.selectBy) } : {}),
|
|
166
166
|
},
|
|
167
167
|
}
|
|
168
168
|
: {}),
|
|
@@ -170,18 +170,18 @@ function genericSpecFromAuthoredBundle(source) {
|
|
|
170
170
|
environment: cloneJson(engineRuntime),
|
|
171
171
|
adapters: [
|
|
172
172
|
...new Set([
|
|
173
|
-
...source.
|
|
174
|
-
...
|
|
173
|
+
...source.eval.adapters,
|
|
174
|
+
...skill.adapters,
|
|
175
175
|
]),
|
|
176
176
|
],
|
|
177
|
-
engine: cloneJson(source.
|
|
177
|
+
engine: cloneJson(source.eval.engine),
|
|
178
178
|
engineResolve: cloneJson(engineResolve),
|
|
179
|
-
...(
|
|
180
|
-
run: clonePhaseAdapter(
|
|
179
|
+
...(skill.improve ? { improve: clonePhaseAdapter(skill.improve) } : {}),
|
|
180
|
+
run: clonePhaseAdapter(selectedAgent),
|
|
181
181
|
engineRun: cloneJson(engineRun),
|
|
182
182
|
};
|
|
183
183
|
}
|
|
184
|
-
function
|
|
184
|
+
function normalizeEvalRecord(record, label, mode, errors) {
|
|
185
185
|
if (!record) {
|
|
186
186
|
return null;
|
|
187
187
|
}
|
|
@@ -192,7 +192,7 @@ function normalizeBenchmarkRecord(record, label, errors) {
|
|
|
192
192
|
"adapters",
|
|
193
193
|
"engine",
|
|
194
194
|
], errors);
|
|
195
|
-
|
|
195
|
+
requireSpecVersion(record.version, label, mode === "authored" ? 1 : 4, errors);
|
|
196
196
|
const name = readRequiredString(record.name, `${label}.name`, errors);
|
|
197
197
|
const description = readRequiredString(record.description, `${label}.description`, errors);
|
|
198
198
|
const adapters = normalizeAdapterSources(record.adapters, `${label}.adapters`, errors);
|
|
@@ -222,7 +222,7 @@ function normalizeEngineRuntimeConfig(engine, label, errors) {
|
|
|
222
222
|
}
|
|
223
223
|
}
|
|
224
224
|
}
|
|
225
|
-
function
|
|
225
|
+
function normalizeSkillRecord(record, label, mode, errors, selectedAgentId) {
|
|
226
226
|
if (!record) {
|
|
227
227
|
return null;
|
|
228
228
|
}
|
|
@@ -233,26 +233,28 @@ function normalizeCandidateRecord(record, label, errors, selectedRunId) {
|
|
|
233
233
|
"files",
|
|
234
234
|
"prepare",
|
|
235
235
|
"adapters",
|
|
236
|
-
"
|
|
237
|
-
"
|
|
236
|
+
"defaultAgent",
|
|
237
|
+
"agents",
|
|
238
|
+
...(mode === "resolved" ? ["selectedAgentId"] : []),
|
|
238
239
|
"improve",
|
|
239
|
-
"selectedRunId",
|
|
240
240
|
], errors);
|
|
241
|
-
|
|
241
|
+
requireSpecVersion(record.version, label, mode === "authored" ? 1 : 4, errors);
|
|
242
242
|
const name = readRequiredString(record.name, `${label}.name`, errors);
|
|
243
243
|
const description = readOptionalString(record.description, `${label}.description`, errors);
|
|
244
244
|
const files = normalizePathRef(record.files, `${label}.files`, errors);
|
|
245
|
-
const prepare =
|
|
245
|
+
const prepare = normalizeSkillPrepare(record.prepare, `${label}.prepare`, errors);
|
|
246
246
|
const adapters = normalizeAdapterSources(record.adapters, `${label}.adapters`, errors);
|
|
247
|
-
const
|
|
248
|
-
const
|
|
249
|
-
const
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
247
|
+
const agents = normalizeSkillAgents(record.agents, `${label}.agents`, errors);
|
|
248
|
+
const defaultAgent = readOptionalString(record.defaultAgent, `${label}.defaultAgent`, errors);
|
|
249
|
+
const embeddedSelectedAgent = mode === "resolved"
|
|
250
|
+
? readOptionalString(record.selectedAgentId, `${label}.selectedAgentId`, errors)
|
|
251
|
+
: undefined;
|
|
252
|
+
const selected = selectedAgentId ?? embeddedSelectedAgent ?? defaultAgent ?? Object.keys(agents).sort()[0];
|
|
253
|
+
if (selected && !agents[selected]) {
|
|
254
|
+
errors.push(`${label}.${mode === "authored" ? "defaultAgent" : "selectedAgentId"} references unknown agent ${selected}.`);
|
|
255
|
+
}
|
|
256
|
+
const improve = normalizeSkillImprove(record.improve, `${label}.improve`, errors);
|
|
257
|
+
return name && files && selected && Object.keys(agents).length > 0
|
|
256
258
|
? {
|
|
257
259
|
version: 4,
|
|
258
260
|
name,
|
|
@@ -260,14 +262,14 @@ function normalizeCandidateRecord(record, label, errors, selectedRunId) {
|
|
|
260
262
|
files,
|
|
261
263
|
...(prepare ? { prepare } : {}),
|
|
262
264
|
adapters,
|
|
263
|
-
...(
|
|
264
|
-
|
|
265
|
+
...(defaultAgent ? { defaultAgent } : {}),
|
|
266
|
+
agents,
|
|
265
267
|
...(improve ? { improve } : {}),
|
|
266
|
-
|
|
268
|
+
selectedAgentId: selected,
|
|
267
269
|
}
|
|
268
270
|
: null;
|
|
269
271
|
}
|
|
270
|
-
function
|
|
272
|
+
function normalizeSkillPrepare(value, label, errors) {
|
|
271
273
|
if (value === undefined) {
|
|
272
274
|
return undefined;
|
|
273
275
|
}
|
|
@@ -279,37 +281,37 @@ function normalizeCandidatePrepare(value, label, errors) {
|
|
|
279
281
|
const command = readRequiredString(record.command, `${label}.command`, errors);
|
|
280
282
|
return command ? { command } : undefined;
|
|
281
283
|
}
|
|
282
|
-
function
|
|
284
|
+
function normalizeSkillAgents(value, label, errors) {
|
|
283
285
|
const record = readRequiredRecord(value, label, errors);
|
|
284
286
|
if (!record) {
|
|
285
287
|
return {};
|
|
286
288
|
}
|
|
287
|
-
const
|
|
288
|
-
for (const [
|
|
289
|
-
if (!/^[a-zA-Z0-9][a-zA-Z0-9._-]*$/u.test(
|
|
290
|
-
errors.push(`${label}.${
|
|
289
|
+
const agents = {};
|
|
290
|
+
for (const [agentId, agentValue] of Object.entries(record).sort(([left], [right]) => left.localeCompare(right))) {
|
|
291
|
+
if (!/^[a-zA-Z0-9][a-zA-Z0-9._-]*$/u.test(agentId)) {
|
|
292
|
+
errors.push(`${label}.${agentId} must use letters, numbers, dots, underscores, or dashes.`);
|
|
291
293
|
continue;
|
|
292
294
|
}
|
|
293
|
-
const
|
|
294
|
-
if (!
|
|
295
|
+
const agentRecord = readRequiredRecord(agentValue, `${label}.${agentId}`, errors);
|
|
296
|
+
if (!agentRecord) {
|
|
295
297
|
continue;
|
|
296
298
|
}
|
|
297
|
-
rejectUnknownKeys(
|
|
298
|
-
const name = readRequiredString(
|
|
299
|
-
const invocation = normalizePhaseAdapter(adapterRecordFrom(
|
|
299
|
+
rejectUnknownKeys(agentRecord, `${label}.${agentId}`, ["name", "use", "with", "auth"], errors);
|
|
300
|
+
const name = readRequiredString(agentRecord.name, `${label}.${agentId}.name`, errors);
|
|
301
|
+
const invocation = normalizePhaseAdapter(adapterRecordFrom(agentRecord), `${label}.${agentId}`, errors);
|
|
300
302
|
if (name && invocation) {
|
|
301
|
-
|
|
303
|
+
agents[agentId] = {
|
|
302
304
|
name,
|
|
303
305
|
...invocation,
|
|
304
306
|
};
|
|
305
307
|
}
|
|
306
308
|
}
|
|
307
|
-
if (Object.keys(
|
|
308
|
-
errors.push(`${label} must declare at least one
|
|
309
|
+
if (Object.keys(agents).length === 0) {
|
|
310
|
+
errors.push(`${label} must declare at least one agent.`);
|
|
309
311
|
}
|
|
310
|
-
return
|
|
312
|
+
return agents;
|
|
311
313
|
}
|
|
312
|
-
function
|
|
314
|
+
function normalizeSkillImprove(value, label, errors) {
|
|
313
315
|
if (value === undefined) {
|
|
314
316
|
return undefined;
|
|
315
317
|
}
|
|
@@ -385,9 +387,9 @@ function adapterRecordFrom(record) {
|
|
|
385
387
|
...(record.auth !== undefined ? { auth: record.auth } : {}),
|
|
386
388
|
};
|
|
387
389
|
}
|
|
388
|
-
function
|
|
389
|
-
if (value !==
|
|
390
|
-
errors.push(`${label}.version must be
|
|
390
|
+
function requireSpecVersion(value, label, version, errors) {
|
|
391
|
+
if (value !== version) {
|
|
392
|
+
errors.push(`${label}.version must be ${version}.`);
|
|
391
393
|
}
|
|
392
394
|
}
|
|
393
395
|
function normalizeRuntime(value, label, errors) {
|