@workbench-ai/workbench-built-in-adapters 0.0.68 → 0.0.69
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-turn.js +2 -2
- package/dist/execute.d.ts.map +1 -1
- package/dist/execute.js +95 -205
- package/dist/runtime.js +4 -4
- package/package.json +6 -6
package/dist/agent-turn.js
CHANGED
|
@@ -244,9 +244,9 @@ function defaultWorkbenchAgentConfig(provider, providerName) {
|
|
|
244
244
|
};
|
|
245
245
|
}
|
|
246
246
|
async function resolveAgentAuth(provider, providerSpec, workspaceRoot, agentHome, adapterAuth) {
|
|
247
|
-
const
|
|
247
|
+
const authConfig = adapterAuthProviderOption(adapterAuth.request, providerSpec.use) ??
|
|
248
248
|
(provider.manifest.defaults.auth ?? {});
|
|
249
|
-
const parsed = provider.schemas.auth.safeParse(
|
|
249
|
+
const parsed = provider.schemas.auth.safeParse(authConfig);
|
|
250
250
|
if (!parsed.success) {
|
|
251
251
|
throw new Error(`Agent provider "${provider.manifest.id}" auth is invalid: ${formatValidationIssues(parsed.error.issues)}`);
|
|
252
252
|
}
|
package/dist/execute.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"execute.d.ts","sourceRoot":"","sources":["../src/execute.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAEV,IAAI,EAKL,MAAM,kCAAkC,CAAC;
|
|
1
|
+
{"version":3,"file":"execute.d.ts","sourceRoot":"","sources":["../src/execute.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAEV,IAAI,EAKL,MAAM,kCAAkC,CAAC;AAqB1C,OAAO,KAAK,EAEV,0BAA0B,EAG3B,MAAM,iBAAiB,CAAC;AAOzB,MAAM,WAAW,4CAA4C;IAC3D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,0BAA0B,CAAC;IAC3C,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,IAAI,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AA2DD,wBAAsB,qCAAqC,CACzD,IAAI,GAAE,4CAAiD,GACtD,OAAO,CAAC,IAAI,CAAC,CAyDf"}
|
package/dist/execute.js
CHANGED
|
@@ -2,10 +2,10 @@ import { spawn } from "node:child_process";
|
|
|
2
2
|
import { promises as fs } from "node:fs";
|
|
3
3
|
import os from "node:os";
|
|
4
4
|
import path from "node:path";
|
|
5
|
-
import { jsonRecord, normalizeRelativePath, readSurfaceFiles, writeSurfaceFiles, } from "@workbench-ai/workbench-core";
|
|
6
|
-
import { ensureWorkbenchAdapterOutputDir, readWorkbenchAdapterOperationResult, readWorkbenchAdapterOperationRequest,
|
|
5
|
+
import { createWorkbenchExecutionEventPublisher, jsonRecord, normalizeRelativePath, publishCommandStepEvent, readSurfaceFiles, writeSurfaceFiles, } from "@workbench-ai/workbench-core";
|
|
6
|
+
import { ensureWorkbenchAdapterOutputDir, readWorkbenchAdapterOperationResult, readWorkbenchAdapterOperationRequest, writeWorkbenchAdapterOperationResult, workbenchAdapterOperationResultPath, } from "@workbench-ai/workbench-protocol";
|
|
7
7
|
import YAML from "yaml";
|
|
8
|
-
import { isWorkbenchBuiltInAdapterId,
|
|
8
|
+
import { isWorkbenchBuiltInAdapterId, } from "./manifests.js";
|
|
9
9
|
import { importWorkbenchRuntime } from "./runtime.js";
|
|
10
10
|
const DIRECT_ADAPTER_HANDLERS = {
|
|
11
11
|
command: executeCommandAdapterRequest,
|
|
@@ -33,10 +33,19 @@ export async function executeWorkbenchBuiltInAdapterCommand(args = {}) {
|
|
|
33
33
|
adapterAuthRoot: args.adapterAuthRoot,
|
|
34
34
|
adapterAuthRequest: args.adapterAuthRequest ?? request.auth,
|
|
35
35
|
adapterAuthEnv: args.adapterAuthEnv,
|
|
36
|
+
eventPublisher: eventPublisherForAdapterRequest(request),
|
|
36
37
|
};
|
|
37
38
|
const directHandler = DIRECT_ADAPTER_HANDLERS[adapterId];
|
|
38
39
|
if (directHandler) {
|
|
39
|
-
await
|
|
40
|
+
await publishDirectAdapterStep(agentOptions.eventPublisher, adapterId, request, "started");
|
|
41
|
+
try {
|
|
42
|
+
await directHandler(request);
|
|
43
|
+
await publishDirectAdapterStep(agentOptions.eventPublisher, adapterId, request, "succeeded");
|
|
44
|
+
}
|
|
45
|
+
catch (error) {
|
|
46
|
+
await publishDirectAdapterStep(agentOptions.eventPublisher, adapterId, request, "failed", error);
|
|
47
|
+
throw error;
|
|
48
|
+
}
|
|
40
49
|
return;
|
|
41
50
|
}
|
|
42
51
|
if (adapterId === "rubric") {
|
|
@@ -60,6 +69,23 @@ export async function executeWorkbenchBuiltInAdapterCommand(args = {}) {
|
|
|
60
69
|
throw new Error(`Agent adapter ${adapterId} cannot handle ${request.operation}.`);
|
|
61
70
|
}
|
|
62
71
|
}
|
|
72
|
+
async function publishDirectAdapterStep(publisher, adapterId, request, status, error) {
|
|
73
|
+
await publishCommandStepEvent(publisher, {
|
|
74
|
+
step: `${adapterId}.${request.operation}`,
|
|
75
|
+
status,
|
|
76
|
+
role: directAdapterProgressRole(request.operation),
|
|
77
|
+
...(error ? { error: error instanceof Error ? error.message : String(error) } : {}),
|
|
78
|
+
});
|
|
79
|
+
}
|
|
80
|
+
function directAdapterProgressRole(operation) {
|
|
81
|
+
if (operation === "skill.run") {
|
|
82
|
+
return "runner";
|
|
83
|
+
}
|
|
84
|
+
if (operation === "skill.improve") {
|
|
85
|
+
return "improver";
|
|
86
|
+
}
|
|
87
|
+
return "engine";
|
|
88
|
+
}
|
|
63
89
|
async function executeWorkbenchEngineRequest(request) {
|
|
64
90
|
if (request.operation === "engine.resolve") {
|
|
65
91
|
await executeWorkbenchEngineResolveRequest(request);
|
|
@@ -92,42 +118,8 @@ async function executeWorkbenchEngineResolveRequest(request) {
|
|
|
92
118
|
});
|
|
93
119
|
}
|
|
94
120
|
async function executeWorkbenchEngineRunRequest(request) {
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
: await runWorkbenchEngineSharedGrading(request);
|
|
98
|
-
if (!outcome.result) {
|
|
99
|
-
throw new Error("Workbench engine scoring completed without an engine result.");
|
|
100
|
-
}
|
|
101
|
-
await writeSurfaceFiles(request.paths.output, outcome.files.map((file) => remapRuntimeControlTraceFile(request, file)));
|
|
102
|
-
const usage = await workbenchEngineOutcomeUsage(outcome);
|
|
103
|
-
await writeWorkbenchAdapterOperationResult(request.paths.output, {
|
|
104
|
-
protocol: "workbench.adapter-result.v1",
|
|
105
|
-
operation: "engine.run",
|
|
106
|
-
ok: true,
|
|
107
|
-
value: outcome.result,
|
|
108
|
-
...(usage ? { usage } : {}),
|
|
109
|
-
...(outcome.summary !== undefined ? { summary: outcome.summary } : {}),
|
|
110
|
-
...(outcome.feedback !== undefined ? { feedback: outcome.feedback } : {}),
|
|
111
|
-
});
|
|
112
|
-
}
|
|
113
|
-
async function workbenchEngineOutcomeUsage(outcome) {
|
|
114
|
-
const runtime = await importWorkbenchRuntime();
|
|
115
|
-
const operationUsage = outcome.usage
|
|
116
|
-
? undefined
|
|
117
|
-
: runtime.mergeUsageSummaries(outcome.operationResults.map((result) => {
|
|
118
|
-
if (result.operation === "skill.run") {
|
|
119
|
-
return runtime.assignUsageRole("runner", result.usage);
|
|
120
|
-
}
|
|
121
|
-
if (result.operation === "engine.run") {
|
|
122
|
-
return runtime.assignUsageRole("engine", result.usage);
|
|
123
|
-
}
|
|
124
|
-
return result.usage;
|
|
125
|
-
}));
|
|
126
|
-
const runtimeUsage = runtime.mergeUsageSummaries([outcome.usage, operationUsage]);
|
|
127
|
-
const resultUsage = runtimeUsage?.engine
|
|
128
|
-
? undefined
|
|
129
|
-
: runtime.assignUsageRole("engine", outcome.result?.usage);
|
|
130
|
-
return runtime.mergeUsageSummaries([runtimeUsage, resultUsage]);
|
|
121
|
+
void request;
|
|
122
|
+
throw new Error("Workbench engine.run is no longer an orchestration adapter. Run the selected skill in core and invoke the score adapter directly.");
|
|
131
123
|
}
|
|
132
124
|
function workbenchEngineCasesPath(request) {
|
|
133
125
|
const config = adapterCommandConfigRecord(request);
|
|
@@ -141,139 +133,6 @@ function workbenchEngineCasesPath(request) {
|
|
|
141
133
|
}
|
|
142
134
|
throw new Error("Workbench engine cases must be an object with path.");
|
|
143
135
|
}
|
|
144
|
-
function workbenchEngineScoreInvocation(request) {
|
|
145
|
-
const score = jsonRecord(adapterCommandConfigRecord(request).score);
|
|
146
|
-
if (!score || typeof score.use !== "string" || score.use.length === 0) {
|
|
147
|
-
throw new Error("Workbench engine requires invocation.with.score.use.");
|
|
148
|
-
}
|
|
149
|
-
return {
|
|
150
|
-
use: score.use,
|
|
151
|
-
with: (score.with ?? {}),
|
|
152
|
-
...(score.auth !== undefined ? { auth: score.auth } : {}),
|
|
153
|
-
command: typeof score.command === "string" && score.command.length > 0
|
|
154
|
-
? score.command
|
|
155
|
-
: adapterCommandName(score.use),
|
|
156
|
-
};
|
|
157
|
-
}
|
|
158
|
-
function workbenchEngineSkillInvocation(request) {
|
|
159
|
-
const skill = request.context?.skill?.run;
|
|
160
|
-
if (!skill?.use || !skill.command) {
|
|
161
|
-
throw new Error("Workbench engine requires context.skill.run.use and context.skill.run.command.");
|
|
162
|
-
}
|
|
163
|
-
return {
|
|
164
|
-
use: skill.use,
|
|
165
|
-
with: (skill.with ?? {}),
|
|
166
|
-
...(skill.auth !== undefined ? { auth: skill.auth } : {}),
|
|
167
|
-
command: skill.command,
|
|
168
|
-
};
|
|
169
|
-
}
|
|
170
|
-
async function workbenchEngineGradingIsolation(request) {
|
|
171
|
-
const grading = jsonRecord(adapterCommandConfigRecord(request).grading);
|
|
172
|
-
const isolation = grading?.isolation;
|
|
173
|
-
if (isolation !== undefined &&
|
|
174
|
-
isolation !== "shared" &&
|
|
175
|
-
isolation !== "separate") {
|
|
176
|
-
throw new Error("Workbench engine grading.isolation must be shared or separate.");
|
|
177
|
-
}
|
|
178
|
-
if (await workbenchEnginePrivateFilesPresent(request)) {
|
|
179
|
-
return "separate";
|
|
180
|
-
}
|
|
181
|
-
return isolation ?? "shared";
|
|
182
|
-
}
|
|
183
|
-
async function workbenchEnginePrivateFilesPresent(request) {
|
|
184
|
-
if (!request.paths.enginePrivate) {
|
|
185
|
-
return false;
|
|
186
|
-
}
|
|
187
|
-
const files = await readOptionalSurfaceFiles(request.paths.enginePrivate);
|
|
188
|
-
return files.length > 0;
|
|
189
|
-
}
|
|
190
|
-
async function runWorkbenchEngineSharedGrading(request) {
|
|
191
|
-
const inputs = await workbenchEngineRuntimeInputs(request);
|
|
192
|
-
const skill = workbenchEngineSkillInvocation(request);
|
|
193
|
-
const score = workbenchEngineScoreInvocation(request);
|
|
194
|
-
const result = await runWorkbenchRuntimeOperationSequence({
|
|
195
|
-
inputs,
|
|
196
|
-
prepare: true,
|
|
197
|
-
operations: [
|
|
198
|
-
{ label: "skill", operation: "skill.run", invocation: skill },
|
|
199
|
-
{ label: "score", operation: "engine.run", invocation: score },
|
|
200
|
-
],
|
|
201
|
-
});
|
|
202
|
-
assertRuntimeControlResultOk(result, "Workbench shared grading");
|
|
203
|
-
return result;
|
|
204
|
-
}
|
|
205
|
-
async function runWorkbenchEngineSeparateGrading(request) {
|
|
206
|
-
const inputs = await workbenchEngineRuntimeInputs(request);
|
|
207
|
-
const skill = workbenchEngineSkillInvocation(request);
|
|
208
|
-
const score = workbenchEngineScoreInvocation(request);
|
|
209
|
-
const runtime = await importWorkbenchRuntime();
|
|
210
|
-
const runner = await runWorkbenchRuntimeOperationSequence({
|
|
211
|
-
inputs: {
|
|
212
|
-
skill: inputs.skill,
|
|
213
|
-
case: inputs.case,
|
|
214
|
-
traces: inputs.traces,
|
|
215
|
-
},
|
|
216
|
-
prepare: true,
|
|
217
|
-
collectWorkspace: true,
|
|
218
|
-
operations: [
|
|
219
|
-
{ label: "skill", operation: "skill.run", invocation: skill },
|
|
220
|
-
],
|
|
221
|
-
});
|
|
222
|
-
assertRuntimeControlResultOk(runner, "Workbench separate runner");
|
|
223
|
-
const grader = await runWorkbenchRuntimeOperationSequence({
|
|
224
|
-
inputs: {
|
|
225
|
-
skill: inputs.skill,
|
|
226
|
-
case: inputs.case,
|
|
227
|
-
enginePrivate: inputs.enginePrivate,
|
|
228
|
-
traces: inputs.traces,
|
|
229
|
-
workspace: runner.workspaceFiles ?? [],
|
|
230
|
-
output: runner.files.filter((file) => !runtime.isWorkbenchInternalOutputPath(file.path)),
|
|
231
|
-
},
|
|
232
|
-
prepare: false,
|
|
233
|
-
operations: [
|
|
234
|
-
{ label: "score", operation: "engine.run", invocation: score },
|
|
235
|
-
],
|
|
236
|
-
});
|
|
237
|
-
assertRuntimeControlResultOk(grader, "Workbench separate grader");
|
|
238
|
-
return {
|
|
239
|
-
...grader,
|
|
240
|
-
files: dedupeSurfaceFiles([...runner.files, ...grader.files]),
|
|
241
|
-
fileChanges: [...new Set([...runner.fileChanges, ...grader.fileChanges])].sort(),
|
|
242
|
-
usage: runtime.mergeUsageSummaries([runner.usage, grader.usage]),
|
|
243
|
-
operationResults: [...runner.operationResults, ...grader.operationResults],
|
|
244
|
-
};
|
|
245
|
-
}
|
|
246
|
-
async function workbenchEngineRuntimeInputs(request) {
|
|
247
|
-
const [skill, caseFiles, enginePrivate, traces] = await Promise.all([
|
|
248
|
-
readOptionalSurfaceFiles(request.paths.skills ?? request.paths.skill),
|
|
249
|
-
readOptionalSurfaceFiles(request.paths.case),
|
|
250
|
-
readOptionalSurfaceFiles(request.paths.enginePrivate),
|
|
251
|
-
readOptionalSurfaceFiles(request.paths.traces),
|
|
252
|
-
]);
|
|
253
|
-
return {
|
|
254
|
-
skill,
|
|
255
|
-
case: caseFiles,
|
|
256
|
-
enginePrivate,
|
|
257
|
-
traces,
|
|
258
|
-
};
|
|
259
|
-
}
|
|
260
|
-
async function readOptionalSurfaceFiles(root) {
|
|
261
|
-
if (!root) {
|
|
262
|
-
return [];
|
|
263
|
-
}
|
|
264
|
-
return await readSurfaceFiles(root).catch((error) => {
|
|
265
|
-
if (error.code === "ENOENT") {
|
|
266
|
-
return [];
|
|
267
|
-
}
|
|
268
|
-
throw error;
|
|
269
|
-
});
|
|
270
|
-
}
|
|
271
|
-
function assertRuntimeControlResultOk(result, label) {
|
|
272
|
-
if (result.ok) {
|
|
273
|
-
return;
|
|
274
|
-
}
|
|
275
|
-
throw new Error(`${label} failed${result.error ? `: ${result.error}` : "."}`);
|
|
276
|
-
}
|
|
277
136
|
function dedupeSurfaceFiles(files) {
|
|
278
137
|
const byPath = new Map();
|
|
279
138
|
for (const file of files) {
|
|
@@ -285,31 +144,13 @@ function dedupeSurfaceFiles(files) {
|
|
|
285
144
|
}
|
|
286
145
|
return [...byPath.values()].sort((left, right) => left.path.localeCompare(right.path));
|
|
287
146
|
}
|
|
288
|
-
function remapRuntimeControlTraceFile(request, file) {
|
|
289
|
-
const normalized = normalizeRelativePath(file.path);
|
|
290
|
-
if (!normalized.startsWith(".workbench/traces/")) {
|
|
291
|
-
return { ...file, path: normalized };
|
|
292
|
-
}
|
|
293
|
-
const segments = normalized.split("/");
|
|
294
|
-
const rest = segments.length >= 6
|
|
295
|
-
? segments.slice(5)
|
|
296
|
-
: segments.length >= 3
|
|
297
|
-
? segments.slice(3)
|
|
298
|
-
: [];
|
|
299
|
-
if (rest.length === 0) {
|
|
300
|
-
return { ...file, path: normalized };
|
|
301
|
-
}
|
|
302
|
-
return {
|
|
303
|
-
...file,
|
|
304
|
-
path: `.workbench/traces/${request.jobId ?? request.id}/${rest.join("/")}`,
|
|
305
|
-
};
|
|
306
|
-
}
|
|
307
147
|
function safeInternalPathSegment(value) {
|
|
308
148
|
const safe = value.replace(/[^a-z0-9._-]+/giu, "_").replace(/^_+|_+$/gu, "");
|
|
309
149
|
return safe || "nested";
|
|
310
150
|
}
|
|
311
151
|
async function executeCommandAdapterRequest(request) {
|
|
312
152
|
const command = requiredAdapterCommandString(request, "command");
|
|
153
|
+
await ensureRunSkillDirectories(request);
|
|
313
154
|
const before = request.operation === "skill.improve"
|
|
314
155
|
? await snapshotEditableSkillWorkspace(request)
|
|
315
156
|
: null;
|
|
@@ -337,6 +178,7 @@ async function executeTestsEngineRequest(request) {
|
|
|
337
178
|
if (request.operation !== "engine.run") {
|
|
338
179
|
throw new Error(`Tests adapter cannot handle ${request.operation}.`);
|
|
339
180
|
}
|
|
181
|
+
await ensureRunSkillDirectories(request);
|
|
340
182
|
const testsRoot = requiredRequestPath(request.paths.enginePrivate, "paths.enginePrivate");
|
|
341
183
|
const verifierRoot = testsVerifierOutputDir(request.paths.output);
|
|
342
184
|
await fs.rm(verifierRoot, { recursive: true, force: true }).catch(() => undefined);
|
|
@@ -344,8 +186,6 @@ async function executeTestsEngineRequest(request) {
|
|
|
344
186
|
const script = await firstExistingFile([
|
|
345
187
|
path.join(testsRoot, "test.sh"),
|
|
346
188
|
path.join(testsRoot, "run.sh"),
|
|
347
|
-
path.join(testsRoot, "tests", "test.sh"),
|
|
348
|
-
path.join(testsRoot, "tests", "run.sh"),
|
|
349
189
|
]);
|
|
350
190
|
if (!script) {
|
|
351
191
|
throw new Error(`Tests engine requires ${path.join(testsRoot, "test.sh")}.`);
|
|
@@ -356,6 +196,7 @@ async function executeTestsEngineRequest(request) {
|
|
|
356
196
|
CASE_DIR: request.paths.case ?? path.join(request.paths.workspace, "input", "case"),
|
|
357
197
|
OUTPUT_DIR: request.paths.output,
|
|
358
198
|
WORKBENCH_TESTS_VERIFIER_DIR: verifierRoot,
|
|
199
|
+
WORKBENCH_CASE_ID: request.context?.attempt?.caseId ?? "current",
|
|
359
200
|
});
|
|
360
201
|
const result = await readTestsResult({
|
|
361
202
|
verifierRoot,
|
|
@@ -407,8 +248,18 @@ function commandAdapterEnvironment(request) {
|
|
|
407
248
|
TRACE_DIR: request.paths.traces ?? path.join(request.paths.workspace, "input", "traces"),
|
|
408
249
|
OUTPUT_DIR: request.paths.output,
|
|
409
250
|
WORKBENCH_SKILL_PATCH: commandSkillPatchPath(request),
|
|
251
|
+
WORKBENCH_CASE_ID: request.context?.attempt?.caseId ?? "current",
|
|
410
252
|
};
|
|
411
253
|
}
|
|
254
|
+
async function ensureRunSkillDirectories(request) {
|
|
255
|
+
if (request.operation === "skill.improve") {
|
|
256
|
+
return;
|
|
257
|
+
}
|
|
258
|
+
await Promise.all([
|
|
259
|
+
request.paths.skills ? fs.mkdir(request.paths.skills, { recursive: true }) : Promise.resolve(),
|
|
260
|
+
request.paths.skill ? fs.mkdir(request.paths.skill, { recursive: true }) : Promise.resolve(),
|
|
261
|
+
]);
|
|
262
|
+
}
|
|
412
263
|
function commandSkillPatchPath(request) {
|
|
413
264
|
return path.join(request.paths.output, COMMAND_SKILL_PATCH_FILE);
|
|
414
265
|
}
|
|
@@ -418,17 +269,25 @@ async function readSkillPatchFile(filePath) {
|
|
|
418
269
|
}
|
|
419
270
|
const record = jsonRecord(JSON.parse(await fs.readFile(filePath, "utf8")));
|
|
420
271
|
const rawFiles = Array.isArray(record.files) ? record.files : [];
|
|
421
|
-
const files = rawFiles.
|
|
272
|
+
const files = rawFiles.map((entry, index) => {
|
|
422
273
|
if (!isPatchSurfaceSnapshotFile(entry)) {
|
|
423
|
-
|
|
274
|
+
throw new Error(`Skill patch file ${filePath} files[${index}] must be an object with string path and content fields, got: ${describePatchEntry(entry)}.`);
|
|
424
275
|
}
|
|
425
|
-
return
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
276
|
+
return {
|
|
277
|
+
...entry,
|
|
278
|
+
path: normalizeRelativePath(entry.path),
|
|
279
|
+
};
|
|
429
280
|
});
|
|
281
|
+
if (record.fileChanges !== undefined && !Array.isArray(record.fileChanges)) {
|
|
282
|
+
throw new Error(`Skill patch file ${filePath} fileChanges must be an array of strings when provided.`);
|
|
283
|
+
}
|
|
430
284
|
const fileChanges = Array.isArray(record.fileChanges)
|
|
431
|
-
? record.fileChanges.
|
|
285
|
+
? record.fileChanges.map((entry, index) => {
|
|
286
|
+
if (typeof entry !== "string") {
|
|
287
|
+
throw new Error(`Skill patch file ${filePath} fileChanges[${index}] must be a string path, got: ${describePatchEntry(entry)}.`);
|
|
288
|
+
}
|
|
289
|
+
return normalizeRelativePath(entry);
|
|
290
|
+
})
|
|
432
291
|
: files.map((file) => file.path);
|
|
433
292
|
return {
|
|
434
293
|
files,
|
|
@@ -437,6 +296,20 @@ async function readSkillPatchFile(filePath) {
|
|
|
437
296
|
...(record.feedback !== undefined ? { feedback: record.feedback } : {}),
|
|
438
297
|
};
|
|
439
298
|
}
|
|
299
|
+
function describePatchEntry(value) {
|
|
300
|
+
if (value === null) {
|
|
301
|
+
return "null";
|
|
302
|
+
}
|
|
303
|
+
if (Array.isArray(value)) {
|
|
304
|
+
return "an array";
|
|
305
|
+
}
|
|
306
|
+
if (typeof value === "object") {
|
|
307
|
+
const record = value;
|
|
308
|
+
const keys = Object.keys(record);
|
|
309
|
+
return `an object with key${keys.length === 1 ? "" : "s"} [${keys.join(", ")}]`;
|
|
310
|
+
}
|
|
311
|
+
return `a ${typeof value}`;
|
|
312
|
+
}
|
|
440
313
|
function isPatchSurfaceSnapshotFile(value) {
|
|
441
314
|
if (!value || typeof value !== "object" || Array.isArray(value)) {
|
|
442
315
|
return false;
|
|
@@ -636,10 +509,9 @@ async function readTestsResult(args) {
|
|
|
636
509
|
}
|
|
637
510
|
return normalizeTestsResult({ reward: score }, args.caseId);
|
|
638
511
|
}
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
}, args.caseId);
|
|
512
|
+
throw new Error("Tests engine did not find reward.json or reward.txt under its verifier output directory " +
|
|
513
|
+
`(${args.verifierRoot}). The tests script must write a reward to ` +
|
|
514
|
+
"$WORKBENCH_TESTS_VERIFIER_DIR/reward.json or $WORKBENCH_TESTS_VERIFIER_DIR/reward.txt.");
|
|
643
515
|
}
|
|
644
516
|
function testsVerifierOutputDir(outputRoot) {
|
|
645
517
|
return path.join(outputRoot, ".workbench", "internal", "verifier");
|
|
@@ -786,6 +658,19 @@ function requiredAdapterCommandString(request, key) {
|
|
|
786
658
|
}
|
|
787
659
|
return value;
|
|
788
660
|
}
|
|
661
|
+
function eventPublisherForAdapterRequest(request) {
|
|
662
|
+
if (!request.progress) {
|
|
663
|
+
return undefined;
|
|
664
|
+
}
|
|
665
|
+
return createWorkbenchExecutionEventPublisher({
|
|
666
|
+
projectId: request.progress.projectId,
|
|
667
|
+
runId: request.progress.runId,
|
|
668
|
+
jobId: request.progress.jobId,
|
|
669
|
+
executionId: request.progress.executionId,
|
|
670
|
+
attempt: request.progress.attempt,
|
|
671
|
+
target: request.progress.target,
|
|
672
|
+
});
|
|
673
|
+
}
|
|
789
674
|
async function executeBuiltInAgentTurn(executor, request) {
|
|
790
675
|
const { defaultWorkbenchAgentTurnExecutor, executeWorkbenchAgentTurn, } = await import("./agent-turn.js");
|
|
791
676
|
return await executeWorkbenchAgentTurn(executor ?? defaultWorkbenchAgentTurnExecutor, request);
|
|
@@ -806,6 +691,7 @@ async function writeAgentSkillOutput(request, workload, adapter, options = {}) {
|
|
|
806
691
|
prompt: buildAgentSkillPrompt(workload, adapter),
|
|
807
692
|
traceRoot,
|
|
808
693
|
jobId: workload.job.id,
|
|
694
|
+
eventPublisher: options.eventPublisher,
|
|
809
695
|
});
|
|
810
696
|
const outputPath = path.join(request.paths.output, "skill-summary.md");
|
|
811
697
|
await fs.mkdir(path.dirname(outputPath), { recursive: true });
|
|
@@ -874,6 +760,7 @@ async function writeAgentSkillRevisionOutput(request, workload, improver, option
|
|
|
874
760
|
prompt: buildAgentImproverPrompt(workload),
|
|
875
761
|
traceRoot,
|
|
876
762
|
jobId: workload.job.id,
|
|
763
|
+
eventPublisher: options.eventPublisher,
|
|
877
764
|
});
|
|
878
765
|
const skillPatch = await createSkillPatchFromWorkspace({
|
|
879
766
|
beforeRoot: before.root,
|
|
@@ -954,6 +841,7 @@ async function writeRubricJudgeResult(request, workload, engine, options = {}) {
|
|
|
954
841
|
adapterAuthRequest: options.adapterAuthRequest,
|
|
955
842
|
adapterAuthEnv: options.adapterAuthEnv,
|
|
956
843
|
runtime,
|
|
844
|
+
eventPublisher: options.eventPublisher,
|
|
957
845
|
}));
|
|
958
846
|
const usage = runtime.mergeUsageSummaries(criterionRuns.map((run) => run.usage));
|
|
959
847
|
const result = rubricJudgeResultFromCriteria({
|
|
@@ -1072,6 +960,7 @@ async function runRubricCriterionJudge(args) {
|
|
|
1072
960
|
traceRoot: path.join(traceRoot, "judge"),
|
|
1073
961
|
tracePath,
|
|
1074
962
|
jobId: args.workload.job.id,
|
|
963
|
+
eventPublisher: args.eventPublisher,
|
|
1075
964
|
});
|
|
1076
965
|
let usage = args.runtime.assignUsageRole("engine", agentResult.usage);
|
|
1077
966
|
try {
|
|
@@ -1101,6 +990,7 @@ async function runRubricCriterionJudge(args) {
|
|
|
1101
990
|
traceRoot: path.join(traceRoot, "repair"),
|
|
1102
991
|
tracePath: repairTracePath,
|
|
1103
992
|
jobId: args.workload.job.id,
|
|
993
|
+
eventPublisher: args.eventPublisher,
|
|
1104
994
|
});
|
|
1105
995
|
usage = args.runtime.mergeUsageSummaries([
|
|
1106
996
|
usage,
|
package/dist/runtime.js
CHANGED
|
@@ -12,11 +12,11 @@ export async function importWorkbenchRuntime() {
|
|
|
12
12
|
return await runtimeModule;
|
|
13
13
|
}
|
|
14
14
|
async function importWorkbenchRuntimeUncached() {
|
|
15
|
-
const
|
|
15
|
+
const specifiers = runtimeImportOptions();
|
|
16
16
|
let lastError;
|
|
17
|
-
for (const
|
|
17
|
+
for (const specifier of specifiers) {
|
|
18
18
|
try {
|
|
19
|
-
return await import(__rewriteRelativeImportExtension(
|
|
19
|
+
return await import(__rewriteRelativeImportExtension(specifier));
|
|
20
20
|
}
|
|
21
21
|
catch (error) {
|
|
22
22
|
lastError = error;
|
|
@@ -30,5 +30,5 @@ function runtimeImportOptions() {
|
|
|
30
30
|
"/app/products/workbench/packages/core/src/index.ts",
|
|
31
31
|
new URL("../../core/src/index.ts", import.meta.url).href,
|
|
32
32
|
"@workbench-ai/workbench-core",
|
|
33
|
-
].filter((
|
|
33
|
+
].filter((specifier) => typeof specifier === "string" && specifier.length > 0);
|
|
34
34
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@workbench-ai/workbench-built-in-adapters",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.69",
|
|
4
4
|
"repository": {
|
|
5
5
|
"type": "git",
|
|
6
6
|
"url": "git+https://github.com/workbench-ai/workbench.git",
|
|
@@ -32,12 +32,12 @@
|
|
|
32
32
|
],
|
|
33
33
|
"dependencies": {
|
|
34
34
|
"yaml": "^2.8.2",
|
|
35
|
-
"@workbench-ai/agent-driver-anthropic-claude-code": "0.0.46",
|
|
36
|
-
"@workbench-ai/workbench-core": "0.0.68",
|
|
37
|
-
"@workbench-ai/workbench-contract": "0.0.68",
|
|
38
|
-
"@workbench-ai/workbench-protocol": "0.0.68",
|
|
39
35
|
"@workbench-ai/agent-driver-openai-codex": "0.0.46",
|
|
40
|
-
"@workbench-ai/agent-driver": "0.0.46"
|
|
36
|
+
"@workbench-ai/agent-driver": "0.0.46",
|
|
37
|
+
"@workbench-ai/workbench-contract": "0.0.69",
|
|
38
|
+
"@workbench-ai/workbench-protocol": "0.0.69",
|
|
39
|
+
"@workbench-ai/agent-driver-anthropic-claude-code": "0.0.46",
|
|
40
|
+
"@workbench-ai/workbench-core": "0.0.69"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"@types/node": "^24.3.1",
|