@workbench-ai/workbench-built-in-adapters 0.0.66 → 0.0.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-turn.js +3 -3
- package/dist/execute.d.ts.map +1 -1
- package/dist/execute.js +218 -157
- package/dist/manifests.d.ts +1 -8
- package/dist/manifests.d.ts.map +1 -1
- package/dist/manifests.js +1 -100
- package/dist/runtime.js +5 -5
- package/package.json +5 -5
package/dist/agent-turn.js
CHANGED
|
@@ -244,9 +244,9 @@ function defaultWorkbenchAgentConfig(provider, providerName) {
|
|
|
244
244
|
};
|
|
245
245
|
}
|
|
246
246
|
async function resolveAgentAuth(provider, providerSpec, workspaceRoot, agentHome, adapterAuth) {
|
|
247
|
-
const
|
|
247
|
+
const skill = adapterAuthProviderOption(adapterAuth.request, providerSpec.use) ??
|
|
248
248
|
(provider.manifest.defaults.auth ?? {});
|
|
249
|
-
const parsed = provider.schemas.auth.safeParse(
|
|
249
|
+
const parsed = provider.schemas.auth.safeParse(skill);
|
|
250
250
|
if (!parsed.success) {
|
|
251
251
|
throw new Error(`Agent provider "${provider.manifest.id}" auth is invalid: ${formatValidationIssues(parsed.error.issues)}`);
|
|
252
252
|
}
|
|
@@ -254,7 +254,7 @@ async function resolveAgentAuth(provider, providerSpec, workspaceRoot, agentHome
|
|
|
254
254
|
void agentHome;
|
|
255
255
|
return { ...parsed.data };
|
|
256
256
|
}
|
|
257
|
-
function
|
|
257
|
+
function adapterAuthProviderOption(auth, providerName) {
|
|
258
258
|
const record = jsonRecord(auth);
|
|
259
259
|
const self = jsonRecord(record?.self);
|
|
260
260
|
const adapters = jsonRecord(record?.adapters);
|
package/dist/execute.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"execute.d.ts","sourceRoot":"","sources":["../src/execute.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAEV,IAAI,EAKL,MAAM,kCAAkC,CAAC;AAoB1C,OAAO,KAAK,EAEV,0BAA0B,EAG3B,MAAM,iBAAiB,CAAC;AAQzB,MAAM,WAAW,4CAA4C;IAC3D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,0BAA0B,CAAC;IAC3C,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,IAAI,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;
|
|
1
|
+
{"version":3,"file":"execute.d.ts","sourceRoot":"","sources":["../src/execute.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAEV,IAAI,EAKL,MAAM,kCAAkC,CAAC;AAoB1C,OAAO,KAAK,EAEV,0BAA0B,EAG3B,MAAM,iBAAiB,CAAC;AAQzB,MAAM,WAAW,4CAA4C;IAC3D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,0BAA0B,CAAC;IAC3C,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,IAAI,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AAyDD,wBAAsB,qCAAqC,CACzD,IAAI,GAAE,4CAAiD,GACtD,OAAO,CAAC,IAAI,CAAC,CAiDf"}
|
package/dist/execute.js
CHANGED
|
@@ -12,7 +12,8 @@ const DIRECT_ADAPTER_HANDLERS = {
|
|
|
12
12
|
tests: executeTestsEngineRequest,
|
|
13
13
|
workbench: executeWorkbenchEngineRequest,
|
|
14
14
|
};
|
|
15
|
-
const
|
|
15
|
+
const CASE_CONTROL_FILE = "case.yaml";
|
|
16
|
+
const COMMAND_SKILL_PATCH_FILE = "skill-patch.json";
|
|
16
17
|
const DEFAULT_RUBRIC_PARALLELISM = 4;
|
|
17
18
|
export async function executeWorkbenchBuiltInAdapterCommand(args = {}) {
|
|
18
19
|
const request = await readWorkbenchAdapterOperationRequest(args.requestPath);
|
|
@@ -48,12 +49,12 @@ export async function executeWorkbenchBuiltInAdapterCommand(args = {}) {
|
|
|
48
49
|
if (isBuiltInAgentAdapterId(adapterId)) {
|
|
49
50
|
const workload = workloadFromAdapterOperationRequest(request);
|
|
50
51
|
const agent = builtInAgentSpecFromRequest(request);
|
|
51
|
-
if (request.operation === "
|
|
52
|
-
await
|
|
52
|
+
if (request.operation === "skill.improve") {
|
|
53
|
+
await writeAgentSkillRevisionOutput(request, workload, agent, agentOptions);
|
|
53
54
|
return;
|
|
54
55
|
}
|
|
55
|
-
if (request.operation === "
|
|
56
|
-
await
|
|
56
|
+
if (request.operation === "skill.run") {
|
|
57
|
+
await writeAgentSkillOutput(request, workload, agent, agentOptions);
|
|
57
58
|
return;
|
|
58
59
|
}
|
|
59
60
|
throw new Error(`Agent adapter ${adapterId} cannot handle ${request.operation}.`);
|
|
@@ -71,13 +72,13 @@ async function executeWorkbenchEngineRequest(request) {
|
|
|
71
72
|
throw new Error(`Workbench engine adapter cannot handle ${request.operation}.`);
|
|
72
73
|
}
|
|
73
74
|
async function executeWorkbenchEngineResolveRequest(request) {
|
|
74
|
-
const configuredPath =
|
|
75
|
+
const configuredPath = workbenchEngineCasesPath(request);
|
|
75
76
|
const sourcePath = path.resolve(request.paths.workspace, configuredPath);
|
|
76
77
|
const stat = await fs.stat(sourcePath).catch(() => null);
|
|
77
78
|
if (!stat?.isDirectory()) {
|
|
78
|
-
throw new Error(`Workbench engine
|
|
79
|
+
throw new Error(`Workbench engine cases path is not a directory: ${sourcePath}`);
|
|
79
80
|
}
|
|
80
|
-
const cases = await
|
|
81
|
+
const cases = await readEngineCasesFromWorkbenchCaseRoot(sourcePath);
|
|
81
82
|
await writeWorkbenchAdapterOperationResult(request.paths.output, {
|
|
82
83
|
protocol: "workbench.adapter-result.v1",
|
|
83
84
|
operation: "engine.resolve",
|
|
@@ -114,7 +115,7 @@ async function workbenchEngineOutcomeUsage(outcome) {
|
|
|
114
115
|
const operationUsage = outcome.usage
|
|
115
116
|
? undefined
|
|
116
117
|
: runtime.mergeUsageSummaries(outcome.operationResults.map((result) => {
|
|
117
|
-
if (result.operation === "
|
|
118
|
+
if (result.operation === "skill.run") {
|
|
118
119
|
return runtime.assignUsageRole("runner", result.usage);
|
|
119
120
|
}
|
|
120
121
|
if (result.operation === "engine.run") {
|
|
@@ -128,17 +129,17 @@ async function workbenchEngineOutcomeUsage(outcome) {
|
|
|
128
129
|
: runtime.assignUsageRole("engine", outcome.result?.usage);
|
|
129
130
|
return runtime.mergeUsageSummaries([runtimeUsage, resultUsage]);
|
|
130
131
|
}
|
|
131
|
-
function
|
|
132
|
+
function workbenchEngineCasesPath(request) {
|
|
132
133
|
const config = adapterCommandConfigRecord(request);
|
|
133
|
-
const
|
|
134
|
-
if (
|
|
135
|
-
return "
|
|
134
|
+
const cases = config.cases;
|
|
135
|
+
if (cases === undefined) {
|
|
136
|
+
return "cases";
|
|
136
137
|
}
|
|
137
|
-
const
|
|
138
|
-
if (typeof
|
|
139
|
-
return
|
|
138
|
+
const caseConfig = jsonRecord(cases);
|
|
139
|
+
if (typeof caseConfig.path === "string" && caseConfig.path.trim().length > 0) {
|
|
140
|
+
return caseConfig.path;
|
|
140
141
|
}
|
|
141
|
-
throw new Error("Workbench engine
|
|
142
|
+
throw new Error("Workbench engine cases must be an object with path.");
|
|
142
143
|
}
|
|
143
144
|
function workbenchEngineScoreInvocation(request) {
|
|
144
145
|
const score = jsonRecord(adapterCommandConfigRecord(request).score);
|
|
@@ -154,16 +155,16 @@ function workbenchEngineScoreInvocation(request) {
|
|
|
154
155
|
: adapterCommandName(score.use),
|
|
155
156
|
};
|
|
156
157
|
}
|
|
157
|
-
function
|
|
158
|
-
const
|
|
159
|
-
if (!
|
|
160
|
-
throw new Error("Workbench engine requires context.
|
|
158
|
+
function workbenchEngineSkillInvocation(request) {
|
|
159
|
+
const skill = request.context?.skill?.run;
|
|
160
|
+
if (!skill?.use || !skill.command) {
|
|
161
|
+
throw new Error("Workbench engine requires context.skill.run.use and context.skill.run.command.");
|
|
161
162
|
}
|
|
162
163
|
return {
|
|
163
|
-
use:
|
|
164
|
-
with: (
|
|
165
|
-
...(
|
|
166
|
-
command:
|
|
164
|
+
use: skill.use,
|
|
165
|
+
with: (skill.with ?? {}),
|
|
166
|
+
...(skill.auth !== undefined ? { auth: skill.auth } : {}),
|
|
167
|
+
command: skill.command,
|
|
167
168
|
};
|
|
168
169
|
}
|
|
169
170
|
async function workbenchEngineGradingIsolation(request) {
|
|
@@ -188,13 +189,13 @@ async function workbenchEnginePrivateFilesPresent(request) {
|
|
|
188
189
|
}
|
|
189
190
|
async function runWorkbenchEngineSharedGrading(request) {
|
|
190
191
|
const inputs = await workbenchEngineRuntimeInputs(request);
|
|
191
|
-
const
|
|
192
|
+
const skill = workbenchEngineSkillInvocation(request);
|
|
192
193
|
const score = workbenchEngineScoreInvocation(request);
|
|
193
194
|
const result = await runWorkbenchRuntimeOperationSequence({
|
|
194
195
|
inputs,
|
|
195
196
|
prepare: true,
|
|
196
197
|
operations: [
|
|
197
|
-
{ label: "
|
|
198
|
+
{ label: "skill", operation: "skill.run", invocation: skill },
|
|
198
199
|
{ label: "score", operation: "engine.run", invocation: score },
|
|
199
200
|
],
|
|
200
201
|
});
|
|
@@ -203,25 +204,25 @@ async function runWorkbenchEngineSharedGrading(request) {
|
|
|
203
204
|
}
|
|
204
205
|
async function runWorkbenchEngineSeparateGrading(request) {
|
|
205
206
|
const inputs = await workbenchEngineRuntimeInputs(request);
|
|
206
|
-
const
|
|
207
|
+
const skill = workbenchEngineSkillInvocation(request);
|
|
207
208
|
const score = workbenchEngineScoreInvocation(request);
|
|
208
209
|
const runtime = await importWorkbenchRuntime();
|
|
209
210
|
const runner = await runWorkbenchRuntimeOperationSequence({
|
|
210
211
|
inputs: {
|
|
211
|
-
|
|
212
|
+
skill: inputs.skill,
|
|
212
213
|
case: inputs.case,
|
|
213
214
|
traces: inputs.traces,
|
|
214
215
|
},
|
|
215
216
|
prepare: true,
|
|
216
217
|
collectWorkspace: true,
|
|
217
218
|
operations: [
|
|
218
|
-
{ label: "
|
|
219
|
+
{ label: "skill", operation: "skill.run", invocation: skill },
|
|
219
220
|
],
|
|
220
221
|
});
|
|
221
222
|
assertRuntimeControlResultOk(runner, "Workbench separate runner");
|
|
222
223
|
const grader = await runWorkbenchRuntimeOperationSequence({
|
|
223
224
|
inputs: {
|
|
224
|
-
|
|
225
|
+
skill: inputs.skill,
|
|
225
226
|
case: inputs.case,
|
|
226
227
|
enginePrivate: inputs.enginePrivate,
|
|
227
228
|
traces: inputs.traces,
|
|
@@ -243,14 +244,14 @@ async function runWorkbenchEngineSeparateGrading(request) {
|
|
|
243
244
|
};
|
|
244
245
|
}
|
|
245
246
|
async function workbenchEngineRuntimeInputs(request) {
|
|
246
|
-
const [
|
|
247
|
-
readOptionalSurfaceFiles(request.paths.
|
|
247
|
+
const [skill, caseFiles, enginePrivate, traces] = await Promise.all([
|
|
248
|
+
readOptionalSurfaceFiles(request.paths.skills ?? request.paths.skill),
|
|
248
249
|
readOptionalSurfaceFiles(request.paths.case),
|
|
249
250
|
readOptionalSurfaceFiles(request.paths.enginePrivate),
|
|
250
251
|
readOptionalSurfaceFiles(request.paths.traces),
|
|
251
252
|
]);
|
|
252
253
|
return {
|
|
253
|
-
|
|
254
|
+
skill,
|
|
254
255
|
case: caseFiles,
|
|
255
256
|
enginePrivate,
|
|
256
257
|
traces,
|
|
@@ -309,11 +310,11 @@ function safeInternalPathSegment(value) {
|
|
|
309
310
|
}
|
|
310
311
|
async function executeCommandAdapterRequest(request) {
|
|
311
312
|
const command = requiredAdapterCommandString(request, "command");
|
|
312
|
-
const before = request.operation === "
|
|
313
|
-
? await
|
|
313
|
+
const before = request.operation === "skill.improve"
|
|
314
|
+
? await snapshotEditableSkillWorkspace(request)
|
|
314
315
|
: null;
|
|
315
316
|
try {
|
|
316
|
-
await runAdapterShellCommand(command, request
|
|
317
|
+
await runAdapterShellCommand(command, commandAdapterWorkingDirectory(request), commandAdapterEnvironment(request));
|
|
317
318
|
if (request.operation === "engine.run") {
|
|
318
319
|
await requireCommandScoreResult(request);
|
|
319
320
|
return;
|
|
@@ -343,11 +344,17 @@ async function executeTestsEngineRequest(request) {
|
|
|
343
344
|
const script = await firstExistingFile([
|
|
344
345
|
path.join(testsRoot, "test.sh"),
|
|
345
346
|
path.join(testsRoot, "run.sh"),
|
|
347
|
+
path.join(testsRoot, "tests", "test.sh"),
|
|
348
|
+
path.join(testsRoot, "tests", "run.sh"),
|
|
346
349
|
]);
|
|
347
350
|
if (!script) {
|
|
348
351
|
throw new Error(`Tests engine requires ${path.join(testsRoot, "test.sh")}.`);
|
|
349
352
|
}
|
|
350
353
|
await runAdapterShellCommand(`sh ${shellQuote(script)}`, request.paths.workspace, {
|
|
354
|
+
SKILL_DIR: request.paths.skill ?? path.join(request.paths.workspace, "input", "skills", "primary"),
|
|
355
|
+
SKILLS_DIR: request.paths.skills ?? path.join(request.paths.workspace, "input", "skills"),
|
|
356
|
+
CASE_DIR: request.paths.case ?? path.join(request.paths.workspace, "input", "case"),
|
|
357
|
+
OUTPUT_DIR: request.paths.output,
|
|
351
358
|
WORKBENCH_TESTS_VERIFIER_DIR: verifierRoot,
|
|
352
359
|
});
|
|
353
360
|
const result = await readTestsResult({
|
|
@@ -387,16 +394,68 @@ async function runAdapterShellCommand(command, cwd, env = {}) {
|
|
|
387
394
|
});
|
|
388
395
|
});
|
|
389
396
|
}
|
|
397
|
+
function commandAdapterWorkingDirectory(request) {
|
|
398
|
+
return request.operation === "skill.improve"
|
|
399
|
+
? requiredRequestPath(request.paths.skill, "paths.skill")
|
|
400
|
+
: request.paths.workspace;
|
|
401
|
+
}
|
|
402
|
+
function commandAdapterEnvironment(request) {
|
|
403
|
+
return {
|
|
404
|
+
SKILL_DIR: request.paths.skill ?? path.join(request.paths.workspace, "input", "skills", "primary"),
|
|
405
|
+
SKILLS_DIR: request.paths.skills ?? path.join(request.paths.workspace, "input", "skills"),
|
|
406
|
+
CASE_DIR: request.paths.case ?? path.join(request.paths.workspace, "input", "case"),
|
|
407
|
+
TRACE_DIR: request.paths.traces ?? path.join(request.paths.workspace, "input", "traces"),
|
|
408
|
+
OUTPUT_DIR: request.paths.output,
|
|
409
|
+
WORKBENCH_SKILL_PATCH: commandSkillPatchPath(request),
|
|
410
|
+
};
|
|
411
|
+
}
|
|
412
|
+
function commandSkillPatchPath(request) {
|
|
413
|
+
return path.join(request.paths.output, COMMAND_SKILL_PATCH_FILE);
|
|
414
|
+
}
|
|
415
|
+
async function readSkillPatchFile(filePath) {
|
|
416
|
+
if (!await fileExists(filePath)) {
|
|
417
|
+
return null;
|
|
418
|
+
}
|
|
419
|
+
const record = jsonRecord(JSON.parse(await fs.readFile(filePath, "utf8")));
|
|
420
|
+
const rawFiles = Array.isArray(record.files) ? record.files : [];
|
|
421
|
+
const files = rawFiles.flatMap((entry) => {
|
|
422
|
+
if (!isPatchSurfaceSnapshotFile(entry)) {
|
|
423
|
+
return [];
|
|
424
|
+
}
|
|
425
|
+
return [{
|
|
426
|
+
...entry,
|
|
427
|
+
path: normalizeRelativePath(entry.path),
|
|
428
|
+
}];
|
|
429
|
+
});
|
|
430
|
+
const fileChanges = Array.isArray(record.fileChanges)
|
|
431
|
+
? record.fileChanges.flatMap((entry) => typeof entry === "string" ? [normalizeRelativePath(entry)] : [])
|
|
432
|
+
: files.map((file) => file.path);
|
|
433
|
+
return {
|
|
434
|
+
files,
|
|
435
|
+
fileChanges,
|
|
436
|
+
...(typeof record.summary === "string" ? { summary: record.summary } : {}),
|
|
437
|
+
...(record.feedback !== undefined ? { feedback: record.feedback } : {}),
|
|
438
|
+
};
|
|
439
|
+
}
|
|
440
|
+
function isPatchSurfaceSnapshotFile(value) {
|
|
441
|
+
if (!value || typeof value !== "object" || Array.isArray(value)) {
|
|
442
|
+
return false;
|
|
443
|
+
}
|
|
444
|
+
const record = value;
|
|
445
|
+
return typeof record.path === "string" && typeof record.content === "string";
|
|
446
|
+
}
|
|
390
447
|
async function writeOperationOkUnlessPresent(request, beforeRoot) {
|
|
391
448
|
if (await fileExists(workbenchAdapterOperationResultPath(request.paths.output))) {
|
|
392
449
|
return;
|
|
393
450
|
}
|
|
394
|
-
if (request.operation === "
|
|
395
|
-
const
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
451
|
+
if (request.operation === "skill.improve") {
|
|
452
|
+
const skillRoot = requiredRequestPath(request.paths.skill, "paths.skill");
|
|
453
|
+
const patch = await readSkillPatchFile(commandSkillPatchPath(request)) ??
|
|
454
|
+
await createSkillPatchFromWorkspace({
|
|
455
|
+
beforeRoot: beforeRoot ?? skillRoot,
|
|
456
|
+
afterRoot: skillRoot,
|
|
457
|
+
edits: request.context?.improve?.edits ?? [],
|
|
458
|
+
});
|
|
400
459
|
await writeWorkbenchAdapterOperationResult(request.paths.output, {
|
|
401
460
|
protocol: "workbench.adapter-result.v1",
|
|
402
461
|
operation: request.operation,
|
|
@@ -411,10 +470,10 @@ async function writeOperationOkUnlessPresent(request, beforeRoot) {
|
|
|
411
470
|
ok: true,
|
|
412
471
|
});
|
|
413
472
|
}
|
|
414
|
-
async function
|
|
415
|
-
const root = await fs.mkdtemp(path.join(os.tmpdir(), "workbench-
|
|
473
|
+
async function snapshotEditableSkillWorkspace(request) {
|
|
474
|
+
const root = await fs.mkdtemp(path.join(os.tmpdir(), "workbench-skill-before-"));
|
|
416
475
|
const edits = request.context?.improve?.edits ?? [];
|
|
417
|
-
const files = await
|
|
476
|
+
const files = await readEditableSkillWorkspaceFiles(requiredRequestPath(request.paths.skill, "paths.skill"), edits);
|
|
418
477
|
await writeSurfaceFiles(root, files);
|
|
419
478
|
return {
|
|
420
479
|
root,
|
|
@@ -423,7 +482,7 @@ async function snapshotEditableCandidateWorkspace(request) {
|
|
|
423
482
|
},
|
|
424
483
|
};
|
|
425
484
|
}
|
|
426
|
-
async function
|
|
485
|
+
async function readEditableSkillWorkspaceFiles(root, edits) {
|
|
427
486
|
const editPaths = edits
|
|
428
487
|
.map(normalizeRelativePath)
|
|
429
488
|
.filter((filePath) => !isRuntimeWorkspacePath(filePath));
|
|
@@ -431,7 +490,7 @@ async function readEditableCandidateWorkspaceFiles(root, edits) {
|
|
|
431
490
|
return [];
|
|
432
491
|
}
|
|
433
492
|
const files = await readSurfaceFiles(root);
|
|
434
|
-
return dedupeSurfaceFiles(files.filter((file) =>
|
|
493
|
+
return dedupeSurfaceFiles(files.filter((file) => isAllowedSkillEditPath(file.path, editPaths) &&
|
|
435
494
|
!isRuntimeWorkspacePath(file.path)));
|
|
436
495
|
}
|
|
437
496
|
async function firstExistingFile(files) {
|
|
@@ -449,64 +508,63 @@ function requiredRequestPath(value, label) {
|
|
|
449
508
|
}
|
|
450
509
|
return value;
|
|
451
510
|
}
|
|
452
|
-
async function
|
|
453
|
-
const
|
|
454
|
-
if (
|
|
455
|
-
throw new Error(`Engine resolve has no Workbench
|
|
511
|
+
async function readEngineCasesFromWorkbenchCaseRoot(casesRoot) {
|
|
512
|
+
const caseDirs = await listWorkbenchCaseDirectories(casesRoot);
|
|
513
|
+
if (caseDirs.length === 0) {
|
|
514
|
+
throw new Error(`Engine resolve has no Workbench case packages: ${casesRoot}`);
|
|
456
515
|
}
|
|
457
|
-
return await Promise.all(
|
|
458
|
-
|
|
459
|
-
id: path.basename(
|
|
516
|
+
return await Promise.all(caseDirs.map(async (caseDir) => readWorkbenchEngineCase({
|
|
517
|
+
caseDir,
|
|
518
|
+
id: path.basename(caseDir),
|
|
460
519
|
})));
|
|
461
520
|
}
|
|
462
|
-
async function
|
|
463
|
-
if (await fileExists(path.join(root,
|
|
464
|
-
throw new Error(`Workbench engine
|
|
521
|
+
async function listWorkbenchCaseDirectories(root) {
|
|
522
|
+
if (await fileExists(path.join(root, CASE_CONTROL_FILE))) {
|
|
523
|
+
throw new Error(`Workbench engine cases root must contain case directories, not a direct ${CASE_CONTROL_FILE}: ${root}`);
|
|
465
524
|
}
|
|
466
525
|
const entries = await fs.readdir(root, { withFileTypes: true });
|
|
467
|
-
const
|
|
526
|
+
const cases = [];
|
|
468
527
|
for (const entry of entries) {
|
|
469
528
|
if (!entry.isDirectory()) {
|
|
470
529
|
continue;
|
|
471
530
|
}
|
|
472
|
-
const
|
|
473
|
-
if (await fileExists(path.join(
|
|
474
|
-
|
|
531
|
+
const caseDir = path.join(root, entry.name);
|
|
532
|
+
if (await fileExists(path.join(caseDir, CASE_CONTROL_FILE))) {
|
|
533
|
+
cases.push(caseDir);
|
|
475
534
|
}
|
|
476
535
|
}
|
|
477
|
-
return
|
|
536
|
+
return cases.sort((left, right) => left.localeCompare(right));
|
|
478
537
|
}
|
|
479
538
|
async function readWorkbenchEngineCase(args) {
|
|
480
|
-
const sourceFiles = await readSurfaceFiles(args.
|
|
481
|
-
const
|
|
482
|
-
if (!
|
|
483
|
-
throw new Error(`
|
|
484
|
-
}
|
|
485
|
-
const
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
const
|
|
502
|
-
const
|
|
503
|
-
const
|
|
504
|
-
const publicFiles = stripTaskDirectory(sourceFiles, publicPrefix);
|
|
539
|
+
const sourceFiles = await readSurfaceFiles(args.caseDir);
|
|
540
|
+
const caseFile = sourceFiles.find((file) => normalizeRelativePath(file.path) === CASE_CONTROL_FILE && file.encoding === "utf8");
|
|
541
|
+
if (!caseFile) {
|
|
542
|
+
throw new Error(`Case ${args.id} is missing ${CASE_CONTROL_FILE}.`);
|
|
543
|
+
}
|
|
544
|
+
const caseRecord = jsonRecord(YAML.parse(caseFile.content));
|
|
545
|
+
if (caseRecord.version !== 1) {
|
|
546
|
+
throw new Error(`Case ${args.id} ${CASE_CONTROL_FILE} version must be 1.`);
|
|
547
|
+
}
|
|
548
|
+
if (typeof caseRecord.case !== "string" || caseRecord.case.trim().length === 0) {
|
|
549
|
+
throw new Error(`Case ${args.id} ${CASE_CONTROL_FILE} must include a case string.`);
|
|
550
|
+
}
|
|
551
|
+
const unsupportedCaseFields = Object.keys(caseRecord)
|
|
552
|
+
.filter((key) => !["version", "case", "split", "files", "tests", "solution", "environment"].includes(key));
|
|
553
|
+
if (unsupportedCaseFields.length > 0) {
|
|
554
|
+
throw new Error(`Case ${args.id} ${CASE_CONTROL_FILE} has unsupported field${unsupportedCaseFields.length === 1 ? "" : "s"}: ${unsupportedCaseFields.join(", ")}.`);
|
|
555
|
+
}
|
|
556
|
+
if (caseRecord.split !== undefined && (typeof caseRecord.split !== "string" || caseRecord.split.trim().length === 0)) {
|
|
557
|
+
throw new Error(`Case ${args.id} ${CASE_CONTROL_FILE} split must be a non-empty string when provided.`);
|
|
558
|
+
}
|
|
559
|
+
const publicPrefix = caseDirectoryPrefix(caseRecord.files, "files", args.id);
|
|
560
|
+
const testsPrefix = caseDirectoryPrefix(caseRecord.tests, "tests", args.id);
|
|
561
|
+
const solutionPrefix = caseDirectoryPrefix(caseRecord.solution, "solution", args.id);
|
|
562
|
+
const publicFiles = stripCaseDirectory(sourceFiles, publicPrefix);
|
|
505
563
|
const privateFiles = [
|
|
506
|
-
...
|
|
507
|
-
...
|
|
564
|
+
...stripCaseDirectory(sourceFiles, testsPrefix),
|
|
565
|
+
...stripCaseDirectory(sourceFiles, solutionPrefix),
|
|
508
566
|
].sort((left, right) => left.path.localeCompare(right.path));
|
|
509
|
-
|
|
567
|
+
assertWorkbenchCasePackageLayout(args.id, sourceFiles, [
|
|
510
568
|
publicPrefix,
|
|
511
569
|
testsPrefix,
|
|
512
570
|
solutionPrefix,
|
|
@@ -516,10 +574,10 @@ async function readWorkbenchEngineCase(args) {
|
|
|
516
574
|
id: normalizeRelativePath(args.id),
|
|
517
575
|
case: {
|
|
518
576
|
version: 3,
|
|
519
|
-
prompt:
|
|
520
|
-
...(typeof
|
|
521
|
-
...(
|
|
522
|
-
? { environment:
|
|
577
|
+
prompt: caseRecord.case,
|
|
578
|
+
...(typeof caseRecord.split === "string" ? { split: caseRecord.split.trim() } : {}),
|
|
579
|
+
...(caseRecord.environment !== undefined
|
|
580
|
+
? { environment: caseRecord.environment }
|
|
523
581
|
: {}),
|
|
524
582
|
},
|
|
525
583
|
files: {
|
|
@@ -529,26 +587,26 @@ async function readWorkbenchEngineCase(args) {
|
|
|
529
587
|
},
|
|
530
588
|
};
|
|
531
589
|
}
|
|
532
|
-
function
|
|
590
|
+
function caseDirectoryPrefix(value, fallback, caseId) {
|
|
533
591
|
if (value === undefined) {
|
|
534
592
|
return `${fallback}/`;
|
|
535
593
|
}
|
|
536
594
|
const record = jsonRecord(value);
|
|
537
595
|
if (typeof record.path !== "string" || record.path.trim().length === 0) {
|
|
538
|
-
throw new Error(`
|
|
596
|
+
throw new Error(`Case ${caseId} ${CASE_CONTROL_FILE} path config must include a path string.`);
|
|
539
597
|
}
|
|
540
598
|
return `${normalizeRelativePath(record.path)}/`;
|
|
541
599
|
}
|
|
542
|
-
function
|
|
600
|
+
function assertWorkbenchCasePackageLayout(caseId, files, allowedPrefixes) {
|
|
543
601
|
const invalid = files
|
|
544
602
|
.map((file) => normalizeRelativePath(file.path))
|
|
545
|
-
.filter((filePath) => filePath !==
|
|
603
|
+
.filter((filePath) => filePath !== CASE_CONTROL_FILE &&
|
|
546
604
|
!allowedPrefixes.some((prefix) => filePath.startsWith(prefix)));
|
|
547
605
|
if (invalid.length > 0) {
|
|
548
|
-
throw new Error(`
|
|
606
|
+
throw new Error(`Case ${caseId} contains unsupported file${invalid.length === 1 ? "" : "s"} outside case.yaml or declared case directories: ${invalid.join(", ")}`);
|
|
549
607
|
}
|
|
550
608
|
}
|
|
551
|
-
function
|
|
609
|
+
function stripCaseDirectory(files, prefix) {
|
|
552
610
|
return files.flatMap((file) => {
|
|
553
611
|
const normalized = normalizeRelativePath(file.path);
|
|
554
612
|
if (!normalized.startsWith(prefix)) {
|
|
@@ -578,7 +636,10 @@ async function readTestsResult(args) {
|
|
|
578
636
|
}
|
|
579
637
|
return normalizeTestsResult({ reward: score }, args.caseId);
|
|
580
638
|
}
|
|
581
|
-
|
|
639
|
+
return normalizeTestsResult({
|
|
640
|
+
score: 1,
|
|
641
|
+
summary: "Tests passed.",
|
|
642
|
+
}, args.caseId);
|
|
582
643
|
}
|
|
583
644
|
function testsVerifierOutputDir(outputRoot) {
|
|
584
645
|
return path.join(outputRoot, ".workbench", "internal", "verifier");
|
|
@@ -643,17 +704,17 @@ function workloadFromAdapterOperationRequest(request) {
|
|
|
643
704
|
const attempt = context.attempt ?? {};
|
|
644
705
|
return {
|
|
645
706
|
job: { id: request.jobId ?? request.id },
|
|
646
|
-
|
|
647
|
-
name: context.
|
|
648
|
-
description: context.
|
|
707
|
+
eval: {
|
|
708
|
+
name: context.eval?.name ?? "",
|
|
709
|
+
description: context.eval?.description ?? "",
|
|
649
710
|
},
|
|
650
|
-
|
|
651
|
-
path: context.
|
|
711
|
+
skill: {
|
|
712
|
+
path: context.skill?.path ?? "",
|
|
652
713
|
},
|
|
653
714
|
improve: {
|
|
654
715
|
edits: context.improve?.edits ?? [],
|
|
655
716
|
},
|
|
656
|
-
|
|
717
|
+
versionId: context.skill?.id ?? "",
|
|
657
718
|
attemptIndex: attempt.attemptIndex ?? 0,
|
|
658
719
|
sampleIndex: attempt.sampleIndex ?? 0,
|
|
659
720
|
caseId: attempt.caseId ?? "",
|
|
@@ -729,35 +790,35 @@ async function executeBuiltInAgentTurn(executor, request) {
|
|
|
729
790
|
const { defaultWorkbenchAgentTurnExecutor, executeWorkbenchAgentTurn, } = await import("./agent-turn.js");
|
|
730
791
|
return await executeWorkbenchAgentTurn(executor ?? defaultWorkbenchAgentTurnExecutor, request);
|
|
731
792
|
}
|
|
732
|
-
async function
|
|
733
|
-
if (request.operation !== "
|
|
734
|
-
throw new Error("Agent
|
|
793
|
+
async function writeAgentSkillOutput(request, workload, adapter, options = {}) {
|
|
794
|
+
if (request.operation !== "skill.run") {
|
|
795
|
+
throw new Error("Agent skill execution results can only complete skill.run operations.");
|
|
735
796
|
}
|
|
736
|
-
const traceRoot = path.join(request.paths.output, ".workbench", "internal", "agent-
|
|
797
|
+
const traceRoot = path.join(request.paths.output, ".workbench", "internal", "agent-skill");
|
|
737
798
|
const agentResult = await executeBuiltInAgentTurn(options.agentExecutor, {
|
|
738
799
|
role: "runner",
|
|
739
|
-
provider:
|
|
800
|
+
provider: adapter.agent,
|
|
740
801
|
adapterAuthRoot: options.adapterAuthRoot,
|
|
741
802
|
adapterAuthRequest: options.adapterAuthRequest,
|
|
742
803
|
adapterAuthEnv: options.adapterAuthEnv,
|
|
743
804
|
workspaceRoot: request.paths.workspace,
|
|
744
805
|
cwd: request.paths.workspace,
|
|
745
|
-
prompt:
|
|
806
|
+
prompt: buildAgentSkillPrompt(workload, adapter),
|
|
746
807
|
traceRoot,
|
|
747
808
|
jobId: workload.job.id,
|
|
748
809
|
});
|
|
749
|
-
const outputPath = path.join(request.paths.output, "
|
|
810
|
+
const outputPath = path.join(request.paths.output, "skill-summary.md");
|
|
750
811
|
await fs.mkdir(path.dirname(outputPath), { recursive: true });
|
|
751
812
|
await fs.writeFile(outputPath, agentResult.output);
|
|
752
813
|
const trace = {
|
|
753
|
-
path: `.workbench/traces/${workload.job.id}/
|
|
814
|
+
path: `.workbench/traces/${workload.job.id}/skill.json`,
|
|
754
815
|
kind: "text",
|
|
755
816
|
encoding: "utf8",
|
|
756
817
|
executable: false,
|
|
757
818
|
content: `${JSON.stringify({
|
|
758
|
-
kind: "
|
|
759
|
-
provider:
|
|
760
|
-
|
|
819
|
+
kind: "agent_skill",
|
|
820
|
+
provider: adapter.agent.use,
|
|
821
|
+
versionId: workload.versionId,
|
|
761
822
|
attemptIndex: workload.attemptIndex,
|
|
762
823
|
sampleIndex: workload.sampleIndex,
|
|
763
824
|
summary: agentResult.output,
|
|
@@ -769,37 +830,37 @@ async function writeAgentCandidateOutput(request, workload, candidate, options =
|
|
|
769
830
|
const usage = runtime.assignUsageRole("runner", agentResult.usage);
|
|
770
831
|
await writeWorkbenchAdapterOperationResult(request.paths.output, {
|
|
771
832
|
protocol: "workbench.adapter-result.v1",
|
|
772
|
-
operation: "
|
|
833
|
+
operation: "skill.run",
|
|
773
834
|
ok: true,
|
|
774
835
|
...(agentResult.output ? { summary: agentResult.output } : {}),
|
|
775
836
|
feedback: {
|
|
776
|
-
|
|
777
|
-
agent:
|
|
837
|
+
skill: "agent",
|
|
838
|
+
agent: adapter.agent.use,
|
|
778
839
|
metadata: agentResult.metadata,
|
|
779
840
|
},
|
|
780
841
|
...(usage ? { usage } : {}),
|
|
781
842
|
});
|
|
782
843
|
}
|
|
783
|
-
function
|
|
844
|
+
function buildAgentSkillPrompt(workload, adapter) {
|
|
784
845
|
return [
|
|
785
|
-
...(
|
|
846
|
+
...(adapter.instructions ? ["Instructions:", adapter.instructions, ""] : []),
|
|
786
847
|
"Context:",
|
|
787
|
-
"-
|
|
788
|
-
"-
|
|
848
|
+
"- The entry skill is mounted at /workspace/input/skills/primary unless another skill is selected.",
|
|
849
|
+
"- All skills installed for this run are mounted under /workspace/input/skills.",
|
|
789
850
|
"- The mutable working directory is /workspace.",
|
|
790
|
-
"- If the
|
|
851
|
+
"- If the skill declares prepare.command, it has already run and may have copied files into /workspace.",
|
|
791
852
|
...(workload.case?.prompt ? ["Case:", workload.case.prompt, ""] : []),
|
|
792
853
|
"- Public case files are mounted at /workspace/input/case.",
|
|
793
854
|
"- Verifier tests are not present while you run.",
|
|
794
|
-
"- Mutate the current working directory to complete the
|
|
855
|
+
"- Mutate the current working directory to complete the case.",
|
|
795
856
|
"- You may write inspection artifacts under /workspace/output.",
|
|
796
857
|
].join("\n");
|
|
797
858
|
}
|
|
798
|
-
async function
|
|
799
|
-
if (request.operation !== "
|
|
800
|
-
throw new Error("Agent
|
|
859
|
+
async function writeAgentSkillRevisionOutput(request, workload, improver, options) {
|
|
860
|
+
if (request.operation !== "skill.improve") {
|
|
861
|
+
throw new Error("Agent skill improvement results can only complete skill.improve operations.");
|
|
801
862
|
}
|
|
802
|
-
const before = await
|
|
863
|
+
const before = await snapshotEditableSkillWorkspace(request);
|
|
803
864
|
const traceRoot = path.join(request.paths.output, ".workbench", "internal", "agent-improver");
|
|
804
865
|
try {
|
|
805
866
|
const agentResult = await executeBuiltInAgentTurn(options.agentExecutor, {
|
|
@@ -809,19 +870,19 @@ async function writeAgentCandidateRevisionOutput(request, workload, improver, op
|
|
|
809
870
|
adapterAuthRequest: options.adapterAuthRequest,
|
|
810
871
|
adapterAuthEnv: options.adapterAuthEnv,
|
|
811
872
|
workspaceRoot: request.paths.workspace,
|
|
812
|
-
cwd: request.paths.
|
|
873
|
+
cwd: requiredRequestPath(request.paths.skill, "paths.skill"),
|
|
813
874
|
prompt: buildAgentImproverPrompt(workload),
|
|
814
875
|
traceRoot,
|
|
815
876
|
jobId: workload.job.id,
|
|
816
877
|
});
|
|
817
|
-
const
|
|
878
|
+
const skillPatch = await createSkillPatchFromWorkspace({
|
|
818
879
|
beforeRoot: before.root,
|
|
819
|
-
afterRoot: request.paths.
|
|
880
|
+
afterRoot: requiredRequestPath(request.paths.skill, "paths.skill"),
|
|
820
881
|
edits: workload.improve.edits,
|
|
821
882
|
});
|
|
822
|
-
const
|
|
823
|
-
if (
|
|
824
|
-
throw new Error("Agent improve adapter completed without changing a
|
|
883
|
+
const changedSkillPaths = skillPatch.fileChanges.filter((filePath) => isAllowedSkillEditPath(filePath, workload.improve.edits));
|
|
884
|
+
if (changedSkillPaths.length === 0) {
|
|
885
|
+
throw new Error("Agent improve adapter completed without changing a skill file covered by improve edits.");
|
|
825
886
|
}
|
|
826
887
|
const trace = {
|
|
827
888
|
path: `.workbench/traces/${workload.job.id}/improver.json`,
|
|
@@ -831,9 +892,9 @@ async function writeAgentCandidateRevisionOutput(request, workload, improver, op
|
|
|
831
892
|
content: `${JSON.stringify({
|
|
832
893
|
kind: "agent_improver",
|
|
833
894
|
provider: improver.agent.use,
|
|
834
|
-
|
|
895
|
+
versionId: workload.versionId,
|
|
835
896
|
attemptIndex: workload.attemptIndex,
|
|
836
|
-
changedPaths:
|
|
897
|
+
changedPaths: changedSkillPaths,
|
|
837
898
|
summary: agentResult.output,
|
|
838
899
|
metadata: agentResult.metadata,
|
|
839
900
|
}, null, 2)}\n`,
|
|
@@ -843,16 +904,16 @@ async function writeAgentCandidateRevisionOutput(request, workload, improver, op
|
|
|
843
904
|
const usage = runtime.assignUsageRole("improver", agentResult.usage);
|
|
844
905
|
await writeWorkbenchAdapterOperationResult(request.paths.output, {
|
|
845
906
|
protocol: "workbench.adapter-result.v1",
|
|
846
|
-
operation: "
|
|
907
|
+
operation: "skill.improve",
|
|
847
908
|
ok: true,
|
|
848
909
|
value: {
|
|
849
|
-
...
|
|
850
|
-
fileChanges:
|
|
910
|
+
...skillPatch,
|
|
911
|
+
fileChanges: changedSkillPaths,
|
|
851
912
|
},
|
|
852
913
|
...(agentResult.output ? { summary: agentResult.output } : {}),
|
|
853
914
|
feedback: {
|
|
854
915
|
improver: improver.agent.use,
|
|
855
|
-
changedPaths:
|
|
916
|
+
changedPaths: changedSkillPaths,
|
|
856
917
|
metadata: agentResult.metadata,
|
|
857
918
|
},
|
|
858
919
|
...(usage ? { usage } : {}),
|
|
@@ -864,12 +925,12 @@ async function writeAgentCandidateRevisionOutput(request, workload, improver, op
|
|
|
864
925
|
}
|
|
865
926
|
function buildAgentImproverPrompt(workload) {
|
|
866
927
|
return [
|
|
867
|
-
"
|
|
868
|
-
workload.
|
|
928
|
+
"Eval:",
|
|
929
|
+
workload.eval.description || workload.eval.name,
|
|
869
930
|
"",
|
|
870
|
-
"Improve the
|
|
931
|
+
"Improve the skill for this eval.",
|
|
871
932
|
"",
|
|
872
|
-
"
|
|
933
|
+
"Skill files are in the current directory.",
|
|
873
934
|
"Prior adapter executions are in /workspace/input/traces.",
|
|
874
935
|
"",
|
|
875
936
|
"Editable paths:",
|
|
@@ -935,7 +996,7 @@ async function writeRubricEvidenceFiles(args) {
|
|
|
935
996
|
schema: "workbench.engine.rubric.evidence.v1",
|
|
936
997
|
safeForImprover: true,
|
|
937
998
|
jobId: args.workload.job.id,
|
|
938
|
-
|
|
999
|
+
versionId: args.workload.versionId,
|
|
939
1000
|
attemptIndex: args.workload.attemptIndex,
|
|
940
1001
|
sampleIndex: args.workload.sampleIndex,
|
|
941
1002
|
caseId: args.workload.caseId,
|
|
@@ -1076,7 +1137,7 @@ function rubricCriterionTracePath(jobId, criterionId, turn) {
|
|
|
1076
1137
|
return `.workbench/traces/${jobId}/engine/rubric/criteria/${safeInternalPathSegment(criterionId)}/${turn}`;
|
|
1077
1138
|
}
|
|
1078
1139
|
function buildRubricCriterionJudgePrompt(workload, engine, criterion) {
|
|
1079
|
-
|
|
1140
|
+
requireWorkloadCase(workload, "Rubric judge");
|
|
1080
1141
|
return [
|
|
1081
1142
|
...(engine.instructions ? ["Instructions:", engine.instructions, ""] : []),
|
|
1082
1143
|
...(workload.case?.prompt ? ["Case:", workload.case.prompt, ""] : []),
|
|
@@ -1084,10 +1145,10 @@ function buildRubricCriterionJudgePrompt(workload, engine, criterion) {
|
|
|
1084
1145
|
JSON.stringify(criterion, null, 2),
|
|
1085
1146
|
"",
|
|
1086
1147
|
"Context:",
|
|
1087
|
-
"- The
|
|
1088
|
-
"-
|
|
1148
|
+
"- The skill already ran in this same working directory.",
|
|
1149
|
+
"- Skill outputs are available in the current working directory.",
|
|
1089
1150
|
"- Public case files are mounted at /workspace/input/case.",
|
|
1090
|
-
"- Verifier-private files are mounted at /workspace/private/engine when the
|
|
1151
|
+
"- Verifier-private files are mounted at /workspace/private/engine when the case provides them.",
|
|
1091
1152
|
"- Score only from the current working directory, public case files, verifier-private files, and the criterion above.",
|
|
1092
1153
|
"",
|
|
1093
1154
|
"Output:",
|
|
@@ -1276,17 +1337,17 @@ async function mapWithConcurrency(inputs, concurrency, mapper) {
|
|
|
1276
1337
|
await Promise.all(Array.from({ length: limit }, async () => worker()));
|
|
1277
1338
|
return results;
|
|
1278
1339
|
}
|
|
1279
|
-
function
|
|
1340
|
+
function requireWorkloadCase(workload, label) {
|
|
1280
1341
|
if (!workload.case) {
|
|
1281
1342
|
throw new Error(`${label} workload is missing case text.`);
|
|
1282
1343
|
}
|
|
1283
1344
|
}
|
|
1284
|
-
async function
|
|
1345
|
+
async function createSkillPatchFromWorkspace(args) {
|
|
1285
1346
|
const before = new Map((await readSurfaceFiles(args.beforeRoot))
|
|
1286
1347
|
.map((file) => [normalizeRelativePath(file.path), file]));
|
|
1287
1348
|
const changedFiles = (await readSurfaceFiles(args.afterRoot))
|
|
1288
1349
|
.map((file) => ({ ...file, path: normalizeRelativePath(file.path) }))
|
|
1289
|
-
.filter((file) =>
|
|
1350
|
+
.filter((file) => isAllowedSkillEditPath(file.path, args.edits) &&
|
|
1290
1351
|
!isRuntimeWorkspacePath(file.path) &&
|
|
1291
1352
|
!sameSurfaceFile(before.get(file.path), file))
|
|
1292
1353
|
.sort((left, right) => left.path.localeCompare(right.path));
|
|
@@ -1313,7 +1374,7 @@ function isRuntimeWorkspacePath(filePath) {
|
|
|
1313
1374
|
normalized === "private" ||
|
|
1314
1375
|
normalized.startsWith("private/");
|
|
1315
1376
|
}
|
|
1316
|
-
function
|
|
1377
|
+
function isAllowedSkillEditPath(filePath, edits) {
|
|
1317
1378
|
const normalized = normalizeRelativePath(filePath);
|
|
1318
1379
|
return edits.some((entry) => {
|
|
1319
1380
|
const editPath = normalizeRelativePath(entry).replace(/\/+$/u, "");
|
package/dist/manifests.d.ts
CHANGED
|
@@ -1,9 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
export type WorkbenchPublicBuiltInAdapterId = "workbench" | "codex" | "claude" | "command";
|
|
3
|
-
export type WorkbenchEngineHelperAdapterId = "rubric" | "tests";
|
|
4
|
-
export type WorkbenchBuiltInAdapterId = WorkbenchPublicBuiltInAdapterId | WorkbenchEngineHelperAdapterId;
|
|
5
|
-
export declare function builtinWorkbenchAdapterManifest(id: string): WorkbenchAdapterManifest | null;
|
|
6
|
-
export declare function builtinWorkbenchAdapterManifests(): WorkbenchAdapterManifest[];
|
|
7
|
-
export declare function isWorkbenchBuiltInAdapterId(id: string): id is WorkbenchBuiltInAdapterId;
|
|
8
|
-
export declare function adapterCommandName(adapterId: string): string;
|
|
1
|
+
export { adapterCommandName, builtinWorkbenchAdapterManifest, builtinWorkbenchAdapterManifests, isWorkbenchBuiltInAdapterId, type WorkbenchBuiltInAdapterId, type WorkbenchEngineHelperAdapterId, type WorkbenchPublicBuiltInAdapterId, } from "@workbench-ai/workbench-protocol";
|
|
9
2
|
//# sourceMappingURL=manifests.d.ts.map
|
package/dist/manifests.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"manifests.d.ts","sourceRoot":"","sources":["../src/manifests.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"manifests.d.ts","sourceRoot":"","sources":["../src/manifests.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,kBAAkB,EAClB,+BAA+B,EAC/B,gCAAgC,EAChC,2BAA2B,EAC3B,KAAK,yBAAyB,EAC9B,KAAK,8BAA8B,EACnC,KAAK,+BAA+B,GACrC,MAAM,kCAAkC,CAAC"}
|
package/dist/manifests.js
CHANGED
|
@@ -1,100 +1 @@
|
|
|
1
|
-
|
|
2
|
-
const BUILT_IN_ADAPTER_MANIFESTS = Object.fromEntries(Object.entries({
|
|
3
|
-
workbench: defineAdapter({
|
|
4
|
-
id: "workbench",
|
|
5
|
-
engineResolve: defineEngineResolver(),
|
|
6
|
-
engineRun: defineEngineRunner({ executor: "host" }),
|
|
7
|
-
slots: {
|
|
8
|
-
score: adapterSlot("/score", "engine.run"),
|
|
9
|
-
},
|
|
10
|
-
}),
|
|
11
|
-
codex: defineAdapter({
|
|
12
|
-
id: "codex",
|
|
13
|
-
candidate: defineCandidate(),
|
|
14
|
-
improve: defineImprover(),
|
|
15
|
-
setup: [
|
|
16
|
-
"npm install --global @openai/codex@0.125.0",
|
|
17
|
-
],
|
|
18
|
-
auth: {
|
|
19
|
-
methods: {
|
|
20
|
-
oauth: { files: [{ path: ".codex/auth.json" }] },
|
|
21
|
-
"api-key": { env: [{ name: "OPENAI_API_KEY" }] },
|
|
22
|
-
},
|
|
23
|
-
},
|
|
24
|
-
}),
|
|
25
|
-
claude: defineAdapter({
|
|
26
|
-
id: "claude",
|
|
27
|
-
candidate: defineCandidate(),
|
|
28
|
-
improve: defineImprover(),
|
|
29
|
-
setup: [
|
|
30
|
-
"npm install --global @anthropic-ai/claude-code@2.1.119",
|
|
31
|
-
],
|
|
32
|
-
auth: {
|
|
33
|
-
methods: {
|
|
34
|
-
oauth: {
|
|
35
|
-
files: [
|
|
36
|
-
{ path: ".claude.json" },
|
|
37
|
-
{ path: ".claude/oauth-token", required: false },
|
|
38
|
-
{ path: ".claude/.credentials.json", required: false },
|
|
39
|
-
],
|
|
40
|
-
},
|
|
41
|
-
"api-key": { env: [{ name: "ANTHROPIC_API_KEY" }] },
|
|
42
|
-
bedrock: {
|
|
43
|
-
env: [
|
|
44
|
-
{ name: "CLAUDE_CODE_USE_BEDROCK" },
|
|
45
|
-
{ name: "AWS_ACCESS_KEY_ID", required: false },
|
|
46
|
-
{ name: "AWS_SECRET_ACCESS_KEY", required: false },
|
|
47
|
-
{ name: "AWS_SESSION_TOKEN", required: false },
|
|
48
|
-
{ name: "AWS_REGION" },
|
|
49
|
-
{ name: "AWS_DEFAULT_REGION", required: false },
|
|
50
|
-
{ name: "AWS_BEARER_TOKEN_BEDROCK", required: false },
|
|
51
|
-
{ name: "ANTHROPIC_MODEL", required: false },
|
|
52
|
-
{ name: "ANTHROPIC_SMALL_FAST_MODEL", required: false },
|
|
53
|
-
],
|
|
54
|
-
},
|
|
55
|
-
},
|
|
56
|
-
},
|
|
57
|
-
}),
|
|
58
|
-
command: defineAdapter({
|
|
59
|
-
id: "command",
|
|
60
|
-
candidate: defineCandidate(),
|
|
61
|
-
engineRun: defineEngineRunner(),
|
|
62
|
-
improve: defineImprover(),
|
|
63
|
-
}),
|
|
64
|
-
rubric: defineAdapter({
|
|
65
|
-
id: "rubric",
|
|
66
|
-
engineRun: defineEngineRunner(),
|
|
67
|
-
slots: {
|
|
68
|
-
judge: adapterSlot("/judge", "candidate.run"),
|
|
69
|
-
},
|
|
70
|
-
}),
|
|
71
|
-
tests: defineAdapter({
|
|
72
|
-
id: "tests",
|
|
73
|
-
engineRun: defineEngineRunner(),
|
|
74
|
-
}),
|
|
75
|
-
}).map(([id, definition]) => [id, workbenchAdapterManifestFromDefinition(definition)]));
|
|
76
|
-
export function builtinWorkbenchAdapterManifest(id) {
|
|
77
|
-
return isWorkbenchBuiltInAdapterId(id)
|
|
78
|
-
? cloneManifest(BUILT_IN_ADAPTER_MANIFESTS[id])
|
|
79
|
-
: null;
|
|
80
|
-
}
|
|
81
|
-
export function builtinWorkbenchAdapterManifests() {
|
|
82
|
-
return Object.keys(BUILT_IN_ADAPTER_MANIFESTS)
|
|
83
|
-
.sort()
|
|
84
|
-
.map((id) => cloneManifest(BUILT_IN_ADAPTER_MANIFESTS[id]));
|
|
85
|
-
}
|
|
86
|
-
export function isWorkbenchBuiltInAdapterId(id) {
|
|
87
|
-
return Object.prototype.hasOwnProperty.call(BUILT_IN_ADAPTER_MANIFESTS, id);
|
|
88
|
-
}
|
|
89
|
-
export function adapterCommandName(adapterId) {
|
|
90
|
-
return `workbench-adapter-${adapterId}`;
|
|
91
|
-
}
|
|
92
|
-
function cloneManifest(manifest) {
|
|
93
|
-
return {
|
|
94
|
-
...manifest,
|
|
95
|
-
operations: JSON.parse(JSON.stringify(manifest.operations)),
|
|
96
|
-
setup: [...manifest.setup],
|
|
97
|
-
...(manifest.auth ? { auth: JSON.parse(JSON.stringify(manifest.auth)) } : {}),
|
|
98
|
-
...(manifest.slots ? { slots: JSON.parse(JSON.stringify(manifest.slots)) } : {}),
|
|
99
|
-
};
|
|
100
|
-
}
|
|
1
|
+
export { adapterCommandName, builtinWorkbenchAdapterManifest, builtinWorkbenchAdapterManifests, isWorkbenchBuiltInAdapterId, } from "@workbench-ai/workbench-protocol";
|
package/dist/runtime.js
CHANGED
|
@@ -12,11 +12,11 @@ export async function importWorkbenchRuntime() {
|
|
|
12
12
|
return await runtimeModule;
|
|
13
13
|
}
|
|
14
14
|
async function importWorkbenchRuntimeUncached() {
|
|
15
|
-
const
|
|
15
|
+
const skills = runtimeImportOptions();
|
|
16
16
|
let lastError;
|
|
17
|
-
for (const
|
|
17
|
+
for (const skill of skills) {
|
|
18
18
|
try {
|
|
19
|
-
return await import(__rewriteRelativeImportExtension(
|
|
19
|
+
return await import(__rewriteRelativeImportExtension(skill));
|
|
20
20
|
}
|
|
21
21
|
catch (error) {
|
|
22
22
|
lastError = error;
|
|
@@ -24,11 +24,11 @@ async function importWorkbenchRuntimeUncached() {
|
|
|
24
24
|
}
|
|
25
25
|
throw new Error(`Unable to load @workbench-ai/workbench-core for built-in adapters: ${lastError instanceof Error ? lastError.message : String(lastError)}`);
|
|
26
26
|
}
|
|
27
|
-
function
|
|
27
|
+
function runtimeImportOptions() {
|
|
28
28
|
return [
|
|
29
29
|
process.env.WORKBENCH_RUNTIME_IMPORT,
|
|
30
30
|
"/app/products/workbench/packages/core/src/index.ts",
|
|
31
31
|
new URL("../../core/src/index.ts", import.meta.url).href,
|
|
32
32
|
"@workbench-ai/workbench-core",
|
|
33
|
-
].filter((
|
|
33
|
+
].filter((skill) => typeof skill === "string" && skill.length > 0);
|
|
34
34
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@workbench-ai/workbench-built-in-adapters",
|
|
3
|
-
"version": "0.0.66",
|
|
3
|
+
"version": "0.0.68",
|
|
4
4
|
"repository": {
|
|
5
5
|
"type": "git",
|
|
6
6
|
"url": "git+https://github.com/workbench-ai/workbench.git",
|
|
@@ -33,11 +33,11 @@
|
|
|
33
33
|
"dependencies": {
|
|
34
34
|
"yaml": "^2.8.2",
|
|
35
35
|
"@workbench-ai/agent-driver-anthropic-claude-code": "0.0.46",
|
|
36
|
+
"@workbench-ai/workbench-core": "0.0.68",
|
|
37
|
+
"@workbench-ai/workbench-contract": "0.0.68",
|
|
38
|
+
"@workbench-ai/workbench-protocol": "0.0.68",
|
|
36
39
|
"@workbench-ai/agent-driver-openai-codex": "0.0.46",
|
|
37
|
-
"@workbench-ai/
|
|
38
|
-
"@workbench-ai/agent-driver": "0.0.46",
|
|
39
|
-
"@workbench-ai/workbench-core": "0.0.66",
|
|
40
|
-
"@workbench-ai/workbench-protocol": "0.0.66"
|
|
40
|
+
"@workbench-ai/agent-driver": "0.0.46"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"@types/node": "^24.3.1",
|