@workbench-ai/workbench-built-in-adapters 0.0.49 → 0.0.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-turn.d.ts +8 -1
- package/dist/agent-turn.d.ts.map +1 -1
- package/dist/agent-turn.js +21 -6
- package/dist/execute.d.ts.map +1 -1
- package/dist/execute.js +199 -149
- package/dist/manifests.js +8 -8
- package/dist/runtime.js +5 -5
- package/package.json +5 -5
package/dist/agent-turn.d.ts
CHANGED
|
@@ -7,7 +7,7 @@ export interface AgentProviderSpec {
|
|
|
7
7
|
effort?: string;
|
|
8
8
|
}
|
|
9
9
|
export interface WorkbenchAgentTurnRequest {
|
|
10
|
-
role: "
|
|
10
|
+
role: "improver" | "runner" | "engine";
|
|
11
11
|
provider: AgentProviderSpec;
|
|
12
12
|
adapterAuthRoot?: string;
|
|
13
13
|
adapterAuthRequest?: JsonValue;
|
|
@@ -29,4 +29,11 @@ export interface WorkbenchAgentTurnResult {
|
|
|
29
29
|
export type WorkbenchAgentTurnExecutor = (request: WorkbenchAgentTurnRequest) => Promise<WorkbenchAgentTurnResult>;
|
|
30
30
|
export declare function executeWorkbenchAgentTurn(executor: (request: WorkbenchAgentTurnRequest) => Promise<WorkbenchAgentTurnResult>, request: WorkbenchAgentTurnRequest): Promise<WorkbenchAgentTurnResult>;
|
|
31
31
|
export declare function defaultWorkbenchAgentTurnExecutor(request: WorkbenchAgentTurnRequest): Promise<WorkbenchAgentTurnResult>;
|
|
32
|
+
export declare function resolveAgentTurnTimeouts(defaults: {
|
|
33
|
+
turn_timeout_ms?: number;
|
|
34
|
+
stall_timeout_ms?: number;
|
|
35
|
+
}): {
|
|
36
|
+
turnTimeoutMs: number;
|
|
37
|
+
stallTimeoutMs: number;
|
|
38
|
+
};
|
|
32
39
|
//# sourceMappingURL=agent-turn.d.ts.map
|
package/dist/agent-turn.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent-turn.d.ts","sourceRoot":"","sources":["../src/agent-turn.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAEV,mBAAmB,EACnB,YAAY,EACb,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAQL,KAAK,SAAS,EAEf,MAAM,4BAA4B,CAAC;AACpC,OAAO,KAAK,EACV,gCAAgC,EACjC,MAAM,8BAA8B,CAAC;
|
|
1
|
+
{"version":3,"file":"agent-turn.d.ts","sourceRoot":"","sources":["../src/agent-turn.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAEV,mBAAmB,EACnB,YAAY,EACb,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAQL,KAAK,SAAS,EAEf,MAAM,4BAA4B,CAAC;AACpC,OAAO,KAAK,EACV,gCAAgC,EACjC,MAAM,8BAA8B,CAAC;AAiBtC,MAAM,WAAW,iBAAiB;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,yBAAyB;IACxC,IAAI,EAAE,UAAU,GAAG,QAAQ,GAAG,QAAQ,CAAC;IACvC,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,SAAS,CAAC;IAC/B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxC,aAAa,EAAE,MAAM,CAAC;IACtB,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,CAAC,EAAE,gCAAgC,CAAC;CACnD;AAED,MAAM,WAAW,wBAAwB;IACvC,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,mBAAmB,EAAE,CAAC;IAClC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IACpC,KAAK,CAAC,EAAE,YAAY,CAAC;CACtB;AAED,MAAM,MAAM,0BAA0B,GAAG,CAAC,OAAO,EAAE,yBAAyB,KAAK,OAAO,CAAC,wBAAwB,CAAC,CAAC;AA4BnH,wBAAsB,yBAAyB,CAC7C,QAAQ,EAAE,CAAC,OAAO,EAAE,yBAAyB,KAAK,OAAO,CAAC,wBAAwB,CAAC,EACnF,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,wBAAwB,CAAC,CAenC;AAED,wBAAsB,iCAAiC,CACrD,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,wBAAwB,CAAC,CAwFnC;AAkHD,wBAAgB,wBAAwB,CAAC,QAAQ,EAAE;IACjD,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B,GAAG;IACF,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAQA"}
|
package/dist/agent-turn.js
CHANGED
|
@@ -8,6 +8,8 @@ import { importWorkbenchRuntime } from "./runtime.js";
|
|
|
8
8
|
const DEFAULT_AGENT_TURN_MAX_ATTEMPTS = 3;
|
|
9
9
|
const DEFAULT_AGENT_TURN_RETRY_BASE_MS = 5_000;
|
|
10
10
|
const DEFAULT_AGENT_TURN_RETRY_MAX_MS = 30_000;
|
|
11
|
+
const DEFAULT_AGENT_TURN_TIMEOUT_MS = 3_600_000;
|
|
12
|
+
const DEFAULT_AGENT_STALL_TIMEOUT_MS = 300_000;
|
|
11
13
|
const AGENT_PROVIDER_REGISTRY = {
|
|
12
14
|
codex: {
|
|
13
15
|
executable: "codex",
|
|
@@ -201,14 +203,14 @@ function agentProviderRegistration(providerName) {
|
|
|
201
203
|
return registration;
|
|
202
204
|
}
|
|
203
205
|
async function buildAgentExecutionPlan(provider, providerSpec, workspaceRoot, agentHome, adapterAuth) {
|
|
204
|
-
const turnTimeoutMs = provider.manifest.defaults
|
|
206
|
+
const { turnTimeoutMs, stallTimeoutMs } = resolveAgentTurnTimeouts(provider.manifest.defaults);
|
|
205
207
|
const harness = {
|
|
206
208
|
id: provider.manifest.id,
|
|
207
209
|
auth: await resolveAgentAuth(provider, providerSpec, workspaceRoot, agentHome, adapterAuth),
|
|
208
210
|
...(firstNonEmpty(providerSpec.model, provider.manifest.defaults.model) ? { model: firstNonEmpty(providerSpec.model, provider.manifest.defaults.model) } : {}),
|
|
209
211
|
...(firstNonEmpty(providerSpec.effort, provider.manifest.defaults.effort) ? { effort: firstNonEmpty(providerSpec.effort, provider.manifest.defaults.effort) } : {}),
|
|
210
212
|
turn_timeout_ms: turnTimeoutMs,
|
|
211
|
-
stall_timeout_ms:
|
|
213
|
+
stall_timeout_ms: stallTimeoutMs,
|
|
212
214
|
config: resolveAgentConfig(provider, defaultWorkbenchAgentConfig(provider, providerSpec.use)),
|
|
213
215
|
retry: DEFAULT_HARNESS_RETRY,
|
|
214
216
|
cancel: DEFAULT_HARNESS_CANCEL,
|
|
@@ -221,6 +223,19 @@ async function buildAgentExecutionPlan(provider, providerSpec, workspaceRoot, ag
|
|
|
221
223
|
harness,
|
|
222
224
|
};
|
|
223
225
|
}
|
|
226
|
+
export function resolveAgentTurnTimeouts(defaults) {
|
|
227
|
+
const turnTimeoutMs = positiveTimeoutMs(defaults.turn_timeout_ms) ?? DEFAULT_AGENT_TURN_TIMEOUT_MS;
|
|
228
|
+
const requestedStallTimeoutMs = positiveTimeoutMs(defaults.stall_timeout_ms) ?? DEFAULT_AGENT_STALL_TIMEOUT_MS;
|
|
229
|
+
return {
|
|
230
|
+
turnTimeoutMs,
|
|
231
|
+
stallTimeoutMs: Math.min(requestedStallTimeoutMs, turnTimeoutMs),
|
|
232
|
+
};
|
|
233
|
+
}
|
|
234
|
+
function positiveTimeoutMs(value) {
|
|
235
|
+
return typeof value === "number" && Number.isFinite(value) && value > 0
|
|
236
|
+
? value
|
|
237
|
+
: null;
|
|
238
|
+
}
|
|
224
239
|
function defaultWorkbenchAgentConfig(provider, providerName) {
|
|
225
240
|
const fallback = (provider.manifest.defaults.config ?? {});
|
|
226
241
|
return {
|
|
@@ -229,9 +244,9 @@ function defaultWorkbenchAgentConfig(provider, providerName) {
|
|
|
229
244
|
};
|
|
230
245
|
}
|
|
231
246
|
async function resolveAgentAuth(provider, providerSpec, workspaceRoot, agentHome, adapterAuth) {
|
|
232
|
-
const
|
|
247
|
+
const candidate = adapterAuthProviderCandidate(adapterAuth.request, providerSpec.use) ??
|
|
233
248
|
(provider.manifest.defaults.auth ?? {});
|
|
234
|
-
const parsed = provider.schemas.auth.safeParse(
|
|
249
|
+
const parsed = provider.schemas.auth.safeParse(candidate);
|
|
235
250
|
if (!parsed.success) {
|
|
236
251
|
throw new Error(`Agent provider "${provider.manifest.id}" auth is invalid: ${formatValidationIssues(parsed.error.issues)}`);
|
|
237
252
|
}
|
|
@@ -239,7 +254,7 @@ async function resolveAgentAuth(provider, providerSpec, workspaceRoot, agentHome
|
|
|
239
254
|
void agentHome;
|
|
240
255
|
return { ...parsed.data };
|
|
241
256
|
}
|
|
242
|
-
function
|
|
257
|
+
function adapterAuthProviderCandidate(auth, providerName) {
|
|
243
258
|
const record = jsonRecord(auth);
|
|
244
259
|
const self = jsonRecord(record?.self);
|
|
245
260
|
const adapters = jsonRecord(record?.adapters);
|
|
@@ -338,7 +353,7 @@ function isTransientAgentTurnError(error) {
|
|
|
338
353
|
if (isNativeCaCertificateFailure(message)) {
|
|
339
354
|
return false;
|
|
340
355
|
}
|
|
341
|
-
return /\b(fetch failed|error sending request|stream disconnected before completion|ECONNRESET|ETIMEDOUT|EAI_AGAIN|ENETUNREACH|ECONNREFUSED|socket hang up|network error|UND_ERR_|signal SIGTERM)/iu.test(message);
|
|
356
|
+
return /\b(fetch failed|error sending request|stream disconnected before completion|turn stalled after \d+ms|ECONNRESET|ETIMEDOUT|EAI_AGAIN|ENETUNREACH|ECONNREFUSED|socket hang up|network error|UND_ERR_|signal SIGTERM)/iu.test(message);
|
|
342
357
|
}
|
|
343
358
|
function isNativeCaCertificateFailure(message) {
|
|
344
359
|
return /\bno native root CA certificates found\b|install ca-certificates/iu.test(message);
|
package/dist/execute.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"execute.d.ts","sourceRoot":"","sources":["../src/execute.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"execute.d.ts","sourceRoot":"","sources":["../src/execute.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAEV,IAAI,EAKL,MAAM,kCAAkC,CAAC;AAc1C,OAAO,KAAK,EAEV,0BAA0B,EAG3B,MAAM,iBAAiB,CAAC;AAQzB,MAAM,WAAW,4CAA4C;IAC3D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,0BAA0B,CAAC;IAC3C,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,IAAI,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AA4CD,wBAAsB,qCAAqC,CACzD,IAAI,GAAE,4CAAiD,GACtD,OAAO,CAAC,IAAI,CAAC,CAiEf"}
|
package/dist/execute.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { spawn } from "node:child_process";
|
|
2
2
|
import { promises as fs } from "node:fs";
|
|
3
|
+
import os from "node:os";
|
|
3
4
|
import path from "node:path";
|
|
4
5
|
import { ensureWorkbenchAdapterOutputDir, readWorkbenchAdapterOperationResult, readWorkbenchAdapterOperationRequest, runWorkbenchRuntimeOperationSequence, writeWorkbenchAdapterOperationResult, workbenchAdapterOperationResultPath, } from "@workbench-ai/workbench-protocol";
|
|
5
6
|
import YAML from "yaml";
|
|
@@ -47,8 +48,8 @@ export async function executeWorkbenchBuiltInAdapterCommand(args = {}) {
|
|
|
47
48
|
if (isBuiltInAgentAdapterId(adapterId)) {
|
|
48
49
|
const workload = workloadFromAdapterOperationRequest(request);
|
|
49
50
|
const agent = builtInAgentSpecFromRequest(request);
|
|
50
|
-
if (request.operation === "
|
|
51
|
-
await
|
|
51
|
+
if (request.operation === "candidate.improve") {
|
|
52
|
+
await writeAgentCandidateRevisionOutput(request, workload, agent, {
|
|
52
53
|
agentExecutor: args.agentExecutor,
|
|
53
54
|
adapterAuthRoot: args.adapterAuthRoot,
|
|
54
55
|
adapterAuthRequest: args.adapterAuthRequest ?? request.auth,
|
|
@@ -56,8 +57,8 @@ export async function executeWorkbenchBuiltInAdapterCommand(args = {}) {
|
|
|
56
57
|
});
|
|
57
58
|
return;
|
|
58
59
|
}
|
|
59
|
-
if (request.operation === "
|
|
60
|
-
await
|
|
60
|
+
if (request.operation === "candidate.run") {
|
|
61
|
+
await writeAgentCandidateOutput(request, workload, agent, {
|
|
61
62
|
agentExecutor: args.agentExecutor,
|
|
62
63
|
adapterAuthRoot: args.adapterAuthRoot,
|
|
63
64
|
adapterAuthRequest: args.adapterAuthRequest ?? request.auth,
|
|
@@ -123,7 +124,7 @@ async function workbenchEngineOutcomeUsage(outcome) {
|
|
|
123
124
|
const operationUsage = outcome.usage
|
|
124
125
|
? undefined
|
|
125
126
|
: runtime.mergeUsageSummaries(outcome.operationResults.map((result) => {
|
|
126
|
-
if (result.operation === "
|
|
127
|
+
if (result.operation === "candidate.run") {
|
|
127
128
|
return runtime.assignUsageRole("runner", result.usage);
|
|
128
129
|
}
|
|
129
130
|
if (result.operation === "engine.run") {
|
|
@@ -163,16 +164,16 @@ function workbenchEngineScoreInvocation(request) {
|
|
|
163
164
|
: adapterCommandName(score.use),
|
|
164
165
|
};
|
|
165
166
|
}
|
|
166
|
-
function
|
|
167
|
-
const
|
|
168
|
-
if (!
|
|
169
|
-
throw new Error("Workbench engine requires context.
|
|
167
|
+
function workbenchEngineCandidateInvocation(request) {
|
|
168
|
+
const candidate = request.context?.candidate?.run;
|
|
169
|
+
if (!candidate?.use || !candidate.command) {
|
|
170
|
+
throw new Error("Workbench engine requires context.candidate.run.use and context.candidate.run.command.");
|
|
170
171
|
}
|
|
171
172
|
return {
|
|
172
|
-
use:
|
|
173
|
-
with: (
|
|
174
|
-
...(
|
|
175
|
-
command:
|
|
173
|
+
use: candidate.use,
|
|
174
|
+
with: (candidate.with ?? {}),
|
|
175
|
+
...(candidate.auth !== undefined ? { auth: candidate.auth } : {}),
|
|
176
|
+
command: candidate.command,
|
|
176
177
|
};
|
|
177
178
|
}
|
|
178
179
|
function workbenchEngineGradingIsolation(request) {
|
|
@@ -188,13 +189,13 @@ function workbenchEngineGradingIsolation(request) {
|
|
|
188
189
|
}
|
|
189
190
|
async function runWorkbenchEngineSharedGrading(request) {
|
|
190
191
|
const inputs = await workbenchEngineRuntimeInputs(request);
|
|
191
|
-
const
|
|
192
|
+
const candidate = workbenchEngineCandidateInvocation(request);
|
|
192
193
|
const score = workbenchEngineScoreInvocation(request);
|
|
193
194
|
const result = await runWorkbenchRuntimeOperationSequence({
|
|
194
195
|
inputs,
|
|
195
196
|
prepare: true,
|
|
196
197
|
operations: [
|
|
197
|
-
{ label: "
|
|
198
|
+
{ label: "candidate", operation: "candidate.run", invocation: candidate },
|
|
198
199
|
{ label: "score", operation: "engine.run", invocation: score },
|
|
199
200
|
],
|
|
200
201
|
});
|
|
@@ -203,25 +204,25 @@ async function runWorkbenchEngineSharedGrading(request) {
|
|
|
203
204
|
}
|
|
204
205
|
async function runWorkbenchEngineSeparateGrading(request) {
|
|
205
206
|
const inputs = await workbenchEngineRuntimeInputs(request);
|
|
206
|
-
const
|
|
207
|
+
const candidate = workbenchEngineCandidateInvocation(request);
|
|
207
208
|
const score = workbenchEngineScoreInvocation(request);
|
|
208
209
|
const runtime = await importWorkbenchRuntime();
|
|
209
210
|
const runner = await runWorkbenchRuntimeOperationSequence({
|
|
210
211
|
inputs: {
|
|
211
|
-
|
|
212
|
+
candidate: inputs.candidate,
|
|
212
213
|
case: inputs.case,
|
|
213
214
|
traces: inputs.traces,
|
|
214
215
|
},
|
|
215
216
|
prepare: true,
|
|
216
217
|
collectWorkspace: true,
|
|
217
218
|
operations: [
|
|
218
|
-
{ label: "
|
|
219
|
+
{ label: "candidate", operation: "candidate.run", invocation: candidate },
|
|
219
220
|
],
|
|
220
221
|
});
|
|
221
222
|
assertRuntimeControlResultOk(runner, "Workbench separate runner");
|
|
222
223
|
const grader = await runWorkbenchRuntimeOperationSequence({
|
|
223
224
|
inputs: {
|
|
224
|
-
|
|
225
|
+
candidate: inputs.candidate,
|
|
225
226
|
case: inputs.case,
|
|
226
227
|
enginePrivate: inputs.enginePrivate,
|
|
227
228
|
traces: inputs.traces,
|
|
@@ -243,14 +244,14 @@ async function runWorkbenchEngineSeparateGrading(request) {
|
|
|
243
244
|
};
|
|
244
245
|
}
|
|
245
246
|
async function workbenchEngineRuntimeInputs(request) {
|
|
246
|
-
const [
|
|
247
|
-
readOptionalSurfaceFiles(request.paths.
|
|
247
|
+
const [candidate, caseFiles, enginePrivate, traces] = await Promise.all([
|
|
248
|
+
readOptionalSurfaceFiles(request.paths.candidate),
|
|
248
249
|
readOptionalSurfaceFiles(request.paths.case),
|
|
249
250
|
readOptionalSurfaceFiles(request.paths.enginePrivate),
|
|
250
251
|
readOptionalSurfaceFiles(request.paths.traces),
|
|
251
252
|
]);
|
|
252
253
|
return {
|
|
253
|
-
|
|
254
|
+
candidate,
|
|
254
255
|
case: caseFiles,
|
|
255
256
|
enginePrivate,
|
|
256
257
|
traces,
|
|
@@ -309,12 +310,20 @@ function safeInternalPathSegment(value) {
|
|
|
309
310
|
}
|
|
310
311
|
async function executeCommandAdapterRequest(request) {
|
|
311
312
|
const command = requiredAdapterCommandString(request, "command");
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
313
|
+
const before = request.operation === "candidate.improve"
|
|
314
|
+
? await snapshotEditableCandidateWorkspace(request)
|
|
315
|
+
: null;
|
|
316
|
+
try {
|
|
317
|
+
await runAdapterShellCommand(command, request.paths.workspace);
|
|
318
|
+
if (request.operation === "engine.run") {
|
|
319
|
+
await requireCommandScoreResult(request);
|
|
320
|
+
return;
|
|
321
|
+
}
|
|
322
|
+
await writeOperationOkUnlessPresent(request, before?.root);
|
|
323
|
+
}
|
|
324
|
+
finally {
|
|
325
|
+
await before?.cleanup();
|
|
316
326
|
}
|
|
317
|
-
await writeOperationOkUnlessPresent(request);
|
|
318
327
|
}
|
|
319
328
|
async function requireCommandScoreResult(request) {
|
|
320
329
|
if (!await fileExists(workbenchAdapterOperationResultPath(request.paths.output))) {
|
|
@@ -379,15 +388,15 @@ async function runAdapterShellCommand(command, cwd, env = {}) {
|
|
|
379
388
|
});
|
|
380
389
|
});
|
|
381
390
|
}
|
|
382
|
-
async function writeOperationOkUnlessPresent(request) {
|
|
391
|
+
async function writeOperationOkUnlessPresent(request, beforeRoot) {
|
|
383
392
|
if (await fileExists(workbenchAdapterOperationResultPath(request.paths.output))) {
|
|
384
393
|
return;
|
|
385
394
|
}
|
|
386
|
-
if (request.operation === "
|
|
387
|
-
const patch = await
|
|
388
|
-
beforeRoot: requiredRequestPath(request.paths.
|
|
395
|
+
if (request.operation === "candidate.improve") {
|
|
396
|
+
const patch = await createCandidatePatchFromWorkspace({
|
|
397
|
+
beforeRoot: beforeRoot ?? requiredRequestPath(request.paths.candidate, "paths.candidate"),
|
|
389
398
|
afterRoot: request.paths.workspace,
|
|
390
|
-
edits: request.context?.
|
|
399
|
+
edits: request.context?.improve?.edits ?? [],
|
|
391
400
|
});
|
|
392
401
|
await writeWorkbenchAdapterOperationResult(request.paths.output, {
|
|
393
402
|
protocol: "workbench.adapter-result.v1",
|
|
@@ -403,6 +412,41 @@ async function writeOperationOkUnlessPresent(request) {
|
|
|
403
412
|
ok: true,
|
|
404
413
|
});
|
|
405
414
|
}
|
|
415
|
+
async function snapshotEditableCandidateWorkspace(request) {
|
|
416
|
+
const root = await fs.mkdtemp(path.join(os.tmpdir(), "workbench-candidate-before-"));
|
|
417
|
+
const edits = request.context?.improve?.edits ?? [];
|
|
418
|
+
const files = await readEditableCandidateWorkspaceFiles(request.paths.workspace, edits);
|
|
419
|
+
await writeSurfaceFiles(root, files);
|
|
420
|
+
return {
|
|
421
|
+
root,
|
|
422
|
+
cleanup: async () => {
|
|
423
|
+
await fs.rm(root, { recursive: true, force: true }).catch(() => undefined);
|
|
424
|
+
},
|
|
425
|
+
};
|
|
426
|
+
}
|
|
427
|
+
async function readEditableCandidateWorkspaceFiles(root, edits) {
|
|
428
|
+
const files = [];
|
|
429
|
+
for (const edit of edits) {
|
|
430
|
+
const normalized = normalizeRelativePath(edit);
|
|
431
|
+
if (!normalized || isRuntimeWorkspacePath(normalized)) {
|
|
432
|
+
continue;
|
|
433
|
+
}
|
|
434
|
+
const absolutePath = path.join(root, normalized);
|
|
435
|
+
const stat = await fs.stat(absolutePath).catch(() => null);
|
|
436
|
+
if (!stat) {
|
|
437
|
+
continue;
|
|
438
|
+
}
|
|
439
|
+
if (stat.isDirectory()) {
|
|
440
|
+
await readSurfaceFilesInto(root, normalized, files);
|
|
441
|
+
continue;
|
|
442
|
+
}
|
|
443
|
+
if (stat.isFile()) {
|
|
444
|
+
files.push(await readSurfaceFile(root, normalized));
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
return dedupeSurfaceFiles(files.filter((file) => isCandidateEditPath(file.path, edits) &&
|
|
448
|
+
!isRuntimeWorkspacePath(file.path)));
|
|
449
|
+
}
|
|
406
450
|
async function firstExistingFile(files) {
|
|
407
451
|
for (const file of files) {
|
|
408
452
|
const stat = await fs.stat(file).catch(() => null);
|
|
@@ -539,21 +583,25 @@ async function readSurfaceFilesInto(root, relativeDir, result) {
|
|
|
539
583
|
if (!entry.isFile()) {
|
|
540
584
|
continue;
|
|
541
585
|
}
|
|
542
|
-
|
|
543
|
-
fs.readFile(absolutePath),
|
|
544
|
-
fs.stat(absolutePath),
|
|
545
|
-
]);
|
|
546
|
-
const text = body.toString("utf8");
|
|
547
|
-
const isUtf8 = Buffer.from(text, "utf8").equals(body);
|
|
548
|
-
result.push({
|
|
549
|
-
path: relativePath,
|
|
550
|
-
kind: isUtf8 ? "text" : "binary",
|
|
551
|
-
encoding: isUtf8 ? "utf8" : "base64",
|
|
552
|
-
content: isUtf8 ? text : body.toString("base64"),
|
|
553
|
-
executable: (stat.mode & 0o111) !== 0,
|
|
554
|
-
});
|
|
586
|
+
result.push(await readSurfaceFile(root, relativePath));
|
|
555
587
|
}
|
|
556
588
|
}
|
|
589
|
+
async function readSurfaceFile(root, relativePath) {
|
|
590
|
+
const absolutePath = path.join(root, normalizeRelativePath(relativePath));
|
|
591
|
+
const [body, stat] = await Promise.all([
|
|
592
|
+
fs.readFile(absolutePath),
|
|
593
|
+
fs.stat(absolutePath),
|
|
594
|
+
]);
|
|
595
|
+
const text = body.toString("utf8");
|
|
596
|
+
const isUtf8 = Buffer.from(text, "utf8").equals(body);
|
|
597
|
+
return {
|
|
598
|
+
path: normalizeRelativePath(relativePath),
|
|
599
|
+
kind: isUtf8 ? "text" : "binary",
|
|
600
|
+
encoding: isUtf8 ? "utf8" : "base64",
|
|
601
|
+
content: isUtf8 ? text : body.toString("base64"),
|
|
602
|
+
executable: (stat.mode & 0o111) !== 0,
|
|
603
|
+
};
|
|
604
|
+
}
|
|
557
605
|
async function fileExists(filePath) {
|
|
558
606
|
return fs.stat(filePath).then((stat) => stat.isFile(), () => false);
|
|
559
607
|
}
|
|
@@ -644,13 +692,13 @@ function workloadFromAdapterOperationRequest(request) {
|
|
|
644
692
|
name: context.benchmark?.name ?? "",
|
|
645
693
|
description: context.benchmark?.description ?? "",
|
|
646
694
|
},
|
|
647
|
-
|
|
648
|
-
path: context.
|
|
695
|
+
candidate: {
|
|
696
|
+
path: context.candidate?.path ?? "",
|
|
649
697
|
},
|
|
650
|
-
|
|
651
|
-
edits: context.
|
|
698
|
+
improve: {
|
|
699
|
+
edits: context.improve?.edits ?? [],
|
|
652
700
|
},
|
|
653
|
-
|
|
701
|
+
candidateId: context.candidate?.id ?? "",
|
|
654
702
|
attemptIndex: attempt.attemptIndex ?? 0,
|
|
655
703
|
sampleIndex: attempt.sampleIndex ?? 0,
|
|
656
704
|
caseId: attempt.caseId ?? "",
|
|
@@ -726,35 +774,35 @@ async function executeBuiltInAgentTurn(executor, request) {
|
|
|
726
774
|
const { defaultWorkbenchAgentTurnExecutor, executeWorkbenchAgentTurn, } = await import("./agent-turn.js");
|
|
727
775
|
return await executeWorkbenchAgentTurn(executor ?? defaultWorkbenchAgentTurnExecutor, request);
|
|
728
776
|
}
|
|
729
|
-
async function
|
|
730
|
-
if (request.operation !== "
|
|
731
|
-
throw new Error("Agent
|
|
777
|
+
async function writeAgentCandidateOutput(request, workload, candidate, options = {}) {
|
|
778
|
+
if (request.operation !== "candidate.run") {
|
|
779
|
+
throw new Error("Agent candidate results can only complete candidate.run operations.");
|
|
732
780
|
}
|
|
733
|
-
const traceRoot = path.join(request.paths.output, ".workbench", "internal", "agent-
|
|
781
|
+
const traceRoot = path.join(request.paths.output, ".workbench", "internal", "agent-candidate");
|
|
734
782
|
const agentResult = await executeBuiltInAgentTurn(options.agentExecutor, {
|
|
735
783
|
role: "runner",
|
|
736
|
-
provider:
|
|
784
|
+
provider: candidate.agent,
|
|
737
785
|
adapterAuthRoot: options.adapterAuthRoot,
|
|
738
786
|
adapterAuthRequest: options.adapterAuthRequest,
|
|
739
787
|
adapterAuthEnv: options.adapterAuthEnv,
|
|
740
788
|
workspaceRoot: request.paths.workspace,
|
|
741
789
|
cwd: request.paths.workspace,
|
|
742
|
-
prompt:
|
|
790
|
+
prompt: buildAgentCandidatePrompt(workload, candidate),
|
|
743
791
|
traceRoot,
|
|
744
792
|
jobId: workload.job.id,
|
|
745
793
|
});
|
|
746
|
-
const outputPath = path.join(request.paths.output, "
|
|
794
|
+
const outputPath = path.join(request.paths.output, "candidate-summary.md");
|
|
747
795
|
await fs.mkdir(path.dirname(outputPath), { recursive: true });
|
|
748
796
|
await fs.writeFile(outputPath, agentResult.output);
|
|
749
797
|
const trace = {
|
|
750
|
-
path: `.workbench/traces/${workload.job.id}/
|
|
798
|
+
path: `.workbench/traces/${workload.job.id}/candidate.json`,
|
|
751
799
|
kind: "text",
|
|
752
800
|
encoding: "utf8",
|
|
753
801
|
executable: false,
|
|
754
802
|
content: `${JSON.stringify({
|
|
755
|
-
kind: "
|
|
756
|
-
provider:
|
|
757
|
-
|
|
803
|
+
kind: "agent_candidate",
|
|
804
|
+
provider: candidate.agent.use,
|
|
805
|
+
candidateId: workload.candidateId,
|
|
758
806
|
attemptIndex: workload.attemptIndex,
|
|
759
807
|
sampleIndex: workload.sampleIndex,
|
|
760
808
|
summary: agentResult.output,
|
|
@@ -766,25 +814,25 @@ async function writeAgentSubjectOutput(request, workload, subject, options = {})
|
|
|
766
814
|
const usage = runtime.assignUsageRole("runner", agentResult.usage);
|
|
767
815
|
await writeWorkbenchAdapterOperationResult(request.paths.output, {
|
|
768
816
|
protocol: "workbench.adapter-result.v1",
|
|
769
|
-
operation: "
|
|
817
|
+
operation: "candidate.run",
|
|
770
818
|
ok: true,
|
|
771
819
|
...(agentResult.output ? { summary: agentResult.output } : {}),
|
|
772
820
|
feedback: {
|
|
773
|
-
|
|
774
|
-
agent:
|
|
821
|
+
candidate: "agent",
|
|
822
|
+
agent: candidate.agent.use,
|
|
775
823
|
metadata: agentResult.metadata,
|
|
776
824
|
},
|
|
777
825
|
...(usage ? { usage } : {}),
|
|
778
826
|
});
|
|
779
827
|
}
|
|
780
|
-
function
|
|
828
|
+
function buildAgentCandidatePrompt(workload, candidate) {
|
|
781
829
|
return [
|
|
782
|
-
...(
|
|
830
|
+
...(candidate.instructions ? ["Instructions:", candidate.instructions, ""] : []),
|
|
783
831
|
"Context:",
|
|
784
|
-
"-
|
|
785
|
-
"- Follow any
|
|
832
|
+
"- Candidate source files are mounted at /workspace/input/candidate.",
|
|
833
|
+
"- Follow any candidate guidance, skill files, scripts, or configuration under /workspace/input/candidate.",
|
|
786
834
|
"- The mutable working directory is /workspace.",
|
|
787
|
-
"- If the
|
|
835
|
+
"- If the candidate declares prepare.command, it has already run and may have copied files into /workspace.",
|
|
788
836
|
...(workload.case?.prompt ? ["Case:", workload.case.prompt, ""] : []),
|
|
789
837
|
"- Public case files are mounted at /workspace/input/case.",
|
|
790
838
|
"- Verifier tests are not present while you run.",
|
|
@@ -792,87 +840,89 @@ function buildAgentSubjectPrompt(workload, subject) {
|
|
|
792
840
|
"- You may write inspection artifacts under /workspace/output.",
|
|
793
841
|
].join("\n");
|
|
794
842
|
}
|
|
795
|
-
async function
|
|
796
|
-
if (request.operation !== "
|
|
797
|
-
throw new Error("Agent
|
|
843
|
+
async function writeAgentCandidateRevisionOutput(request, workload, improver, options) {
|
|
844
|
+
if (request.operation !== "candidate.improve") {
|
|
845
|
+
throw new Error("Agent improve results can only complete candidate.improve operations.");
|
|
798
846
|
}
|
|
799
|
-
const
|
|
800
|
-
const
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
847
|
+
const before = await snapshotEditableCandidateWorkspace(request);
|
|
848
|
+
const traceRoot = path.join(request.paths.output, ".workbench", "internal", "agent-improver");
|
|
849
|
+
try {
|
|
850
|
+
const agentResult = await executeBuiltInAgentTurn(options.agentExecutor, {
|
|
851
|
+
role: "improver",
|
|
852
|
+
provider: improver.agent,
|
|
853
|
+
adapterAuthRoot: options.adapterAuthRoot,
|
|
854
|
+
adapterAuthRequest: options.adapterAuthRequest,
|
|
855
|
+
adapterAuthEnv: options.adapterAuthEnv,
|
|
856
|
+
workspaceRoot: request.paths.workspace,
|
|
857
|
+
cwd: request.paths.workspace,
|
|
858
|
+
prompt: buildAgentImproverPrompt(workload),
|
|
859
|
+
traceRoot,
|
|
860
|
+
jobId: workload.job.id,
|
|
861
|
+
});
|
|
862
|
+
const candidatePatch = await createCandidatePatchFromWorkspace({
|
|
863
|
+
beforeRoot: before.root,
|
|
864
|
+
afterRoot: request.paths.workspace,
|
|
865
|
+
edits: workload.improve.edits,
|
|
866
|
+
});
|
|
867
|
+
const changedCandidatePaths = candidatePatch.fileChanges.filter((filePath) => isCandidateEditPath(filePath, workload.improve.edits));
|
|
868
|
+
if (changedCandidatePaths.length === 0) {
|
|
869
|
+
throw new Error("Agent improve adapter completed without changing a candidate file covered by improve edits.");
|
|
870
|
+
}
|
|
871
|
+
const trace = {
|
|
872
|
+
path: `.workbench/traces/${workload.job.id}/improver.json`,
|
|
873
|
+
kind: "text",
|
|
874
|
+
encoding: "utf8",
|
|
875
|
+
executable: false,
|
|
876
|
+
content: `${JSON.stringify({
|
|
877
|
+
kind: "agent_improver",
|
|
878
|
+
provider: improver.agent.use,
|
|
879
|
+
candidateId: workload.candidateId,
|
|
880
|
+
attemptIndex: workload.attemptIndex,
|
|
881
|
+
changedPaths: changedCandidatePaths,
|
|
882
|
+
summary: agentResult.output,
|
|
883
|
+
metadata: agentResult.metadata,
|
|
884
|
+
}, null, 2)}\n`,
|
|
885
|
+
};
|
|
886
|
+
await writeSurfaceFiles(request.paths.output, [trace, ...agentResult.traceFiles]);
|
|
887
|
+
const runtime = await importWorkbenchRuntime();
|
|
888
|
+
const usage = runtime.assignUsageRole("improver", agentResult.usage);
|
|
889
|
+
await writeWorkbenchAdapterOperationResult(request.paths.output, {
|
|
890
|
+
protocol: "workbench.adapter-result.v1",
|
|
891
|
+
operation: "candidate.improve",
|
|
892
|
+
ok: true,
|
|
893
|
+
value: {
|
|
894
|
+
...candidatePatch,
|
|
895
|
+
fileChanges: changedCandidatePaths,
|
|
896
|
+
},
|
|
897
|
+
...(agentResult.output ? { summary: agentResult.output } : {}),
|
|
898
|
+
feedback: {
|
|
899
|
+
improver: improver.agent.use,
|
|
900
|
+
changedPaths: changedCandidatePaths,
|
|
901
|
+
metadata: agentResult.metadata,
|
|
902
|
+
},
|
|
903
|
+
...(usage ? { usage } : {}),
|
|
904
|
+
});
|
|
905
|
+
}
|
|
906
|
+
finally {
|
|
907
|
+
await before.cleanup();
|
|
820
908
|
}
|
|
821
|
-
const trace = {
|
|
822
|
-
path: `.workbench/traces/${workload.job.id}/optimizer.json`,
|
|
823
|
-
kind: "text",
|
|
824
|
-
encoding: "utf8",
|
|
825
|
-
executable: false,
|
|
826
|
-
content: `${JSON.stringify({
|
|
827
|
-
kind: "agent_optimizer",
|
|
828
|
-
provider: optimizer.agent.use,
|
|
829
|
-
subjectId: workload.subjectId,
|
|
830
|
-
attemptIndex: workload.attemptIndex,
|
|
831
|
-
changedPaths: changedSubjectPaths,
|
|
832
|
-
summary: agentResult.output,
|
|
833
|
-
metadata: agentResult.metadata,
|
|
834
|
-
}, null, 2)}\n`,
|
|
835
|
-
};
|
|
836
|
-
await writeSurfaceFiles(request.paths.output, [trace, ...agentResult.traceFiles]);
|
|
837
|
-
const runtime = await importWorkbenchRuntime();
|
|
838
|
-
const usage = runtime.assignUsageRole("optimizer", agentResult.usage);
|
|
839
|
-
await writeWorkbenchAdapterOperationResult(request.paths.output, {
|
|
840
|
-
protocol: "workbench.adapter-result.v1",
|
|
841
|
-
operation: "optimizer.improve",
|
|
842
|
-
ok: true,
|
|
843
|
-
value: {
|
|
844
|
-
...subjectPatch,
|
|
845
|
-
fileChanges: changedSubjectPaths,
|
|
846
|
-
},
|
|
847
|
-
...(agentResult.output ? { summary: agentResult.output } : {}),
|
|
848
|
-
feedback: {
|
|
849
|
-
optimizer: optimizer.agent.use,
|
|
850
|
-
changedPaths: changedSubjectPaths,
|
|
851
|
-
metadata: agentResult.metadata,
|
|
852
|
-
},
|
|
853
|
-
...(usage ? { usage } : {}),
|
|
854
|
-
});
|
|
855
909
|
}
|
|
856
|
-
function
|
|
910
|
+
function buildAgentImproverPrompt(workload) {
|
|
857
911
|
return [
|
|
858
912
|
"Benchmark:",
|
|
859
913
|
workload.benchmark.description || workload.benchmark.name,
|
|
860
914
|
"",
|
|
861
|
-
"
|
|
862
|
-
"- Subject source files are mounted at /workspace/input/subject.",
|
|
863
|
-
"- Follow any subject guidance, skill files, scripts, or configuration under /workspace/input/subject.",
|
|
864
|
-
"- The mutable working directory is /workspace.",
|
|
865
|
-
"- If the subject declares prepare.command, it has already run and may have copied files into /workspace.",
|
|
866
|
-
"- Prior run traces are mounted at /workspace/input/traces.",
|
|
867
|
-
"- Use /workspace/input/traces as the source of truth for what happened in prior attempts.",
|
|
868
|
-
"- Do not mutate /workspace/input.",
|
|
915
|
+
"Improve the candidate for this benchmark.",
|
|
869
916
|
"",
|
|
870
|
-
"
|
|
871
|
-
|
|
917
|
+
"Candidate files are in the current directory.",
|
|
918
|
+
"Prior adapter executions are in /workspace/input/traces.",
|
|
872
919
|
"",
|
|
873
|
-
"
|
|
874
|
-
|
|
875
|
-
"
|
|
920
|
+
"Editable paths:",
|
|
921
|
+
workload.improve.edits.map((entry) => `- ${entry}`).join("\n"),
|
|
922
|
+
"",
|
|
923
|
+
"Rules:",
|
|
924
|
+
"- Modify only editable paths.",
|
|
925
|
+
"- Change at least one editable file.",
|
|
876
926
|
].join("\n");
|
|
877
927
|
}
|
|
878
928
|
async function writeRubricJudgeResult(request, workload, engine, options = {}) {
|
|
@@ -928,9 +978,9 @@ async function writeRubricEvidenceFiles(args) {
|
|
|
928
978
|
const root = `.workbench/traces/${args.workload.job.id}/engine/rubric`;
|
|
929
979
|
const scorecard = {
|
|
930
980
|
schema: "workbench.engine.rubric.evidence.v1",
|
|
931
|
-
|
|
981
|
+
safeForImprover: true,
|
|
932
982
|
jobId: args.workload.job.id,
|
|
933
|
-
|
|
983
|
+
candidateId: args.workload.candidateId,
|
|
934
984
|
attemptIndex: args.workload.attemptIndex,
|
|
935
985
|
sampleIndex: args.workload.sampleIndex,
|
|
936
986
|
caseId: args.workload.caseId,
|
|
@@ -957,7 +1007,7 @@ async function writeRubricEvidenceFiles(args) {
|
|
|
957
1007
|
jsonSurfaceFile(`${root}/scorecard.json`, scorecard),
|
|
958
1008
|
...args.criterionRuns.map((run) => jsonSurfaceFile(`${root}/criteria/${safeInternalPathSegment(run.result.criterion_id)}/result.json`, {
|
|
959
1009
|
schema: "workbench.engine.rubric.criterion-evidence.v1",
|
|
960
|
-
|
|
1010
|
+
safeForImprover: true,
|
|
961
1011
|
criterion: args.engine.criteria.find((criterion) => criterion.id === run.result.criterion_id) ?? {
|
|
962
1012
|
id: run.result.criterion_id,
|
|
963
1013
|
},
|
|
@@ -1079,8 +1129,8 @@ function buildRubricCriterionJudgePrompt(workload, engine, criterion) {
|
|
|
1079
1129
|
JSON.stringify(criterion, null, 2),
|
|
1080
1130
|
"",
|
|
1081
1131
|
"Context:",
|
|
1082
|
-
"- The
|
|
1083
|
-
"-
|
|
1132
|
+
"- The candidate already ran in this same working directory.",
|
|
1133
|
+
"- Candidate outputs are available in the current working directory.",
|
|
1084
1134
|
"- Public case files are mounted at /workspace/input/case.",
|
|
1085
1135
|
"- Verifier-private files are mounted at /workspace/private/engine when the task provides them.",
|
|
1086
1136
|
"- Score only from the current working directory, public case files, verifier-private files, and the criterion above.",
|
|
@@ -1276,12 +1326,12 @@ function requireWorkloadTask(workload, label) {
|
|
|
1276
1326
|
throw new Error(`${label} workload is missing case text.`);
|
|
1277
1327
|
}
|
|
1278
1328
|
}
|
|
1279
|
-
async function
|
|
1329
|
+
async function createCandidatePatchFromWorkspace(args) {
|
|
1280
1330
|
const before = new Map((await readSurfaceFilesRecursive(args.beforeRoot))
|
|
1281
1331
|
.map((file) => [normalizeRelativePath(file.path), file]));
|
|
1282
1332
|
const changedFiles = (await readSurfaceFilesRecursive(args.afterRoot))
|
|
1283
1333
|
.map((file) => ({ ...file, path: normalizeRelativePath(file.path) }))
|
|
1284
|
-
.filter((file) =>
|
|
1334
|
+
.filter((file) => isCandidateEditPath(file.path, args.edits) &&
|
|
1285
1335
|
!isRuntimeWorkspacePath(file.path) &&
|
|
1286
1336
|
!sameSurfaceFile(before.get(file.path), file))
|
|
1287
1337
|
.sort((left, right) => left.path.localeCompare(right.path));
|
|
@@ -1321,7 +1371,7 @@ async function writeSurfaceFiles(root, files) {
|
|
|
1321
1371
|
}
|
|
1322
1372
|
}
|
|
1323
1373
|
}
|
|
1324
|
-
function
|
|
1374
|
+
function isCandidateEditPath(filePath, edits) {
|
|
1325
1375
|
const normalized = normalizeRelativePath(filePath);
|
|
1326
1376
|
return edits.some((entry) => {
|
|
1327
1377
|
const editPath = normalizeRelativePath(entry).replace(/\/+$/u, "");
|
package/dist/manifests.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { adapterSlot, defineAdapter, defineEngineResolver,
|
|
1
|
+
import { adapterSlot, defineAdapter, defineEngineResolver, defineCandidate, defineImprover, defineEngineRunner, workbenchAdapterManifestFromDefinition, } from "@workbench-ai/workbench-protocol";
|
|
2
2
|
const BUILT_IN_ADAPTER_MANIFESTS = Object.fromEntries(Object.entries({
|
|
3
3
|
workbench: defineAdapter({
|
|
4
4
|
id: "workbench",
|
|
@@ -10,8 +10,8 @@ const BUILT_IN_ADAPTER_MANIFESTS = Object.fromEntries(Object.entries({
|
|
|
10
10
|
}),
|
|
11
11
|
codex: defineAdapter({
|
|
12
12
|
id: "codex",
|
|
13
|
-
|
|
14
|
-
improve:
|
|
13
|
+
candidate: defineCandidate(),
|
|
14
|
+
improve: defineImprover(),
|
|
15
15
|
setup: [
|
|
16
16
|
"npm install --global @openai/codex@0.125.0",
|
|
17
17
|
],
|
|
@@ -24,8 +24,8 @@ const BUILT_IN_ADAPTER_MANIFESTS = Object.fromEntries(Object.entries({
|
|
|
24
24
|
}),
|
|
25
25
|
claude: defineAdapter({
|
|
26
26
|
id: "claude",
|
|
27
|
-
|
|
28
|
-
improve:
|
|
27
|
+
candidate: defineCandidate(),
|
|
28
|
+
improve: defineImprover(),
|
|
29
29
|
setup: [
|
|
30
30
|
"npm install --global @anthropic-ai/claude-code@2.1.119",
|
|
31
31
|
],
|
|
@@ -57,15 +57,15 @@ const BUILT_IN_ADAPTER_MANIFESTS = Object.fromEntries(Object.entries({
|
|
|
57
57
|
}),
|
|
58
58
|
command: defineAdapter({
|
|
59
59
|
id: "command",
|
|
60
|
-
|
|
60
|
+
candidate: defineCandidate(),
|
|
61
61
|
engineRun: defineEngineRunner(),
|
|
62
|
-
improve:
|
|
62
|
+
improve: defineImprover(),
|
|
63
63
|
}),
|
|
64
64
|
rubric: defineAdapter({
|
|
65
65
|
id: "rubric",
|
|
66
66
|
engineRun: defineEngineRunner(),
|
|
67
67
|
slots: {
|
|
68
|
-
judge: adapterSlot("/judge", "
|
|
68
|
+
judge: adapterSlot("/judge", "candidate.run"),
|
|
69
69
|
},
|
|
70
70
|
}),
|
|
71
71
|
tests: defineAdapter({
|
package/dist/runtime.js
CHANGED
|
@@ -12,11 +12,11 @@ export async function importWorkbenchRuntime() {
|
|
|
12
12
|
return await runtimeModule;
|
|
13
13
|
}
|
|
14
14
|
async function importWorkbenchRuntimeUncached() {
|
|
15
|
-
const
|
|
15
|
+
const candidates = runtimeImportCandidates();
|
|
16
16
|
let lastError;
|
|
17
|
-
for (const
|
|
17
|
+
for (const candidate of candidates) {
|
|
18
18
|
try {
|
|
19
|
-
return await import(__rewriteRelativeImportExtension(
|
|
19
|
+
return await import(__rewriteRelativeImportExtension(candidate));
|
|
20
20
|
}
|
|
21
21
|
catch (error) {
|
|
22
22
|
lastError = error;
|
|
@@ -24,11 +24,11 @@ async function importWorkbenchRuntimeUncached() {
|
|
|
24
24
|
}
|
|
25
25
|
throw new Error(`Unable to load @workbench-ai/workbench-core for built-in adapters: ${lastError instanceof Error ? lastError.message : String(lastError)}`);
|
|
26
26
|
}
|
|
27
|
-
function
|
|
27
|
+
function runtimeImportCandidates() {
|
|
28
28
|
return [
|
|
29
29
|
process.env.WORKBENCH_RUNTIME_IMPORT,
|
|
30
30
|
"/app/products/workbench/packages/core/src/index.ts",
|
|
31
31
|
new URL("../../core/src/index.ts", import.meta.url).href,
|
|
32
32
|
"@workbench-ai/workbench-core",
|
|
33
|
-
].filter((
|
|
33
|
+
].filter((candidate) => typeof candidate === "string" && candidate.length > 0);
|
|
34
34
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@workbench-ai/workbench-built-in-adapters",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.50",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -33,11 +33,11 @@
|
|
|
33
33
|
"dependencies": {
|
|
34
34
|
"yaml": "^2.8.2",
|
|
35
35
|
"@workbench-ai/agent-driver-anthropic-claude-code": "0.0.45",
|
|
36
|
-
"@workbench-ai/agent-driver": "0.0.45",
|
|
37
36
|
"@workbench-ai/agent-driver-openai-codex": "0.0.45",
|
|
38
|
-
"@workbench-ai/
|
|
39
|
-
"@workbench-ai/workbench-
|
|
40
|
-
"@workbench-ai/workbench-
|
|
37
|
+
"@workbench-ai/agent-driver": "0.0.45",
|
|
38
|
+
"@workbench-ai/workbench-contract": "0.0.50",
|
|
39
|
+
"@workbench-ai/workbench-protocol": "0.0.50",
|
|
40
|
+
"@workbench-ai/workbench-core": "0.0.50"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"@types/node": "^24.3.1",
|