@workbench-ai/workbench-built-in-adapters 0.0.49 → 0.0.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-turn.d.ts +8 -1
- package/dist/agent-turn.d.ts.map +1 -1
- package/dist/agent-turn.js +21 -6
- package/dist/execute.d.ts.map +1 -1
- package/dist/execute.js +204 -150
- package/dist/manifests.js +8 -8
- package/dist/runtime.js +5 -5
- package/package.json +6 -6
package/dist/agent-turn.d.ts
CHANGED
|
@@ -7,7 +7,7 @@ export interface AgentProviderSpec {
|
|
|
7
7
|
effort?: string;
|
|
8
8
|
}
|
|
9
9
|
export interface WorkbenchAgentTurnRequest {
|
|
10
|
-
role: "
|
|
10
|
+
role: "improver" | "runner" | "engine";
|
|
11
11
|
provider: AgentProviderSpec;
|
|
12
12
|
adapterAuthRoot?: string;
|
|
13
13
|
adapterAuthRequest?: JsonValue;
|
|
@@ -29,4 +29,11 @@ export interface WorkbenchAgentTurnResult {
|
|
|
29
29
|
export type WorkbenchAgentTurnExecutor = (request: WorkbenchAgentTurnRequest) => Promise<WorkbenchAgentTurnResult>;
|
|
30
30
|
export declare function executeWorkbenchAgentTurn(executor: (request: WorkbenchAgentTurnRequest) => Promise<WorkbenchAgentTurnResult>, request: WorkbenchAgentTurnRequest): Promise<WorkbenchAgentTurnResult>;
|
|
31
31
|
export declare function defaultWorkbenchAgentTurnExecutor(request: WorkbenchAgentTurnRequest): Promise<WorkbenchAgentTurnResult>;
|
|
32
|
+
export declare function resolveAgentTurnTimeouts(defaults: {
|
|
33
|
+
turn_timeout_ms?: number;
|
|
34
|
+
stall_timeout_ms?: number;
|
|
35
|
+
}): {
|
|
36
|
+
turnTimeoutMs: number;
|
|
37
|
+
stallTimeoutMs: number;
|
|
38
|
+
};
|
|
32
39
|
//# sourceMappingURL=agent-turn.d.ts.map
|
package/dist/agent-turn.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent-turn.d.ts","sourceRoot":"","sources":["../src/agent-turn.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAEV,mBAAmB,EACnB,YAAY,EACb,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAQL,KAAK,SAAS,EAEf,MAAM,4BAA4B,CAAC;AACpC,OAAO,KAAK,EACV,gCAAgC,EACjC,MAAM,8BAA8B,CAAC;
|
|
1
|
+
{"version":3,"file":"agent-turn.d.ts","sourceRoot":"","sources":["../src/agent-turn.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAEV,mBAAmB,EACnB,YAAY,EACb,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAQL,KAAK,SAAS,EAEf,MAAM,4BAA4B,CAAC;AACpC,OAAO,KAAK,EACV,gCAAgC,EACjC,MAAM,8BAA8B,CAAC;AAiBtC,MAAM,WAAW,iBAAiB;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,yBAAyB;IACxC,IAAI,EAAE,UAAU,GAAG,QAAQ,GAAG,QAAQ,CAAC;IACvC,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,SAAS,CAAC;IAC/B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxC,aAAa,EAAE,MAAM,CAAC;IACtB,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,CAAC,EAAE,gCAAgC,CAAC;CACnD;AAED,MAAM,WAAW,wBAAwB;IACvC,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,mBAAmB,EAAE,CAAC;IAClC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IACpC,KAAK,CAAC,EAAE,YAAY,CAAC;CACtB;AAED,MAAM,MAAM,0BAA0B,GAAG,CAAC,OAAO,EAAE,yBAAyB,KAAK,OAAO,CAAC,wBAAwB,CAAC,CAAC;AA4BnH,wBAAsB,yBAAyB,CAC7C,QAAQ,EAAE,CAAC,OAAO,EAAE,yBAAyB,KAAK,OAAO,CAAC,wBAAwB,CAAC,EACnF,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,wBAAwB,CAAC,CAenC;AAED,wBAAsB,iCAAiC,CACrD,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,wBAAwB,CAAC,CAwFnC;AAkHD,wBAAgB,wBAAwB,CAAC,QAAQ,EAAE;IACjD,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B,GAAG;IACF,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAQA"}
|
package/dist/agent-turn.js
CHANGED
|
@@ -8,6 +8,8 @@ import { importWorkbenchRuntime } from "./runtime.js";
|
|
|
8
8
|
const DEFAULT_AGENT_TURN_MAX_ATTEMPTS = 3;
|
|
9
9
|
const DEFAULT_AGENT_TURN_RETRY_BASE_MS = 5_000;
|
|
10
10
|
const DEFAULT_AGENT_TURN_RETRY_MAX_MS = 30_000;
|
|
11
|
+
const DEFAULT_AGENT_TURN_TIMEOUT_MS = 3_600_000;
|
|
12
|
+
const DEFAULT_AGENT_STALL_TIMEOUT_MS = 300_000;
|
|
11
13
|
const AGENT_PROVIDER_REGISTRY = {
|
|
12
14
|
codex: {
|
|
13
15
|
executable: "codex",
|
|
@@ -201,14 +203,14 @@ function agentProviderRegistration(providerName) {
|
|
|
201
203
|
return registration;
|
|
202
204
|
}
|
|
203
205
|
async function buildAgentExecutionPlan(provider, providerSpec, workspaceRoot, agentHome, adapterAuth) {
|
|
204
|
-
const turnTimeoutMs = provider.manifest.defaults
|
|
206
|
+
const { turnTimeoutMs, stallTimeoutMs } = resolveAgentTurnTimeouts(provider.manifest.defaults);
|
|
205
207
|
const harness = {
|
|
206
208
|
id: provider.manifest.id,
|
|
207
209
|
auth: await resolveAgentAuth(provider, providerSpec, workspaceRoot, agentHome, adapterAuth),
|
|
208
210
|
...(firstNonEmpty(providerSpec.model, provider.manifest.defaults.model) ? { model: firstNonEmpty(providerSpec.model, provider.manifest.defaults.model) } : {}),
|
|
209
211
|
...(firstNonEmpty(providerSpec.effort, provider.manifest.defaults.effort) ? { effort: firstNonEmpty(providerSpec.effort, provider.manifest.defaults.effort) } : {}),
|
|
210
212
|
turn_timeout_ms: turnTimeoutMs,
|
|
211
|
-
stall_timeout_ms:
|
|
213
|
+
stall_timeout_ms: stallTimeoutMs,
|
|
212
214
|
config: resolveAgentConfig(provider, defaultWorkbenchAgentConfig(provider, providerSpec.use)),
|
|
213
215
|
retry: DEFAULT_HARNESS_RETRY,
|
|
214
216
|
cancel: DEFAULT_HARNESS_CANCEL,
|
|
@@ -221,6 +223,19 @@ async function buildAgentExecutionPlan(provider, providerSpec, workspaceRoot, ag
|
|
|
221
223
|
harness,
|
|
222
224
|
};
|
|
223
225
|
}
|
|
226
|
+
export function resolveAgentTurnTimeouts(defaults) {
|
|
227
|
+
const turnTimeoutMs = positiveTimeoutMs(defaults.turn_timeout_ms) ?? DEFAULT_AGENT_TURN_TIMEOUT_MS;
|
|
228
|
+
const requestedStallTimeoutMs = positiveTimeoutMs(defaults.stall_timeout_ms) ?? DEFAULT_AGENT_STALL_TIMEOUT_MS;
|
|
229
|
+
return {
|
|
230
|
+
turnTimeoutMs,
|
|
231
|
+
stallTimeoutMs: Math.min(requestedStallTimeoutMs, turnTimeoutMs),
|
|
232
|
+
};
|
|
233
|
+
}
|
|
234
|
+
function positiveTimeoutMs(value) {
|
|
235
|
+
return typeof value === "number" && Number.isFinite(value) && value > 0
|
|
236
|
+
? value
|
|
237
|
+
: null;
|
|
238
|
+
}
|
|
224
239
|
function defaultWorkbenchAgentConfig(provider, providerName) {
|
|
225
240
|
const fallback = (provider.manifest.defaults.config ?? {});
|
|
226
241
|
return {
|
|
@@ -229,9 +244,9 @@ function defaultWorkbenchAgentConfig(provider, providerName) {
|
|
|
229
244
|
};
|
|
230
245
|
}
|
|
231
246
|
async function resolveAgentAuth(provider, providerSpec, workspaceRoot, agentHome, adapterAuth) {
|
|
232
|
-
const
|
|
247
|
+
const candidate = adapterAuthProviderCandidate(adapterAuth.request, providerSpec.use) ??
|
|
233
248
|
(provider.manifest.defaults.auth ?? {});
|
|
234
|
-
const parsed = provider.schemas.auth.safeParse(
|
|
249
|
+
const parsed = provider.schemas.auth.safeParse(candidate);
|
|
235
250
|
if (!parsed.success) {
|
|
236
251
|
throw new Error(`Agent provider "${provider.manifest.id}" auth is invalid: ${formatValidationIssues(parsed.error.issues)}`);
|
|
237
252
|
}
|
|
@@ -239,7 +254,7 @@ async function resolveAgentAuth(provider, providerSpec, workspaceRoot, agentHome
|
|
|
239
254
|
void agentHome;
|
|
240
255
|
return { ...parsed.data };
|
|
241
256
|
}
|
|
242
|
-
function
|
|
257
|
+
function adapterAuthProviderCandidate(auth, providerName) {
|
|
243
258
|
const record = jsonRecord(auth);
|
|
244
259
|
const self = jsonRecord(record?.self);
|
|
245
260
|
const adapters = jsonRecord(record?.adapters);
|
|
@@ -338,7 +353,7 @@ function isTransientAgentTurnError(error) {
|
|
|
338
353
|
if (isNativeCaCertificateFailure(message)) {
|
|
339
354
|
return false;
|
|
340
355
|
}
|
|
341
|
-
return /\b(fetch failed|error sending request|stream disconnected before completion|ECONNRESET|ETIMEDOUT|EAI_AGAIN|ENETUNREACH|ECONNREFUSED|socket hang up|network error|UND_ERR_|signal SIGTERM)/iu.test(message);
|
|
356
|
+
return /\b(fetch failed|error sending request|stream disconnected before completion|turn stalled after \d+ms|ECONNRESET|ETIMEDOUT|EAI_AGAIN|ENETUNREACH|ECONNREFUSED|socket hang up|network error|UND_ERR_|signal SIGTERM)/iu.test(message);
|
|
342
357
|
}
|
|
343
358
|
function isNativeCaCertificateFailure(message) {
|
|
344
359
|
return /\bno native root CA certificates found\b|install ca-certificates/iu.test(message);
|
package/dist/execute.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"execute.d.ts","sourceRoot":"","sources":["../src/execute.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"execute.d.ts","sourceRoot":"","sources":["../src/execute.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAEV,IAAI,EAKL,MAAM,kCAAkC,CAAC;AAc1C,OAAO,KAAK,EAEV,0BAA0B,EAG3B,MAAM,iBAAiB,CAAC;AAQzB,MAAM,WAAW,4CAA4C;IAC3D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,0BAA0B,CAAC;IAC3C,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,IAAI,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AA4CD,wBAAsB,qCAAqC,CACzD,IAAI,GAAE,4CAAiD,GACtD,OAAO,CAAC,IAAI,CAAC,CAiEf"}
|
package/dist/execute.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { spawn } from "node:child_process";
|
|
2
2
|
import { promises as fs } from "node:fs";
|
|
3
|
+
import os from "node:os";
|
|
3
4
|
import path from "node:path";
|
|
4
5
|
import { ensureWorkbenchAdapterOutputDir, readWorkbenchAdapterOperationResult, readWorkbenchAdapterOperationRequest, runWorkbenchRuntimeOperationSequence, writeWorkbenchAdapterOperationResult, workbenchAdapterOperationResultPath, } from "@workbench-ai/workbench-protocol";
|
|
5
6
|
import YAML from "yaml";
|
|
@@ -47,8 +48,8 @@ export async function executeWorkbenchBuiltInAdapterCommand(args = {}) {
|
|
|
47
48
|
if (isBuiltInAgentAdapterId(adapterId)) {
|
|
48
49
|
const workload = workloadFromAdapterOperationRequest(request);
|
|
49
50
|
const agent = builtInAgentSpecFromRequest(request);
|
|
50
|
-
if (request.operation === "
|
|
51
|
-
await
|
|
51
|
+
if (request.operation === "candidate.improve") {
|
|
52
|
+
await writeAgentCandidateRevisionOutput(request, workload, agent, {
|
|
52
53
|
agentExecutor: args.agentExecutor,
|
|
53
54
|
adapterAuthRoot: args.adapterAuthRoot,
|
|
54
55
|
adapterAuthRequest: args.adapterAuthRequest ?? request.auth,
|
|
@@ -56,8 +57,8 @@ export async function executeWorkbenchBuiltInAdapterCommand(args = {}) {
|
|
|
56
57
|
});
|
|
57
58
|
return;
|
|
58
59
|
}
|
|
59
|
-
if (request.operation === "
|
|
60
|
-
await
|
|
60
|
+
if (request.operation === "candidate.run") {
|
|
61
|
+
await writeAgentCandidateOutput(request, workload, agent, {
|
|
61
62
|
agentExecutor: args.agentExecutor,
|
|
62
63
|
adapterAuthRoot: args.adapterAuthRoot,
|
|
63
64
|
adapterAuthRequest: args.adapterAuthRequest ?? request.auth,
|
|
@@ -123,7 +124,7 @@ async function workbenchEngineOutcomeUsage(outcome) {
|
|
|
123
124
|
const operationUsage = outcome.usage
|
|
124
125
|
? undefined
|
|
125
126
|
: runtime.mergeUsageSummaries(outcome.operationResults.map((result) => {
|
|
126
|
-
if (result.operation === "
|
|
127
|
+
if (result.operation === "candidate.run") {
|
|
127
128
|
return runtime.assignUsageRole("runner", result.usage);
|
|
128
129
|
}
|
|
129
130
|
if (result.operation === "engine.run") {
|
|
@@ -163,16 +164,16 @@ function workbenchEngineScoreInvocation(request) {
|
|
|
163
164
|
: adapterCommandName(score.use),
|
|
164
165
|
};
|
|
165
166
|
}
|
|
166
|
-
function
|
|
167
|
-
const
|
|
168
|
-
if (!
|
|
169
|
-
throw new Error("Workbench engine requires context.
|
|
167
|
+
function workbenchEngineCandidateInvocation(request) {
|
|
168
|
+
const candidate = request.context?.candidate?.run;
|
|
169
|
+
if (!candidate?.use || !candidate.command) {
|
|
170
|
+
throw new Error("Workbench engine requires context.candidate.run.use and context.candidate.run.command.");
|
|
170
171
|
}
|
|
171
172
|
return {
|
|
172
|
-
use:
|
|
173
|
-
with: (
|
|
174
|
-
...(
|
|
175
|
-
command:
|
|
173
|
+
use: candidate.use,
|
|
174
|
+
with: (candidate.with ?? {}),
|
|
175
|
+
...(candidate.auth !== undefined ? { auth: candidate.auth } : {}),
|
|
176
|
+
command: candidate.command,
|
|
176
177
|
};
|
|
177
178
|
}
|
|
178
179
|
function workbenchEngineGradingIsolation(request) {
|
|
@@ -188,13 +189,13 @@ function workbenchEngineGradingIsolation(request) {
|
|
|
188
189
|
}
|
|
189
190
|
async function runWorkbenchEngineSharedGrading(request) {
|
|
190
191
|
const inputs = await workbenchEngineRuntimeInputs(request);
|
|
191
|
-
const
|
|
192
|
+
const candidate = workbenchEngineCandidateInvocation(request);
|
|
192
193
|
const score = workbenchEngineScoreInvocation(request);
|
|
193
194
|
const result = await runWorkbenchRuntimeOperationSequence({
|
|
194
195
|
inputs,
|
|
195
196
|
prepare: true,
|
|
196
197
|
operations: [
|
|
197
|
-
{ label: "
|
|
198
|
+
{ label: "candidate", operation: "candidate.run", invocation: candidate },
|
|
198
199
|
{ label: "score", operation: "engine.run", invocation: score },
|
|
199
200
|
],
|
|
200
201
|
});
|
|
@@ -203,25 +204,25 @@ async function runWorkbenchEngineSharedGrading(request) {
|
|
|
203
204
|
}
|
|
204
205
|
async function runWorkbenchEngineSeparateGrading(request) {
|
|
205
206
|
const inputs = await workbenchEngineRuntimeInputs(request);
|
|
206
|
-
const
|
|
207
|
+
const candidate = workbenchEngineCandidateInvocation(request);
|
|
207
208
|
const score = workbenchEngineScoreInvocation(request);
|
|
208
209
|
const runtime = await importWorkbenchRuntime();
|
|
209
210
|
const runner = await runWorkbenchRuntimeOperationSequence({
|
|
210
211
|
inputs: {
|
|
211
|
-
|
|
212
|
+
candidate: inputs.candidate,
|
|
212
213
|
case: inputs.case,
|
|
213
214
|
traces: inputs.traces,
|
|
214
215
|
},
|
|
215
216
|
prepare: true,
|
|
216
217
|
collectWorkspace: true,
|
|
217
218
|
operations: [
|
|
218
|
-
{ label: "
|
|
219
|
+
{ label: "candidate", operation: "candidate.run", invocation: candidate },
|
|
219
220
|
],
|
|
220
221
|
});
|
|
221
222
|
assertRuntimeControlResultOk(runner, "Workbench separate runner");
|
|
222
223
|
const grader = await runWorkbenchRuntimeOperationSequence({
|
|
223
224
|
inputs: {
|
|
224
|
-
|
|
225
|
+
candidate: inputs.candidate,
|
|
225
226
|
case: inputs.case,
|
|
226
227
|
enginePrivate: inputs.enginePrivate,
|
|
227
228
|
traces: inputs.traces,
|
|
@@ -243,14 +244,14 @@ async function runWorkbenchEngineSeparateGrading(request) {
|
|
|
243
244
|
};
|
|
244
245
|
}
|
|
245
246
|
async function workbenchEngineRuntimeInputs(request) {
|
|
246
|
-
const [
|
|
247
|
-
readOptionalSurfaceFiles(request.paths.
|
|
247
|
+
const [candidate, caseFiles, enginePrivate, traces] = await Promise.all([
|
|
248
|
+
readOptionalSurfaceFiles(request.paths.candidate),
|
|
248
249
|
readOptionalSurfaceFiles(request.paths.case),
|
|
249
250
|
readOptionalSurfaceFiles(request.paths.enginePrivate),
|
|
250
251
|
readOptionalSurfaceFiles(request.paths.traces),
|
|
251
252
|
]);
|
|
252
253
|
return {
|
|
253
|
-
|
|
254
|
+
candidate,
|
|
254
255
|
case: caseFiles,
|
|
255
256
|
enginePrivate,
|
|
256
257
|
traces,
|
|
@@ -309,12 +310,20 @@ function safeInternalPathSegment(value) {
|
|
|
309
310
|
}
|
|
310
311
|
async function executeCommandAdapterRequest(request) {
|
|
311
312
|
const command = requiredAdapterCommandString(request, "command");
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
313
|
+
const before = request.operation === "candidate.improve"
|
|
314
|
+
? await snapshotEditableCandidateWorkspace(request)
|
|
315
|
+
: null;
|
|
316
|
+
try {
|
|
317
|
+
await runAdapterShellCommand(command, request.paths.workspace);
|
|
318
|
+
if (request.operation === "engine.run") {
|
|
319
|
+
await requireCommandScoreResult(request);
|
|
320
|
+
return;
|
|
321
|
+
}
|
|
322
|
+
await writeOperationOkUnlessPresent(request, before?.root);
|
|
323
|
+
}
|
|
324
|
+
finally {
|
|
325
|
+
await before?.cleanup();
|
|
316
326
|
}
|
|
317
|
-
await writeOperationOkUnlessPresent(request);
|
|
318
327
|
}
|
|
319
328
|
async function requireCommandScoreResult(request) {
|
|
320
329
|
if (!await fileExists(workbenchAdapterOperationResultPath(request.paths.output))) {
|
|
@@ -379,15 +388,15 @@ async function runAdapterShellCommand(command, cwd, env = {}) {
|
|
|
379
388
|
});
|
|
380
389
|
});
|
|
381
390
|
}
|
|
382
|
-
async function writeOperationOkUnlessPresent(request) {
|
|
391
|
+
async function writeOperationOkUnlessPresent(request, beforeRoot) {
|
|
383
392
|
if (await fileExists(workbenchAdapterOperationResultPath(request.paths.output))) {
|
|
384
393
|
return;
|
|
385
394
|
}
|
|
386
|
-
if (request.operation === "
|
|
387
|
-
const patch = await
|
|
388
|
-
beforeRoot: requiredRequestPath(request.paths.
|
|
395
|
+
if (request.operation === "candidate.improve") {
|
|
396
|
+
const patch = await createCandidatePatchFromWorkspace({
|
|
397
|
+
beforeRoot: beforeRoot ?? requiredRequestPath(request.paths.candidate, "paths.candidate"),
|
|
389
398
|
afterRoot: request.paths.workspace,
|
|
390
|
-
edits: request.context?.
|
|
399
|
+
edits: request.context?.improve?.edits ?? [],
|
|
391
400
|
});
|
|
392
401
|
await writeWorkbenchAdapterOperationResult(request.paths.output, {
|
|
393
402
|
protocol: "workbench.adapter-result.v1",
|
|
@@ -403,6 +412,41 @@ async function writeOperationOkUnlessPresent(request) {
|
|
|
403
412
|
ok: true,
|
|
404
413
|
});
|
|
405
414
|
}
|
|
415
|
+
async function snapshotEditableCandidateWorkspace(request) {
|
|
416
|
+
const root = await fs.mkdtemp(path.join(os.tmpdir(), "workbench-candidate-before-"));
|
|
417
|
+
const edits = request.context?.improve?.edits ?? [];
|
|
418
|
+
const files = await readEditableCandidateWorkspaceFiles(request.paths.workspace, edits);
|
|
419
|
+
await writeSurfaceFiles(root, files);
|
|
420
|
+
return {
|
|
421
|
+
root,
|
|
422
|
+
cleanup: async () => {
|
|
423
|
+
await fs.rm(root, { recursive: true, force: true }).catch(() => undefined);
|
|
424
|
+
},
|
|
425
|
+
};
|
|
426
|
+
}
|
|
427
|
+
async function readEditableCandidateWorkspaceFiles(root, edits) {
|
|
428
|
+
const files = [];
|
|
429
|
+
for (const edit of edits) {
|
|
430
|
+
const normalized = normalizeRelativePath(edit);
|
|
431
|
+
if (!normalized || isRuntimeWorkspacePath(normalized)) {
|
|
432
|
+
continue;
|
|
433
|
+
}
|
|
434
|
+
const absolutePath = path.join(root, normalized);
|
|
435
|
+
const stat = await fs.stat(absolutePath).catch(() => null);
|
|
436
|
+
if (!stat) {
|
|
437
|
+
continue;
|
|
438
|
+
}
|
|
439
|
+
if (stat.isDirectory()) {
|
|
440
|
+
await readSurfaceFilesInto(root, normalized, files);
|
|
441
|
+
continue;
|
|
442
|
+
}
|
|
443
|
+
if (stat.isFile()) {
|
|
444
|
+
files.push(await readSurfaceFile(root, normalized));
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
return dedupeSurfaceFiles(files.filter((file) => isCandidateEditPath(file.path, edits) &&
|
|
448
|
+
!isRuntimeWorkspacePath(file.path)));
|
|
449
|
+
}
|
|
406
450
|
async function firstExistingFile(files) {
|
|
407
451
|
for (const file of files) {
|
|
408
452
|
const stat = await fs.stat(file).catch(() => null);
|
|
@@ -460,10 +504,13 @@ async function readWorkbenchEngineCase(args) {
|
|
|
460
504
|
throw new Error(`Task ${args.id} ${TASK_CONTROL_FILE} must include a task string.`);
|
|
461
505
|
}
|
|
462
506
|
const unsupportedTaskFields = Object.keys(taskRecord)
|
|
463
|
-
.filter((key) => !["version", "task", "files", "tests", "solution", "environment"].includes(key));
|
|
507
|
+
.filter((key) => !["version", "task", "split", "files", "tests", "solution", "environment"].includes(key));
|
|
464
508
|
if (unsupportedTaskFields.length > 0) {
|
|
465
509
|
throw new Error(`Task ${args.id} ${TASK_CONTROL_FILE} has unsupported field${unsupportedTaskFields.length === 1 ? "" : "s"}: ${unsupportedTaskFields.join(", ")}.`);
|
|
466
510
|
}
|
|
511
|
+
if (taskRecord.split !== undefined && (typeof taskRecord.split !== "string" || taskRecord.split.trim().length === 0)) {
|
|
512
|
+
throw new Error(`Task ${args.id} ${TASK_CONTROL_FILE} split must be a non-empty string when provided.`);
|
|
513
|
+
}
|
|
467
514
|
const publicPrefix = taskDirectoryPrefix(taskRecord.files, "files", args.id);
|
|
468
515
|
const testsPrefix = taskDirectoryPrefix(taskRecord.tests, "tests", args.id);
|
|
469
516
|
const solutionPrefix = taskDirectoryPrefix(taskRecord.solution, "solution", args.id);
|
|
@@ -483,6 +530,7 @@ async function readWorkbenchEngineCase(args) {
|
|
|
483
530
|
case: {
|
|
484
531
|
version: 3,
|
|
485
532
|
prompt: taskRecord.task,
|
|
533
|
+
...(typeof taskRecord.split === "string" ? { split: taskRecord.split.trim() } : {}),
|
|
486
534
|
...(taskRecord.environment !== undefined
|
|
487
535
|
? { environment: taskRecord.environment }
|
|
488
536
|
: {}),
|
|
@@ -539,21 +587,25 @@ async function readSurfaceFilesInto(root, relativeDir, result) {
|
|
|
539
587
|
if (!entry.isFile()) {
|
|
540
588
|
continue;
|
|
541
589
|
}
|
|
542
|
-
|
|
543
|
-
fs.readFile(absolutePath),
|
|
544
|
-
fs.stat(absolutePath),
|
|
545
|
-
]);
|
|
546
|
-
const text = body.toString("utf8");
|
|
547
|
-
const isUtf8 = Buffer.from(text, "utf8").equals(body);
|
|
548
|
-
result.push({
|
|
549
|
-
path: relativePath,
|
|
550
|
-
kind: isUtf8 ? "text" : "binary",
|
|
551
|
-
encoding: isUtf8 ? "utf8" : "base64",
|
|
552
|
-
content: isUtf8 ? text : body.toString("base64"),
|
|
553
|
-
executable: (stat.mode & 0o111) !== 0,
|
|
554
|
-
});
|
|
590
|
+
result.push(await readSurfaceFile(root, relativePath));
|
|
555
591
|
}
|
|
556
592
|
}
|
|
593
|
+
async function readSurfaceFile(root, relativePath) {
|
|
594
|
+
const absolutePath = path.join(root, normalizeRelativePath(relativePath));
|
|
595
|
+
const [body, stat] = await Promise.all([
|
|
596
|
+
fs.readFile(absolutePath),
|
|
597
|
+
fs.stat(absolutePath),
|
|
598
|
+
]);
|
|
599
|
+
const text = body.toString("utf8");
|
|
600
|
+
const isUtf8 = Buffer.from(text, "utf8").equals(body);
|
|
601
|
+
return {
|
|
602
|
+
path: normalizeRelativePath(relativePath),
|
|
603
|
+
kind: isUtf8 ? "text" : "binary",
|
|
604
|
+
encoding: isUtf8 ? "utf8" : "base64",
|
|
605
|
+
content: isUtf8 ? text : body.toString("base64"),
|
|
606
|
+
executable: (stat.mode & 0o111) !== 0,
|
|
607
|
+
};
|
|
608
|
+
}
|
|
557
609
|
async function fileExists(filePath) {
|
|
558
610
|
return fs.stat(filePath).then((stat) => stat.isFile(), () => false);
|
|
559
611
|
}
|
|
@@ -644,13 +696,13 @@ function workloadFromAdapterOperationRequest(request) {
|
|
|
644
696
|
name: context.benchmark?.name ?? "",
|
|
645
697
|
description: context.benchmark?.description ?? "",
|
|
646
698
|
},
|
|
647
|
-
|
|
648
|
-
path: context.
|
|
699
|
+
candidate: {
|
|
700
|
+
path: context.candidate?.path ?? "",
|
|
649
701
|
},
|
|
650
|
-
|
|
651
|
-
edits: context.
|
|
702
|
+
improve: {
|
|
703
|
+
edits: context.improve?.edits ?? [],
|
|
652
704
|
},
|
|
653
|
-
|
|
705
|
+
candidateId: context.candidate?.id ?? "",
|
|
654
706
|
attemptIndex: attempt.attemptIndex ?? 0,
|
|
655
707
|
sampleIndex: attempt.sampleIndex ?? 0,
|
|
656
708
|
caseId: attempt.caseId ?? "",
|
|
@@ -726,35 +778,35 @@ async function executeBuiltInAgentTurn(executor, request) {
|
|
|
726
778
|
const { defaultWorkbenchAgentTurnExecutor, executeWorkbenchAgentTurn, } = await import("./agent-turn.js");
|
|
727
779
|
return await executeWorkbenchAgentTurn(executor ?? defaultWorkbenchAgentTurnExecutor, request);
|
|
728
780
|
}
|
|
729
|
-
async function
|
|
730
|
-
if (request.operation !== "
|
|
731
|
-
throw new Error("Agent
|
|
781
|
+
async function writeAgentCandidateOutput(request, workload, candidate, options = {}) {
|
|
782
|
+
if (request.operation !== "candidate.run") {
|
|
783
|
+
throw new Error("Agent candidate results can only complete candidate.run operations.");
|
|
732
784
|
}
|
|
733
|
-
const traceRoot = path.join(request.paths.output, ".workbench", "internal", "agent-
|
|
785
|
+
const traceRoot = path.join(request.paths.output, ".workbench", "internal", "agent-candidate");
|
|
734
786
|
const agentResult = await executeBuiltInAgentTurn(options.agentExecutor, {
|
|
735
787
|
role: "runner",
|
|
736
|
-
provider:
|
|
788
|
+
provider: candidate.agent,
|
|
737
789
|
adapterAuthRoot: options.adapterAuthRoot,
|
|
738
790
|
adapterAuthRequest: options.adapterAuthRequest,
|
|
739
791
|
adapterAuthEnv: options.adapterAuthEnv,
|
|
740
792
|
workspaceRoot: request.paths.workspace,
|
|
741
793
|
cwd: request.paths.workspace,
|
|
742
|
-
prompt:
|
|
794
|
+
prompt: buildAgentCandidatePrompt(workload, candidate),
|
|
743
795
|
traceRoot,
|
|
744
796
|
jobId: workload.job.id,
|
|
745
797
|
});
|
|
746
|
-
const outputPath = path.join(request.paths.output, "
|
|
798
|
+
const outputPath = path.join(request.paths.output, "candidate-summary.md");
|
|
747
799
|
await fs.mkdir(path.dirname(outputPath), { recursive: true });
|
|
748
800
|
await fs.writeFile(outputPath, agentResult.output);
|
|
749
801
|
const trace = {
|
|
750
|
-
path: `.workbench/traces/${workload.job.id}/
|
|
802
|
+
path: `.workbench/traces/${workload.job.id}/candidate.json`,
|
|
751
803
|
kind: "text",
|
|
752
804
|
encoding: "utf8",
|
|
753
805
|
executable: false,
|
|
754
806
|
content: `${JSON.stringify({
|
|
755
|
-
kind: "
|
|
756
|
-
provider:
|
|
757
|
-
|
|
807
|
+
kind: "agent_candidate",
|
|
808
|
+
provider: candidate.agent.use,
|
|
809
|
+
candidateId: workload.candidateId,
|
|
758
810
|
attemptIndex: workload.attemptIndex,
|
|
759
811
|
sampleIndex: workload.sampleIndex,
|
|
760
812
|
summary: agentResult.output,
|
|
@@ -766,25 +818,25 @@ async function writeAgentSubjectOutput(request, workload, subject, options = {})
|
|
|
766
818
|
const usage = runtime.assignUsageRole("runner", agentResult.usage);
|
|
767
819
|
await writeWorkbenchAdapterOperationResult(request.paths.output, {
|
|
768
820
|
protocol: "workbench.adapter-result.v1",
|
|
769
|
-
operation: "
|
|
821
|
+
operation: "candidate.run",
|
|
770
822
|
ok: true,
|
|
771
823
|
...(agentResult.output ? { summary: agentResult.output } : {}),
|
|
772
824
|
feedback: {
|
|
773
|
-
|
|
774
|
-
agent:
|
|
825
|
+
candidate: "agent",
|
|
826
|
+
agent: candidate.agent.use,
|
|
775
827
|
metadata: agentResult.metadata,
|
|
776
828
|
},
|
|
777
829
|
...(usage ? { usage } : {}),
|
|
778
830
|
});
|
|
779
831
|
}
|
|
780
|
-
function
|
|
832
|
+
function buildAgentCandidatePrompt(workload, candidate) {
|
|
781
833
|
return [
|
|
782
|
-
...(
|
|
834
|
+
...(candidate.instructions ? ["Instructions:", candidate.instructions, ""] : []),
|
|
783
835
|
"Context:",
|
|
784
|
-
"-
|
|
785
|
-
"- Follow any
|
|
836
|
+
"- Candidate source files are mounted at /workspace/input/candidate.",
|
|
837
|
+
"- Follow any candidate guidance, skill files, scripts, or configuration under /workspace/input/candidate.",
|
|
786
838
|
"- The mutable working directory is /workspace.",
|
|
787
|
-
"- If the
|
|
839
|
+
"- If the candidate declares prepare.command, it has already run and may have copied files into /workspace.",
|
|
788
840
|
...(workload.case?.prompt ? ["Case:", workload.case.prompt, ""] : []),
|
|
789
841
|
"- Public case files are mounted at /workspace/input/case.",
|
|
790
842
|
"- Verifier tests are not present while you run.",
|
|
@@ -792,87 +844,89 @@ function buildAgentSubjectPrompt(workload, subject) {
|
|
|
792
844
|
"- You may write inspection artifacts under /workspace/output.",
|
|
793
845
|
].join("\n");
|
|
794
846
|
}
|
|
795
|
-
async function
|
|
796
|
-
if (request.operation !== "
|
|
797
|
-
throw new Error("Agent
|
|
847
|
+
async function writeAgentCandidateRevisionOutput(request, workload, improver, options) {
|
|
848
|
+
if (request.operation !== "candidate.improve") {
|
|
849
|
+
throw new Error("Agent improve results can only complete candidate.improve operations.");
|
|
798
850
|
}
|
|
799
|
-
const
|
|
800
|
-
const
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
851
|
+
const before = await snapshotEditableCandidateWorkspace(request);
|
|
852
|
+
const traceRoot = path.join(request.paths.output, ".workbench", "internal", "agent-improver");
|
|
853
|
+
try {
|
|
854
|
+
const agentResult = await executeBuiltInAgentTurn(options.agentExecutor, {
|
|
855
|
+
role: "improver",
|
|
856
|
+
provider: improver.agent,
|
|
857
|
+
adapterAuthRoot: options.adapterAuthRoot,
|
|
858
|
+
adapterAuthRequest: options.adapterAuthRequest,
|
|
859
|
+
adapterAuthEnv: options.adapterAuthEnv,
|
|
860
|
+
workspaceRoot: request.paths.workspace,
|
|
861
|
+
cwd: request.paths.workspace,
|
|
862
|
+
prompt: buildAgentImproverPrompt(workload),
|
|
863
|
+
traceRoot,
|
|
864
|
+
jobId: workload.job.id,
|
|
865
|
+
});
|
|
866
|
+
const candidatePatch = await createCandidatePatchFromWorkspace({
|
|
867
|
+
beforeRoot: before.root,
|
|
868
|
+
afterRoot: request.paths.workspace,
|
|
869
|
+
edits: workload.improve.edits,
|
|
870
|
+
});
|
|
871
|
+
const changedCandidatePaths = candidatePatch.fileChanges.filter((filePath) => isCandidateEditPath(filePath, workload.improve.edits));
|
|
872
|
+
if (changedCandidatePaths.length === 0) {
|
|
873
|
+
throw new Error("Agent improve adapter completed without changing a candidate file covered by improve edits.");
|
|
874
|
+
}
|
|
875
|
+
const trace = {
|
|
876
|
+
path: `.workbench/traces/${workload.job.id}/improver.json`,
|
|
877
|
+
kind: "text",
|
|
878
|
+
encoding: "utf8",
|
|
879
|
+
executable: false,
|
|
880
|
+
content: `${JSON.stringify({
|
|
881
|
+
kind: "agent_improver",
|
|
882
|
+
provider: improver.agent.use,
|
|
883
|
+
candidateId: workload.candidateId,
|
|
884
|
+
attemptIndex: workload.attemptIndex,
|
|
885
|
+
changedPaths: changedCandidatePaths,
|
|
886
|
+
summary: agentResult.output,
|
|
887
|
+
metadata: agentResult.metadata,
|
|
888
|
+
}, null, 2)}\n`,
|
|
889
|
+
};
|
|
890
|
+
await writeSurfaceFiles(request.paths.output, [trace, ...agentResult.traceFiles]);
|
|
891
|
+
const runtime = await importWorkbenchRuntime();
|
|
892
|
+
const usage = runtime.assignUsageRole("improver", agentResult.usage);
|
|
893
|
+
await writeWorkbenchAdapterOperationResult(request.paths.output, {
|
|
894
|
+
protocol: "workbench.adapter-result.v1",
|
|
895
|
+
operation: "candidate.improve",
|
|
896
|
+
ok: true,
|
|
897
|
+
value: {
|
|
898
|
+
...candidatePatch,
|
|
899
|
+
fileChanges: changedCandidatePaths,
|
|
900
|
+
},
|
|
901
|
+
...(agentResult.output ? { summary: agentResult.output } : {}),
|
|
902
|
+
feedback: {
|
|
903
|
+
improver: improver.agent.use,
|
|
904
|
+
changedPaths: changedCandidatePaths,
|
|
905
|
+
metadata: agentResult.metadata,
|
|
906
|
+
},
|
|
907
|
+
...(usage ? { usage } : {}),
|
|
908
|
+
});
|
|
909
|
+
}
|
|
910
|
+
finally {
|
|
911
|
+
await before.cleanup();
|
|
820
912
|
}
|
|
821
|
-
const trace = {
|
|
822
|
-
path: `.workbench/traces/${workload.job.id}/optimizer.json`,
|
|
823
|
-
kind: "text",
|
|
824
|
-
encoding: "utf8",
|
|
825
|
-
executable: false,
|
|
826
|
-
content: `${JSON.stringify({
|
|
827
|
-
kind: "agent_optimizer",
|
|
828
|
-
provider: optimizer.agent.use,
|
|
829
|
-
subjectId: workload.subjectId,
|
|
830
|
-
attemptIndex: workload.attemptIndex,
|
|
831
|
-
changedPaths: changedSubjectPaths,
|
|
832
|
-
summary: agentResult.output,
|
|
833
|
-
metadata: agentResult.metadata,
|
|
834
|
-
}, null, 2)}\n`,
|
|
835
|
-
};
|
|
836
|
-
await writeSurfaceFiles(request.paths.output, [trace, ...agentResult.traceFiles]);
|
|
837
|
-
const runtime = await importWorkbenchRuntime();
|
|
838
|
-
const usage = runtime.assignUsageRole("optimizer", agentResult.usage);
|
|
839
|
-
await writeWorkbenchAdapterOperationResult(request.paths.output, {
|
|
840
|
-
protocol: "workbench.adapter-result.v1",
|
|
841
|
-
operation: "optimizer.improve",
|
|
842
|
-
ok: true,
|
|
843
|
-
value: {
|
|
844
|
-
...subjectPatch,
|
|
845
|
-
fileChanges: changedSubjectPaths,
|
|
846
|
-
},
|
|
847
|
-
...(agentResult.output ? { summary: agentResult.output } : {}),
|
|
848
|
-
feedback: {
|
|
849
|
-
optimizer: optimizer.agent.use,
|
|
850
|
-
changedPaths: changedSubjectPaths,
|
|
851
|
-
metadata: agentResult.metadata,
|
|
852
|
-
},
|
|
853
|
-
...(usage ? { usage } : {}),
|
|
854
|
-
});
|
|
855
913
|
}
|
|
856
|
-
function
|
|
914
|
+
function buildAgentImproverPrompt(workload) {
|
|
857
915
|
return [
|
|
858
916
|
"Benchmark:",
|
|
859
917
|
workload.benchmark.description || workload.benchmark.name,
|
|
860
918
|
"",
|
|
861
|
-
"
|
|
862
|
-
"- Subject source files are mounted at /workspace/input/subject.",
|
|
863
|
-
"- Follow any subject guidance, skill files, scripts, or configuration under /workspace/input/subject.",
|
|
864
|
-
"- The mutable working directory is /workspace.",
|
|
865
|
-
"- If the subject declares prepare.command, it has already run and may have copied files into /workspace.",
|
|
866
|
-
"- Prior run traces are mounted at /workspace/input/traces.",
|
|
867
|
-
"- Use /workspace/input/traces as the source of truth for what happened in prior attempts.",
|
|
868
|
-
"- Do not mutate /workspace/input.",
|
|
919
|
+
"Improve the candidate for this benchmark.",
|
|
869
920
|
"",
|
|
870
|
-
"
|
|
871
|
-
|
|
921
|
+
"Candidate files are in the current directory.",
|
|
922
|
+
"Prior adapter executions are in /workspace/input/traces.",
|
|
872
923
|
"",
|
|
873
|
-
"
|
|
874
|
-
|
|
875
|
-
"
|
|
924
|
+
"Editable paths:",
|
|
925
|
+
workload.improve.edits.map((entry) => `- ${entry}`).join("\n"),
|
|
926
|
+
"",
|
|
927
|
+
"Rules:",
|
|
928
|
+
"- Modify only editable paths.",
|
|
929
|
+
"- Change at least one editable file.",
|
|
876
930
|
].join("\n");
|
|
877
931
|
}
|
|
878
932
|
async function writeRubricJudgeResult(request, workload, engine, options = {}) {
|
|
@@ -928,9 +982,9 @@ async function writeRubricEvidenceFiles(args) {
|
|
|
928
982
|
const root = `.workbench/traces/${args.workload.job.id}/engine/rubric`;
|
|
929
983
|
const scorecard = {
|
|
930
984
|
schema: "workbench.engine.rubric.evidence.v1",
|
|
931
|
-
|
|
985
|
+
safeForImprover: true,
|
|
932
986
|
jobId: args.workload.job.id,
|
|
933
|
-
|
|
987
|
+
candidateId: args.workload.candidateId,
|
|
934
988
|
attemptIndex: args.workload.attemptIndex,
|
|
935
989
|
sampleIndex: args.workload.sampleIndex,
|
|
936
990
|
caseId: args.workload.caseId,
|
|
@@ -957,7 +1011,7 @@ async function writeRubricEvidenceFiles(args) {
|
|
|
957
1011
|
jsonSurfaceFile(`${root}/scorecard.json`, scorecard),
|
|
958
1012
|
...args.criterionRuns.map((run) => jsonSurfaceFile(`${root}/criteria/${safeInternalPathSegment(run.result.criterion_id)}/result.json`, {
|
|
959
1013
|
schema: "workbench.engine.rubric.criterion-evidence.v1",
|
|
960
|
-
|
|
1014
|
+
safeForImprover: true,
|
|
961
1015
|
criterion: args.engine.criteria.find((criterion) => criterion.id === run.result.criterion_id) ?? {
|
|
962
1016
|
id: run.result.criterion_id,
|
|
963
1017
|
},
|
|
@@ -1079,8 +1133,8 @@ function buildRubricCriterionJudgePrompt(workload, engine, criterion) {
|
|
|
1079
1133
|
JSON.stringify(criterion, null, 2),
|
|
1080
1134
|
"",
|
|
1081
1135
|
"Context:",
|
|
1082
|
-
"- The
|
|
1083
|
-
"-
|
|
1136
|
+
"- The candidate already ran in this same working directory.",
|
|
1137
|
+
"- Candidate outputs are available in the current working directory.",
|
|
1084
1138
|
"- Public case files are mounted at /workspace/input/case.",
|
|
1085
1139
|
"- Verifier-private files are mounted at /workspace/private/engine when the task provides them.",
|
|
1086
1140
|
"- Score only from the current working directory, public case files, verifier-private files, and the criterion above.",
|
|
@@ -1276,12 +1330,12 @@ function requireWorkloadTask(workload, label) {
|
|
|
1276
1330
|
throw new Error(`${label} workload is missing case text.`);
|
|
1277
1331
|
}
|
|
1278
1332
|
}
|
|
1279
|
-
async function
|
|
1333
|
+
async function createCandidatePatchFromWorkspace(args) {
|
|
1280
1334
|
const before = new Map((await readSurfaceFilesRecursive(args.beforeRoot))
|
|
1281
1335
|
.map((file) => [normalizeRelativePath(file.path), file]));
|
|
1282
1336
|
const changedFiles = (await readSurfaceFilesRecursive(args.afterRoot))
|
|
1283
1337
|
.map((file) => ({ ...file, path: normalizeRelativePath(file.path) }))
|
|
1284
|
-
.filter((file) =>
|
|
1338
|
+
.filter((file) => isCandidateEditPath(file.path, args.edits) &&
|
|
1285
1339
|
!isRuntimeWorkspacePath(file.path) &&
|
|
1286
1340
|
!sameSurfaceFile(before.get(file.path), file))
|
|
1287
1341
|
.sort((left, right) => left.path.localeCompare(right.path));
|
|
@@ -1321,7 +1375,7 @@ async function writeSurfaceFiles(root, files) {
|
|
|
1321
1375
|
}
|
|
1322
1376
|
}
|
|
1323
1377
|
}
|
|
1324
|
-
function
|
|
1378
|
+
function isCandidateEditPath(filePath, edits) {
|
|
1325
1379
|
const normalized = normalizeRelativePath(filePath);
|
|
1326
1380
|
return edits.some((entry) => {
|
|
1327
1381
|
const editPath = normalizeRelativePath(entry).replace(/\/+$/u, "");
|
package/dist/manifests.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { adapterSlot, defineAdapter, defineEngineResolver,
|
|
1
|
+
import { adapterSlot, defineAdapter, defineEngineResolver, defineCandidate, defineImprover, defineEngineRunner, workbenchAdapterManifestFromDefinition, } from "@workbench-ai/workbench-protocol";
|
|
2
2
|
const BUILT_IN_ADAPTER_MANIFESTS = Object.fromEntries(Object.entries({
|
|
3
3
|
workbench: defineAdapter({
|
|
4
4
|
id: "workbench",
|
|
@@ -10,8 +10,8 @@ const BUILT_IN_ADAPTER_MANIFESTS = Object.fromEntries(Object.entries({
|
|
|
10
10
|
}),
|
|
11
11
|
codex: defineAdapter({
|
|
12
12
|
id: "codex",
|
|
13
|
-
|
|
14
|
-
improve:
|
|
13
|
+
candidate: defineCandidate(),
|
|
14
|
+
improve: defineImprover(),
|
|
15
15
|
setup: [
|
|
16
16
|
"npm install --global @openai/codex@0.125.0",
|
|
17
17
|
],
|
|
@@ -24,8 +24,8 @@ const BUILT_IN_ADAPTER_MANIFESTS = Object.fromEntries(Object.entries({
|
|
|
24
24
|
}),
|
|
25
25
|
claude: defineAdapter({
|
|
26
26
|
id: "claude",
|
|
27
|
-
|
|
28
|
-
improve:
|
|
27
|
+
candidate: defineCandidate(),
|
|
28
|
+
improve: defineImprover(),
|
|
29
29
|
setup: [
|
|
30
30
|
"npm install --global @anthropic-ai/claude-code@2.1.119",
|
|
31
31
|
],
|
|
@@ -57,15 +57,15 @@ const BUILT_IN_ADAPTER_MANIFESTS = Object.fromEntries(Object.entries({
|
|
|
57
57
|
}),
|
|
58
58
|
command: defineAdapter({
|
|
59
59
|
id: "command",
|
|
60
|
-
|
|
60
|
+
candidate: defineCandidate(),
|
|
61
61
|
engineRun: defineEngineRunner(),
|
|
62
|
-
improve:
|
|
62
|
+
improve: defineImprover(),
|
|
63
63
|
}),
|
|
64
64
|
rubric: defineAdapter({
|
|
65
65
|
id: "rubric",
|
|
66
66
|
engineRun: defineEngineRunner(),
|
|
67
67
|
slots: {
|
|
68
|
-
judge: adapterSlot("/judge", "
|
|
68
|
+
judge: adapterSlot("/judge", "candidate.run"),
|
|
69
69
|
},
|
|
70
70
|
}),
|
|
71
71
|
tests: defineAdapter({
|
package/dist/runtime.js
CHANGED
|
@@ -12,11 +12,11 @@ export async function importWorkbenchRuntime() {
|
|
|
12
12
|
return await runtimeModule;
|
|
13
13
|
}
|
|
14
14
|
async function importWorkbenchRuntimeUncached() {
|
|
15
|
-
const
|
|
15
|
+
const candidates = runtimeImportCandidates();
|
|
16
16
|
let lastError;
|
|
17
|
-
for (const
|
|
17
|
+
for (const candidate of candidates) {
|
|
18
18
|
try {
|
|
19
|
-
return await import(__rewriteRelativeImportExtension(
|
|
19
|
+
return await import(__rewriteRelativeImportExtension(candidate));
|
|
20
20
|
}
|
|
21
21
|
catch (error) {
|
|
22
22
|
lastError = error;
|
|
@@ -24,11 +24,11 @@ async function importWorkbenchRuntimeUncached() {
|
|
|
24
24
|
}
|
|
25
25
|
throw new Error(`Unable to load @workbench-ai/workbench-core for built-in adapters: ${lastError instanceof Error ? lastError.message : String(lastError)}`);
|
|
26
26
|
}
|
|
27
|
-
function
|
|
27
|
+
function runtimeImportCandidates() {
|
|
28
28
|
return [
|
|
29
29
|
process.env.WORKBENCH_RUNTIME_IMPORT,
|
|
30
30
|
"/app/products/workbench/packages/core/src/index.ts",
|
|
31
31
|
new URL("../../core/src/index.ts", import.meta.url).href,
|
|
32
32
|
"@workbench-ai/workbench-core",
|
|
33
|
-
].filter((
|
|
33
|
+
].filter((candidate) => typeof candidate === "string" && candidate.length > 0);
|
|
34
34
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@workbench-ai/workbench-built-in-adapters",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.51",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -32,12 +32,12 @@
|
|
|
32
32
|
],
|
|
33
33
|
"dependencies": {
|
|
34
34
|
"yaml": "^2.8.2",
|
|
35
|
-
"@workbench-ai/agent-driver-anthropic-claude-code": "0.0.45",
|
|
36
|
-
"@workbench-ai/agent-driver": "0.0.45",
|
|
37
35
|
"@workbench-ai/agent-driver-openai-codex": "0.0.45",
|
|
38
|
-
"@workbench-ai/workbench-
|
|
39
|
-
"@workbench-ai/
|
|
40
|
-
"@workbench-ai/workbench-
|
|
36
|
+
"@workbench-ai/workbench-contract": "0.0.51",
|
|
37
|
+
"@workbench-ai/agent-driver": "0.0.45",
|
|
38
|
+
"@workbench-ai/workbench-core": "0.0.51",
|
|
39
|
+
"@workbench-ai/agent-driver-anthropic-claude-code": "0.0.45",
|
|
40
|
+
"@workbench-ai/workbench-protocol": "0.0.51"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"@types/node": "^24.3.1",
|