npm - @workbench-ai/workbench-built-in-adapters - Versions diffs - 0.0.49 → 0.0.51 - Mend

@workbench-ai/workbench-built-in-adapters 0.0.49 → 0.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/agent-turn.d.ts CHANGED Viewed

@@ -7,7 +7,7 @@ export interface AgentProviderSpec {
     effort?: string;
 }
 export interface WorkbenchAgentTurnRequest {
-    role: "optimizer" | "runner" | "engine";
+    role: "improver" | "runner" | "engine";
     provider: AgentProviderSpec;
     adapterAuthRoot?: string;
     adapterAuthRequest?: JsonValue;
@@ -29,4 +29,11 @@ export interface WorkbenchAgentTurnResult {
 export type WorkbenchAgentTurnExecutor = (request: WorkbenchAgentTurnRequest) => Promise<WorkbenchAgentTurnResult>;
 export declare function executeWorkbenchAgentTurn(executor: (request: WorkbenchAgentTurnRequest) => Promise<WorkbenchAgentTurnResult>, request: WorkbenchAgentTurnRequest): Promise<WorkbenchAgentTurnResult>;
 export declare function defaultWorkbenchAgentTurnExecutor(request: WorkbenchAgentTurnRequest): Promise<WorkbenchAgentTurnResult>;
+export declare function resolveAgentTurnTimeouts(defaults: {
+    turn_timeout_ms?: number;
+    stall_timeout_ms?: number;
+}): {
+    turnTimeoutMs: number;
+    stallTimeoutMs: number;
+};
 //# sourceMappingURL=agent-turn.d.ts.map

package/dist/agent-turn.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"agent-turn.d.ts","sourceRoot":"","sources":["../src/agent-turn.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAEV,mBAAmB,EACnB,YAAY,EACb,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAQL,KAAK,SAAS,EAEf,MAAM,4BAA4B,CAAC;AACpC,OAAO,KAAK,EACV,gCAAgC,EACjC,MAAM,8BAA8B,CAAC;~~AAetC~~,MAAM,WAAW,iBAAiB;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,yBAAyB;IACxC,IAAI,EAAE,~~WAAW~~,GAAG,QAAQ,GAAG,QAAQ,CAAC;~~IACxC~~,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,SAAS,CAAC;IAC/B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxC,aAAa,EAAE,MAAM,CAAC;IACtB,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,CAAC,EAAE,gCAAgC,CAAC;CACnD;AAED,MAAM,WAAW,wBAAwB;IACvC,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,mBAAmB,EAAE,CAAC;IAClC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IACpC,KAAK,CAAC,EAAE,YAAY,CAAC;CACtB;AAED,MAAM,MAAM,0BAA0B,GAAG,CAAC,OAAO,EAAE,yBAAyB,KAAK,OAAO,CAAC,wBAAwB,CAAC,CAAC;AA4BnH,wBAAsB,yBAAyB,CAC7C,QAAQ,EAAE,CAAC,OAAO,EAAE,yBAAyB,KAAK,OAAO,CAAC,wBAAwB,CAAC,EACnF,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,wBAAwB,CAAC,CAenC;AAED,wBAAsB,iCAAiC,CACrD,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,wBAAwB,CAAC,CAwFnC"}
1	+ {"version":3,"file":"agent-turn.d.ts","sourceRoot":"","sources":["../src/agent-turn.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAEV,mBAAmB,EACnB,YAAY,EACb,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAQL,KAAK,SAAS,EAEf,MAAM,4BAA4B,CAAC;AACpC,OAAO,KAAK,EACV,gCAAgC,EACjC,MAAM,8BAA8B,CAAC;AAiBtC,MAAM,WAAW,iBAAiB;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,yBAAyB;IACxC,IAAI,EAAE,UAAU,GAAG,QAAQ,GAAG,QAAQ,CAAC;IACvC,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,SAAS,CAAC;IAC/B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxC,aAAa,EAAE,MAAM,CAAC;IACtB,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,CAAC,EAAE,gCAAgC,CAAC;CACnD;AAED,MAAM,WAAW,wBAAwB;IACvC,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,mBAAmB,EAAE,CAAC;IAClC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IACpC,KAAK,CAAC,EAAE,YAAY,CAAC;CACtB;AAED,MAAM,MAAM,0BAA0B,GAAG,CAAC,OAAO,EAAE,yBAAyB,KAAK,OAAO,CAAC,wBAAwB,CAAC,CAAC;AA4BnH,wBAAsB,yBAAyB,CAC7C,QAAQ,EAAE,CAAC,OAAO,EAAE,yBAAyB,KAAK,OAAO,CAAC,wBAAwB,CAAC,EACnF,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,wBAAwB,CAAC,CAenC;AAED,wBAAsB,iCAAiC,CACrD,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,wBAAwB,CAAC,CAwFnC;AAkHD,wBAAgB,wBAAwB,CAAC,QAAQ,EAAE;IACjD,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B,GAAG;IACF,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAQA"}

package/dist/agent-turn.js CHANGED Viewed

@@ -8,6 +8,8 @@ import { importWorkbenchRuntime } from "./runtime.js";
 const DEFAULT_AGENT_TURN_MAX_ATTEMPTS = 3;
 const DEFAULT_AGENT_TURN_RETRY_BASE_MS = 5_000;
 const DEFAULT_AGENT_TURN_RETRY_MAX_MS = 30_000;
+const DEFAULT_AGENT_TURN_TIMEOUT_MS = 3_600_000;
+const DEFAULT_AGENT_STALL_TIMEOUT_MS = 300_000;
 const AGENT_PROVIDER_REGISTRY = {
     codex: {
         executable: "codex",
@@ -201,14 +203,14 @@ function agentProviderRegistration(providerName) {
     return registration;
 }
 async function buildAgentExecutionPlan(provider, providerSpec, workspaceRoot, agentHome, adapterAuth) {
-    const turnTimeoutMs = provider.manifest.defaults.turn_timeout_ms ?? 3_600_000;
+    const { turnTimeoutMs, stallTimeoutMs } = resolveAgentTurnTimeouts(provider.manifest.defaults);
     const harness = {
         id: provider.manifest.id,
         auth: await resolveAgentAuth(provider, providerSpec, workspaceRoot, agentHome, adapterAuth),
         ...(firstNonEmpty(providerSpec.model, provider.manifest.defaults.model) ? { model: firstNonEmpty(providerSpec.model, provider.manifest.defaults.model) } : {}),
         ...(firstNonEmpty(providerSpec.effort, provider.manifest.defaults.effort) ? { effort: firstNonEmpty(providerSpec.effort, provider.manifest.defaults.effort) } : {}),
         turn_timeout_ms: turnTimeoutMs,
-        stall_timeout_ms: Math.max(provider.manifest.defaults.stall_timeout_ms ?? 0, turnTimeoutMs),
+        stall_timeout_ms: stallTimeoutMs,
         config: resolveAgentConfig(provider, defaultWorkbenchAgentConfig(provider, providerSpec.use)),
         retry: DEFAULT_HARNESS_RETRY,
         cancel: DEFAULT_HARNESS_CANCEL,
@@ -221,6 +223,19 @@ async function buildAgentExecutionPlan(provider, providerSpec, workspaceRoot, ag
         harness,
     };
 }
+export function resolveAgentTurnTimeouts(defaults) {
+    const turnTimeoutMs = positiveTimeoutMs(defaults.turn_timeout_ms) ?? DEFAULT_AGENT_TURN_TIMEOUT_MS;
+    const requestedStallTimeoutMs = positiveTimeoutMs(defaults.stall_timeout_ms) ?? DEFAULT_AGENT_STALL_TIMEOUT_MS;
+    return {
+        turnTimeoutMs,
+        stallTimeoutMs: Math.min(requestedStallTimeoutMs, turnTimeoutMs),
+    };
+}
+function positiveTimeoutMs(value) {
+    return typeof value === "number" && Number.isFinite(value) && value > 0
+        ? value
+        : null;
+}
 function defaultWorkbenchAgentConfig(provider, providerName) {
     const fallback = (provider.manifest.defaults.config ?? {});
     return {
@@ -229,9 +244,9 @@ function defaultWorkbenchAgentConfig(provider, providerName) {
     };
 }
 async function resolveAgentAuth(provider, providerSpec, workspaceRoot, agentHome, adapterAuth) {
-    const subject = adapterAuthProviderSubject(adapterAuth.request, providerSpec.use) ??
+    const candidate = adapterAuthProviderCandidate(adapterAuth.request, providerSpec.use) ??
         (provider.manifest.defaults.auth ?? {});
-    const parsed = provider.schemas.auth.safeParse(subject);
+    const parsed = provider.schemas.auth.safeParse(candidate);
     if (!parsed.success) {
         throw new Error(`Agent provider "${provider.manifest.id}" auth is invalid: ${formatValidationIssues(parsed.error.issues)}`);
     }
@@ -239,7 +254,7 @@ async function resolveAgentAuth(provider, providerSpec, workspaceRoot, agentHome
     void agentHome;
     return { ...parsed.data };
 }
-function adapterAuthProviderSubject(auth, providerName) {
+function adapterAuthProviderCandidate(auth, providerName) {
     const record = jsonRecord(auth);
     const self = jsonRecord(record?.self);
     const adapters = jsonRecord(record?.adapters);
@@ -338,7 +353,7 @@ function isTransientAgentTurnError(error) {
     if (isNativeCaCertificateFailure(message)) {
         return false;
     }
-    return /\b(fetch failed|error sending request|stream disconnected before completion|ECONNRESET|ETIMEDOUT|EAI_AGAIN|ENETUNREACH|ECONNREFUSED|socket hang up|network error|UND_ERR_|signal SIGTERM)/iu.test(message);
+    return /\b(fetch failed|error sending request|stream disconnected before completion|turn stalled after \d+ms|ECONNRESET|ETIMEDOUT|EAI_AGAIN|ENETUNREACH|ECONNREFUSED|socket hang up|network error|UND_ERR_|signal SIGTERM)/iu.test(message);
 }
 function isNativeCaCertificateFailure(message) {
     return /\bno native root CA certificates found\b|install ca-certificates/iu.test(message);

package/dist/execute.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"execute.d.ts","sourceRoot":"","sources":["../src/execute.ts"],"names":[],"mappings":"~~AAIA~~,OAAO,KAAK,EAEV,IAAI,EAKL,MAAM,kCAAkC,CAAC;AAc1C,OAAO,KAAK,EAEV,0BAA0B,EAG3B,MAAM,iBAAiB,CAAC;AAQzB,MAAM,WAAW,4CAA4C;IAC3D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,0BAA0B,CAAC;IAC3C,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,IAAI,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AA4CD,wBAAsB,qCAAqC,CACzD,IAAI,GAAE,4CAAiD,GACtD,OAAO,CAAC,IAAI,CAAC,CAiEf"}
1	+ {"version":3,"file":"execute.d.ts","sourceRoot":"","sources":["../src/execute.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAEV,IAAI,EAKL,MAAM,kCAAkC,CAAC;AAc1C,OAAO,KAAK,EAEV,0BAA0B,EAG3B,MAAM,iBAAiB,CAAC;AAQzB,MAAM,WAAW,4CAA4C;IAC3D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,0BAA0B,CAAC;IAC3C,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,IAAI,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AA4CD,wBAAsB,qCAAqC,CACzD,IAAI,GAAE,4CAAiD,GACtD,OAAO,CAAC,IAAI,CAAC,CAiEf"}

package/dist/execute.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { spawn } from "node:child_process";
 import { promises as fs } from "node:fs";
+import os from "node:os";
 import path from "node:path";
 import { ensureWorkbenchAdapterOutputDir, readWorkbenchAdapterOperationResult, readWorkbenchAdapterOperationRequest, runWorkbenchRuntimeOperationSequence, writeWorkbenchAdapterOperationResult, workbenchAdapterOperationResultPath, } from "@workbench-ai/workbench-protocol";
 import YAML from "yaml";
@@ -47,8 +48,8 @@ export async function executeWorkbenchBuiltInAdapterCommand(args = {}) {
     if (isBuiltInAgentAdapterId(adapterId)) {
         const workload = workloadFromAdapterOperationRequest(request);
         const agent = builtInAgentSpecFromRequest(request);
-        if (request.operation === "optimizer.improve") {
-            await writeAgentSubjectRevisionOutput(request, workload, agent, {
+        if (request.operation === "candidate.improve") {
+            await writeAgentCandidateRevisionOutput(request, workload, agent, {
                 agentExecutor: args.agentExecutor,
                 adapterAuthRoot: args.adapterAuthRoot,
                 adapterAuthRequest: args.adapterAuthRequest ?? request.auth,
@@ -56,8 +57,8 @@ export async function executeWorkbenchBuiltInAdapterCommand(args = {}) {
             });
             return;
         }
-        if (request.operation === "subject.run") {
-            await writeAgentSubjectOutput(request, workload, agent, {
+        if (request.operation === "candidate.run") {
+            await writeAgentCandidateOutput(request, workload, agent, {
                 agentExecutor: args.agentExecutor,
                 adapterAuthRoot: args.adapterAuthRoot,
                 adapterAuthRequest: args.adapterAuthRequest ?? request.auth,
@@ -123,7 +124,7 @@ async function workbenchEngineOutcomeUsage(outcome) {
     const operationUsage = outcome.usage
         ? undefined
         : runtime.mergeUsageSummaries(outcome.operationResults.map((result) => {
-            if (result.operation === "subject.run") {
+            if (result.operation === "candidate.run") {
                 return runtime.assignUsageRole("runner", result.usage);
             }
             if (result.operation === "engine.run") {
@@ -163,16 +164,16 @@ function workbenchEngineScoreInvocation(request) {
             : adapterCommandName(score.use),
     };
 }
-function workbenchEngineSubjectInvocation(request) {
-    const subject = request.context?.subject?.run;
-    if (!subject?.use || !subject.command) {
-        throw new Error("Workbench engine requires context.subject.run.use and context.subject.run.command.");
+function workbenchEngineCandidateInvocation(request) {
+    const candidate = request.context?.candidate?.run;
+    if (!candidate?.use || !candidate.command) {
+        throw new Error("Workbench engine requires context.candidate.run.use and context.candidate.run.command.");
     }
     return {
-        use: subject.use,
-        with: (subject.with ?? {}),
-        ...(subject.auth !== undefined ? { auth: subject.auth } : {}),
-        command: subject.command,
+        use: candidate.use,
+        with: (candidate.with ?? {}),
+        ...(candidate.auth !== undefined ? { auth: candidate.auth } : {}),
+        command: candidate.command,
     };
 }
 function workbenchEngineGradingIsolation(request) {
@@ -188,13 +189,13 @@ function workbenchEngineGradingIsolation(request) {
 }
 async function runWorkbenchEngineSharedGrading(request) {
     const inputs = await workbenchEngineRuntimeInputs(request);
-    const subject = workbenchEngineSubjectInvocation(request);
+    const candidate = workbenchEngineCandidateInvocation(request);
     const score = workbenchEngineScoreInvocation(request);
     const result = await runWorkbenchRuntimeOperationSequence({
         inputs,
         prepare: true,
         operations: [
-            { label: "subject", operation: "subject.run", invocation: subject },
+            { label: "candidate", operation: "candidate.run", invocation: candidate },
             { label: "score", operation: "engine.run", invocation: score },
         ],
     });
@@ -203,25 +204,25 @@ async function runWorkbenchEngineSharedGrading(request) {
 }
 async function runWorkbenchEngineSeparateGrading(request) {
     const inputs = await workbenchEngineRuntimeInputs(request);
-    const subject = workbenchEngineSubjectInvocation(request);
+    const candidate = workbenchEngineCandidateInvocation(request);
     const score = workbenchEngineScoreInvocation(request);
     const runtime = await importWorkbenchRuntime();
     const runner = await runWorkbenchRuntimeOperationSequence({
         inputs: {
-            subject: inputs.subject,
+            candidate: inputs.candidate,
             case: inputs.case,
             traces: inputs.traces,
         },
         prepare: true,
         collectWorkspace: true,
         operations: [
-            { label: "subject", operation: "subject.run", invocation: subject },
+            { label: "candidate", operation: "candidate.run", invocation: candidate },
         ],
     });
     assertRuntimeControlResultOk(runner, "Workbench separate runner");
     const grader = await runWorkbenchRuntimeOperationSequence({
         inputs: {
-            subject: inputs.subject,
+            candidate: inputs.candidate,
             case: inputs.case,
             enginePrivate: inputs.enginePrivate,
             traces: inputs.traces,
@@ -243,14 +244,14 @@ async function runWorkbenchEngineSeparateGrading(request) {
     };
 }
 async function workbenchEngineRuntimeInputs(request) {
-    const [subject, caseFiles, enginePrivate, traces] = await Promise.all([
-        readOptionalSurfaceFiles(request.paths.subject),
+    const [candidate, caseFiles, enginePrivate, traces] = await Promise.all([
+        readOptionalSurfaceFiles(request.paths.candidate),
         readOptionalSurfaceFiles(request.paths.case),
         readOptionalSurfaceFiles(request.paths.enginePrivate),
         readOptionalSurfaceFiles(request.paths.traces),
     ]);
     return {
-        subject,
+        candidate,
         case: caseFiles,
         enginePrivate,
         traces,
@@ -309,12 +310,20 @@ function safeInternalPathSegment(value) {
 }
 async function executeCommandAdapterRequest(request) {
     const command = requiredAdapterCommandString(request, "command");
-    await runAdapterShellCommand(command, request.paths.workspace);
-    if (request.operation === "engine.run") {
-        await requireCommandScoreResult(request);
-        return;
+    const before = request.operation === "candidate.improve"
+        ? await snapshotEditableCandidateWorkspace(request)
+        : null;
+    try {
+        await runAdapterShellCommand(command, request.paths.workspace);
+        if (request.operation === "engine.run") {
+            await requireCommandScoreResult(request);
+            return;
+        }
+        await writeOperationOkUnlessPresent(request, before?.root);
+    }
+    finally {
+        await before?.cleanup();
     }
-    await writeOperationOkUnlessPresent(request);
 }
 async function requireCommandScoreResult(request) {
     if (!await fileExists(workbenchAdapterOperationResultPath(request.paths.output))) {
@@ -379,15 +388,15 @@ async function runAdapterShellCommand(command, cwd, env = {}) {
         });
     });
 }
-async function writeOperationOkUnlessPresent(request) {
+async function writeOperationOkUnlessPresent(request, beforeRoot) {
     if (await fileExists(workbenchAdapterOperationResultPath(request.paths.output))) {
         return;
     }
-    if (request.operation === "optimizer.improve") {
-        const patch = await createSubjectPatchFromWorkspace({
-            beforeRoot: requiredRequestPath(request.paths.subject, "paths.subject"),
+    if (request.operation === "candidate.improve") {
+        const patch = await createCandidatePatchFromWorkspace({
+            beforeRoot: beforeRoot ?? requiredRequestPath(request.paths.candidate, "paths.candidate"),
             afterRoot: request.paths.workspace,
-            edits: request.context?.optimizer?.edits ?? [],
+            edits: request.context?.improve?.edits ?? [],
         });
         await writeWorkbenchAdapterOperationResult(request.paths.output, {
             protocol: "workbench.adapter-result.v1",
@@ -403,6 +412,41 @@ async function writeOperationOkUnlessPresent(request) {
         ok: true,
     });
 }
+async function snapshotEditableCandidateWorkspace(request) {
+    const root = await fs.mkdtemp(path.join(os.tmpdir(), "workbench-candidate-before-"));
+    const edits = request.context?.improve?.edits ?? [];
+    const files = await readEditableCandidateWorkspaceFiles(request.paths.workspace, edits);
+    await writeSurfaceFiles(root, files);
+    return {
+        root,
+        cleanup: async () => {
+            await fs.rm(root, { recursive: true, force: true }).catch(() => undefined);
+        },
+    };
+}
+async function readEditableCandidateWorkspaceFiles(root, edits) {
+    const files = [];
+    for (const edit of edits) {
+        const normalized = normalizeRelativePath(edit);
+        if (!normalized || isRuntimeWorkspacePath(normalized)) {
+            continue;
+        }
+        const absolutePath = path.join(root, normalized);
+        const stat = await fs.stat(absolutePath).catch(() => null);
+        if (!stat) {
+            continue;
+        }
+        if (stat.isDirectory()) {
+            await readSurfaceFilesInto(root, normalized, files);
+            continue;
+        }
+        if (stat.isFile()) {
+            files.push(await readSurfaceFile(root, normalized));
+        }
+    }
+    return dedupeSurfaceFiles(files.filter((file) => isCandidateEditPath(file.path, edits) &&
+        !isRuntimeWorkspacePath(file.path)));
+}
 async function firstExistingFile(files) {
     for (const file of files) {
         const stat = await fs.stat(file).catch(() => null);
@@ -460,10 +504,13 @@ async function readWorkbenchEngineCase(args) {
         throw new Error(`Task ${args.id} ${TASK_CONTROL_FILE} must include a task string.`);
     }
     const unsupportedTaskFields = Object.keys(taskRecord)
-        .filter((key) => !["version", "task", "files", "tests", "solution", "environment"].includes(key));
+        .filter((key) => !["version", "task", "split", "files", "tests", "solution", "environment"].includes(key));
     if (unsupportedTaskFields.length > 0) {
         throw new Error(`Task ${args.id} ${TASK_CONTROL_FILE} has unsupported field${unsupportedTaskFields.length === 1 ? "" : "s"}: ${unsupportedTaskFields.join(", ")}.`);
     }
+    if (taskRecord.split !== undefined && (typeof taskRecord.split !== "string" || taskRecord.split.trim().length === 0)) {
+        throw new Error(`Task ${args.id} ${TASK_CONTROL_FILE} split must be a non-empty string when provided.`);
+    }
     const publicPrefix = taskDirectoryPrefix(taskRecord.files, "files", args.id);
     const testsPrefix = taskDirectoryPrefix(taskRecord.tests, "tests", args.id);
     const solutionPrefix = taskDirectoryPrefix(taskRecord.solution, "solution", args.id);
@@ -483,6 +530,7 @@ async function readWorkbenchEngineCase(args) {
         case: {
             version: 3,
             prompt: taskRecord.task,
+            ...(typeof taskRecord.split === "string" ? { split: taskRecord.split.trim() } : {}),
             ...(taskRecord.environment !== undefined
                 ? { environment: taskRecord.environment }
                 : {}),
@@ -539,21 +587,25 @@ async function readSurfaceFilesInto(root, relativeDir, result) {
         if (!entry.isFile()) {
             continue;
         }
-        const [body, stat] = await Promise.all([
-            fs.readFile(absolutePath),
-            fs.stat(absolutePath),
-        ]);
-        const text = body.toString("utf8");
-        const isUtf8 = Buffer.from(text, "utf8").equals(body);
-        result.push({
-            path: relativePath,
-            kind: isUtf8 ? "text" : "binary",
-            encoding: isUtf8 ? "utf8" : "base64",
-            content: isUtf8 ? text : body.toString("base64"),
-            executable: (stat.mode & 0o111) !== 0,
-        });
+        result.push(await readSurfaceFile(root, relativePath));
     }
 }
+async function readSurfaceFile(root, relativePath) {
+    const absolutePath = path.join(root, normalizeRelativePath(relativePath));
+    const [body, stat] = await Promise.all([
+        fs.readFile(absolutePath),
+        fs.stat(absolutePath),
+    ]);
+    const text = body.toString("utf8");
+    const isUtf8 = Buffer.from(text, "utf8").equals(body);
+    return {
+        path: normalizeRelativePath(relativePath),
+        kind: isUtf8 ? "text" : "binary",
+        encoding: isUtf8 ? "utf8" : "base64",
+        content: isUtf8 ? text : body.toString("base64"),
+        executable: (stat.mode & 0o111) !== 0,
+    };
+}
 async function fileExists(filePath) {
     return fs.stat(filePath).then((stat) => stat.isFile(), () => false);
 }
@@ -644,13 +696,13 @@ function workloadFromAdapterOperationRequest(request) {
             name: context.benchmark?.name ?? "",
             description: context.benchmark?.description ?? "",
         },
-        subject: {
-            path: context.subject?.path ?? "",
+        candidate: {
+            path: context.candidate?.path ?? "",
         },
-        optimizer: {
-            edits: context.optimizer?.edits ?? [],
+        improve: {
+            edits: context.improve?.edits ?? [],
         },
-        subjectId: context.subject?.id ?? "",
+        candidateId: context.candidate?.id ?? "",
         attemptIndex: attempt.attemptIndex ?? 0,
         sampleIndex: attempt.sampleIndex ?? 0,
         caseId: attempt.caseId ?? "",
@@ -726,35 +778,35 @@ async function executeBuiltInAgentTurn(executor, request) {
     const { defaultWorkbenchAgentTurnExecutor, executeWorkbenchAgentTurn, } = await import("./agent-turn.js");
     return await executeWorkbenchAgentTurn(executor ?? defaultWorkbenchAgentTurnExecutor, request);
 }
-async function writeAgentSubjectOutput(request, workload, subject, options = {}) {
-    if (request.operation !== "subject.run") {
-        throw new Error("Agent subject results can only complete subject.run operations.");
+async function writeAgentCandidateOutput(request, workload, candidate, options = {}) {
+    if (request.operation !== "candidate.run") {
+        throw new Error("Agent candidate results can only complete candidate.run operations.");
     }
-    const traceRoot = path.join(request.paths.output, ".workbench", "internal", "agent-subject");
+    const traceRoot = path.join(request.paths.output, ".workbench", "internal", "agent-candidate");
     const agentResult = await executeBuiltInAgentTurn(options.agentExecutor, {
         role: "runner",
-        provider: subject.agent,
+        provider: candidate.agent,
         adapterAuthRoot: options.adapterAuthRoot,
         adapterAuthRequest: options.adapterAuthRequest,
         adapterAuthEnv: options.adapterAuthEnv,
         workspaceRoot: request.paths.workspace,
         cwd: request.paths.workspace,
-        prompt: buildAgentSubjectPrompt(workload, subject),
+        prompt: buildAgentCandidatePrompt(workload, candidate),
         traceRoot,
         jobId: workload.job.id,
     });
-    const outputPath = path.join(request.paths.output, "subject-summary.md");
+    const outputPath = path.join(request.paths.output, "candidate-summary.md");
     await fs.mkdir(path.dirname(outputPath), { recursive: true });
     await fs.writeFile(outputPath, agentResult.output);
     const trace = {
-        path: `.workbench/traces/${workload.job.id}/subject.json`,
+        path: `.workbench/traces/${workload.job.id}/candidate.json`,
         kind: "text",
         encoding: "utf8",
         executable: false,
         content: `${JSON.stringify({
-            kind: "agent_subject",
-            provider: subject.agent.use,
-            subjectId: workload.subjectId,
+            kind: "agent_candidate",
+            provider: candidate.agent.use,
+            candidateId: workload.candidateId,
             attemptIndex: workload.attemptIndex,
             sampleIndex: workload.sampleIndex,
             summary: agentResult.output,
@@ -766,25 +818,25 @@ async function writeAgentSubjectOutput(request, workload, subject, options = {})
     const usage = runtime.assignUsageRole("runner", agentResult.usage);
     await writeWorkbenchAdapterOperationResult(request.paths.output, {
         protocol: "workbench.adapter-result.v1",
-        operation: "subject.run",
+        operation: "candidate.run",
         ok: true,
         ...(agentResult.output ? { summary: agentResult.output } : {}),
         feedback: {
-            subject: "agent",
-            agent: subject.agent.use,
+            candidate: "agent",
+            agent: candidate.agent.use,
             metadata: agentResult.metadata,
         },
         ...(usage ? { usage } : {}),
     });
 }
-function buildAgentSubjectPrompt(workload, subject) {
+function buildAgentCandidatePrompt(workload, candidate) {
     return [
-        ...(subject.instructions ? ["Instructions:", subject.instructions, ""] : []),
+        ...(candidate.instructions ? ["Instructions:", candidate.instructions, ""] : []),
         "Context:",
-        "- Subject source files are mounted at /workspace/input/subject.",
-        "- Follow any subject guidance, skill files, scripts, or configuration under /workspace/input/subject.",
+        "- Candidate source files are mounted at /workspace/input/candidate.",
+        "- Follow any candidate guidance, skill files, scripts, or configuration under /workspace/input/candidate.",
         "- The mutable working directory is /workspace.",
-        "- If the subject declares prepare.command, it has already run and may have copied files into /workspace.",
+        "- If the candidate declares prepare.command, it has already run and may have copied files into /workspace.",
         ...(workload.case?.prompt ? ["Case:", workload.case.prompt, ""] : []),
         "- Public case files are mounted at /workspace/input/case.",
         "- Verifier tests are not present while you run.",
@@ -792,87 +844,89 @@ function buildAgentSubjectPrompt(workload, subject) {
         "- You may write inspection artifacts under /workspace/output.",
     ].join("\n");
 }
-async function writeAgentSubjectRevisionOutput(request, workload, optimizer, options) {
-    if (request.operation !== "optimizer.improve") {
-        throw new Error("Agent subject revision results can only complete optimizer.improve operations.");
+async function writeAgentCandidateRevisionOutput(request, workload, improver, options) {
+    if (request.operation !== "candidate.improve") {
+        throw new Error("Agent improve results can only complete candidate.improve operations.");
     }
-    const traceRoot = path.join(request.paths.output, ".workbench", "internal", "agent-optimizer");
-    const agentResult = await executeBuiltInAgentTurn(options.agentExecutor, {
-        role: "optimizer",
-        provider: optimizer.agent,
-        adapterAuthRoot: options.adapterAuthRoot,
-        adapterAuthRequest: options.adapterAuthRequest,
-        adapterAuthEnv: options.adapterAuthEnv,
-        workspaceRoot: request.paths.workspace,
-        cwd: request.paths.workspace,
-        prompt: buildAgentOptimizerPrompt(workload),
-        traceRoot,
-        jobId: workload.job.id,
-    });
-    const subjectPatch = await createSubjectPatchFromWorkspace({
-        beforeRoot: requiredRequestPath(request.paths.subject, "paths.subject"),
-        afterRoot: request.paths.workspace,
-        edits: workload.optimizer.edits,
-    });
-    const changedSubjectPaths = subjectPatch.fileChanges.filter((filePath) => isSubjectEditPath(filePath, workload.optimizer.edits));
-    if (changedSubjectPaths.length === 0) {
-        throw new Error("Agent improve adapter completed without changing a subject file covered by optimizer edits.");
+    const before = await snapshotEditableCandidateWorkspace(request);
+    const traceRoot = path.join(request.paths.output, ".workbench", "internal", "agent-improver");
+    try {
+        const agentResult = await executeBuiltInAgentTurn(options.agentExecutor, {
+            role: "improver",
+            provider: improver.agent,
+            adapterAuthRoot: options.adapterAuthRoot,
+            adapterAuthRequest: options.adapterAuthRequest,
+            adapterAuthEnv: options.adapterAuthEnv,
+            workspaceRoot: request.paths.workspace,
+            cwd: request.paths.workspace,
+            prompt: buildAgentImproverPrompt(workload),
+            traceRoot,
+            jobId: workload.job.id,
+        });
+        const candidatePatch = await createCandidatePatchFromWorkspace({
+            beforeRoot: before.root,
+            afterRoot: request.paths.workspace,
+            edits: workload.improve.edits,
+        });
+        const changedCandidatePaths = candidatePatch.fileChanges.filter((filePath) => isCandidateEditPath(filePath, workload.improve.edits));
+        if (changedCandidatePaths.length === 0) {
+            throw new Error("Agent improve adapter completed without changing a candidate file covered by improve edits.");
+        }
+        const trace = {
+            path: `.workbench/traces/${workload.job.id}/improver.json`,
+            kind: "text",
+            encoding: "utf8",
+            executable: false,
+            content: `${JSON.stringify({
+                kind: "agent_improver",
+                provider: improver.agent.use,
+                candidateId: workload.candidateId,
+                attemptIndex: workload.attemptIndex,
+                changedPaths: changedCandidatePaths,
+                summary: agentResult.output,
+                metadata: agentResult.metadata,
+            }, null, 2)}\n`,
+        };
+        await writeSurfaceFiles(request.paths.output, [trace, ...agentResult.traceFiles]);
+        const runtime = await importWorkbenchRuntime();
+        const usage = runtime.assignUsageRole("improver", agentResult.usage);
+        await writeWorkbenchAdapterOperationResult(request.paths.output, {
+            protocol: "workbench.adapter-result.v1",
+            operation: "candidate.improve",
+            ok: true,
+            value: {
+                ...candidatePatch,
+                fileChanges: changedCandidatePaths,
+            },
+            ...(agentResult.output ? { summary: agentResult.output } : {}),
+            feedback: {
+                improver: improver.agent.use,
+                changedPaths: changedCandidatePaths,
+                metadata: agentResult.metadata,
+            },
+            ...(usage ? { usage } : {}),
+        });
+    }
+    finally {
+        await before.cleanup();
     }
-    const trace = {
-        path: `.workbench/traces/${workload.job.id}/optimizer.json`,
-        kind: "text",
-        encoding: "utf8",
-        executable: false,
-        content: `${JSON.stringify({
-            kind: "agent_optimizer",
-            provider: optimizer.agent.use,
-            subjectId: workload.subjectId,
-            attemptIndex: workload.attemptIndex,
-            changedPaths: changedSubjectPaths,
-            summary: agentResult.output,
-            metadata: agentResult.metadata,
-        }, null, 2)}\n`,
-    };
-    await writeSurfaceFiles(request.paths.output, [trace, ...agentResult.traceFiles]);
-    const runtime = await importWorkbenchRuntime();
-    const usage = runtime.assignUsageRole("optimizer", agentResult.usage);
-    await writeWorkbenchAdapterOperationResult(request.paths.output, {
-        protocol: "workbench.adapter-result.v1",
-        operation: "optimizer.improve",
-        ok: true,
-        value: {
-            ...subjectPatch,
-            fileChanges: changedSubjectPaths,
-        },
-        ...(agentResult.output ? { summary: agentResult.output } : {}),
-        feedback: {
-            optimizer: optimizer.agent.use,
-            changedPaths: changedSubjectPaths,
-            metadata: agentResult.metadata,
-        },
-        ...(usage ? { usage } : {}),
-    });
 }
-function buildAgentOptimizerPrompt(workload) {
+function buildAgentImproverPrompt(workload) {
     return [
         "Benchmark:",
         workload.benchmark.description || workload.benchmark.name,
         "",
-        "Context:",
-        "- Subject source files are mounted at /workspace/input/subject.",
-        "- Follow any subject guidance, skill files, scripts, or configuration under /workspace/input/subject.",
-        "- The mutable working directory is /workspace.",
-        "- If the subject declares prepare.command, it has already run and may have copied files into /workspace.",
-        "- Prior run traces are mounted at /workspace/input/traces.",
-        "- Use /workspace/input/traces as the source of truth for what happened in prior attempts.",
-        "- Do not mutate /workspace/input.",
+        "Improve the candidate for this benchmark.",
         "",
-        "Editable subject paths:",
-        workload.optimizer.edits.map((entry) => `- ${entry}`).join("\n"),
+        "Candidate files are in the current directory.",
+        "Prior adapter executions are in /workspace/input/traces.",
         "",
-        "Output:",
-        "- Create or mutate editable subject files directly in the current working directory.",
-        "- Include at least one changed subject file covered by the optimizer edits list.",
+        "Editable paths:",
+        workload.improve.edits.map((entry) => `- ${entry}`).join("\n"),
+        "",
+        "Rules:",
+        "- Modify only editable paths.",
+        "- Change at least one editable file.",
     ].join("\n");
 }
 async function writeRubricJudgeResult(request, workload, engine, options = {}) {
@@ -928,9 +982,9 @@ async function writeRubricEvidenceFiles(args) {
     const root = `.workbench/traces/${args.workload.job.id}/engine/rubric`;
     const scorecard = {
         schema: "workbench.engine.rubric.evidence.v1",
-        safeForOptimizer: true,
+        safeForImprover: true,
         jobId: args.workload.job.id,
-        subjectId: args.workload.subjectId,
+        candidateId: args.workload.candidateId,
         attemptIndex: args.workload.attemptIndex,
         sampleIndex: args.workload.sampleIndex,
         caseId: args.workload.caseId,
@@ -957,7 +1011,7 @@ async function writeRubricEvidenceFiles(args) {
         jsonSurfaceFile(`${root}/scorecard.json`, scorecard),
         ...args.criterionRuns.map((run) => jsonSurfaceFile(`${root}/criteria/${safeInternalPathSegment(run.result.criterion_id)}/result.json`, {
             schema: "workbench.engine.rubric.criterion-evidence.v1",
-            safeForOptimizer: true,
+            safeForImprover: true,
             criterion: args.engine.criteria.find((criterion) => criterion.id === run.result.criterion_id) ?? {
                 id: run.result.criterion_id,
             },
@@ -1079,8 +1133,8 @@ function buildRubricCriterionJudgePrompt(workload, engine, criterion) {
         JSON.stringify(criterion, null, 2),
         "",
         "Context:",
-        "- The subject already ran in this same working directory.",
-        "- Subject outputs are available in the current working directory.",
+        "- The candidate already ran in this same working directory.",
+        "- Candidate outputs are available in the current working directory.",
         "- Public case files are mounted at /workspace/input/case.",
         "- Verifier-private files are mounted at /workspace/private/engine when the task provides them.",
         "- Score only from the current working directory, public case files, verifier-private files, and the criterion above.",
@@ -1276,12 +1330,12 @@ function requireWorkloadTask(workload, label) {
         throw new Error(`${label} workload is missing case text.`);
     }
 }
-async function createSubjectPatchFromWorkspace(args) {
+async function createCandidatePatchFromWorkspace(args) {
     const before = new Map((await readSurfaceFilesRecursive(args.beforeRoot))
         .map((file) => [normalizeRelativePath(file.path), file]));
     const changedFiles = (await readSurfaceFilesRecursive(args.afterRoot))
         .map((file) => ({ ...file, path: normalizeRelativePath(file.path) }))
-        .filter((file) => isSubjectEditPath(file.path, args.edits) &&
+        .filter((file) => isCandidateEditPath(file.path, args.edits) &&
         !isRuntimeWorkspacePath(file.path) &&
         !sameSurfaceFile(before.get(file.path), file))
         .sort((left, right) => left.path.localeCompare(right.path));
@@ -1321,7 +1375,7 @@ async function writeSurfaceFiles(root, files) {
         }
     }
 }
-function isSubjectEditPath(filePath, edits) {
+function isCandidateEditPath(filePath, edits) {
     const normalized = normalizeRelativePath(filePath);
     return edits.some((entry) => {
         const editPath = normalizeRelativePath(entry).replace(/\/+$/u, "");

package/dist/manifests.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { adapterSlot, defineAdapter, defineEngineResolver, defineSubject, defineOptimizer, defineEngineRunner, workbenchAdapterManifestFromDefinition, } from "@workbench-ai/workbench-protocol";
+import { adapterSlot, defineAdapter, defineEngineResolver, defineCandidate, defineImprover, defineEngineRunner, workbenchAdapterManifestFromDefinition, } from "@workbench-ai/workbench-protocol";
 const BUILT_IN_ADAPTER_MANIFESTS = Object.fromEntries(Object.entries({
     workbench: defineAdapter({
         id: "workbench",
@@ -10,8 +10,8 @@ const BUILT_IN_ADAPTER_MANIFESTS = Object.fromEntries(Object.entries({
     }),
     codex: defineAdapter({
         id: "codex",
-        subject: defineSubject(),
-        improve: defineOptimizer(),
+        candidate: defineCandidate(),
+        improve: defineImprover(),
         setup: [
             "npm install --global @openai/codex@0.125.0",
         ],
@@ -24,8 +24,8 @@ const BUILT_IN_ADAPTER_MANIFESTS = Object.fromEntries(Object.entries({
     }),
     claude: defineAdapter({
         id: "claude",
-        subject: defineSubject(),
-        improve: defineOptimizer(),
+        candidate: defineCandidate(),
+        improve: defineImprover(),
         setup: [
             "npm install --global @anthropic-ai/claude-code@2.1.119",
         ],
@@ -57,15 +57,15 @@ const BUILT_IN_ADAPTER_MANIFESTS = Object.fromEntries(Object.entries({
     }),
     command: defineAdapter({
         id: "command",
-        subject: defineSubject(),
+        candidate: defineCandidate(),
         engineRun: defineEngineRunner(),
-        improve: defineOptimizer(),
+        improve: defineImprover(),
     }),
     rubric: defineAdapter({
         id: "rubric",
         engineRun: defineEngineRunner(),
         slots: {
-            judge: adapterSlot("/judge", "subject.run"),
+            judge: adapterSlot("/judge", "candidate.run"),
         },
     }),
     tests: defineAdapter({

package/dist/runtime.js CHANGED Viewed

@@ -12,11 +12,11 @@ export async function importWorkbenchRuntime() {
     return await runtimeModule;
 }
 async function importWorkbenchRuntimeUncached() {
-    const subjects = runtimeImportSubjects();
+    const candidates = runtimeImportCandidates();
     let lastError;
-    for (const subject of subjects) {
+    for (const candidate of candidates) {
         try {
-            return await import(__rewriteRelativeImportExtension(subject));
+            return await import(__rewriteRelativeImportExtension(candidate));
         }
         catch (error) {
             lastError = error;
@@ -24,11 +24,11 @@ async function importWorkbenchRuntimeUncached() {
     }
     throw new Error(`Unable to load @workbench-ai/workbench-core for built-in adapters: ${lastError instanceof Error ? lastError.message : String(lastError)}`);
 }
-function runtimeImportSubjects() {
+function runtimeImportCandidates() {
     return [
         process.env.WORKBENCH_RUNTIME_IMPORT,
         "/app/products/workbench/packages/core/src/index.ts",
         new URL("../../core/src/index.ts", import.meta.url).href,
         "@workbench-ai/workbench-core",
-    ].filter((subject) => typeof subject === "string" && subject.length > 0);
+    ].filter((candidate) => typeof candidate === "string" && candidate.length > 0);
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@workbench-ai/workbench-built-in-adapters",
-  "version": "0.0.49",
+  "version": "0.0.51",
   "type": "module",
   "repository": {
     "type": "git",
@@ -32,12 +32,12 @@
   ],
   "dependencies": {
     "yaml": "^2.8.2",
-    "@workbench-ai/agent-driver-anthropic-claude-code": "0.0.45",
-    "@workbench-ai/agent-driver": "0.0.45",
     "@workbench-ai/agent-driver-openai-codex": "0.0.45",
-    "@workbench-ai/workbench-protocol": "0.0.49",
-    "@workbench-ai/workbench-core": "0.0.49",
-    "@workbench-ai/workbench-contract": "0.0.49"
+    "@workbench-ai/workbench-contract": "0.0.51",
+    "@workbench-ai/agent-driver": "0.0.45",
+    "@workbench-ai/workbench-core": "0.0.51",
+    "@workbench-ai/agent-driver-anthropic-claude-code": "0.0.45",
+    "@workbench-ai/workbench-protocol": "0.0.51"
   },
   "devDependencies": {
     "@types/node": "^24.3.1",