npm - @workbench-ai/workbench-core - Versions diffs - 0.0.49 → 0.0.51 - Mend

@workbench-ai/workbench-core 0.0.49 → 0.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/dist/candidate-patch.d.ts +8 -0
package/dist/candidate-patch.d.ts.map +1 -0
package/dist/{subject-patch.js → candidate-patch.js} +5 -5
package/dist/execution-evidence.d.ts +5 -5
package/dist/execution-evidence.d.ts.map +1 -1
package/dist/execution-evidence.js +8 -8
package/dist/execution-graph.d.ts +2 -2
package/dist/execution-graph.d.ts.map +1 -1
package/dist/execution-graph.js +13 -13
package/dist/execution-jobs.d.ts +7 -6
package/dist/execution-jobs.d.ts.map +1 -1
package/dist/execution-jobs.js +32 -17
package/dist/execution-outputs.d.ts +2 -2
package/dist/execution-outputs.d.ts.map +1 -1
package/dist/execution-outputs.js +25 -13
package/dist/execution-runtime-types.d.ts +1 -1
package/dist/execution-runtime-types.d.ts.map +1 -1
package/dist/execution-traces.js +7 -7
package/dist/execution-usage.js +9 -9
package/dist/generic-spec.d.ts +46 -30
package/dist/generic-spec.d.ts.map +1 -1
package/dist/generic-spec.js +173 -80
package/dist/index.d.ts +68 -39
package/dist/index.d.ts.map +1 -1
package/dist/index.js +805 -359
package/dist/runtime-utils.d.ts +1 -1
package/dist/runtime-utils.d.ts.map +1 -1
package/dist/runtime-utils.js +3 -3
package/dist/sandbox-backends/docker.js +5 -5
package/dist/sandbox-inputs.js +3 -3
package/dist/sandbox-plane.js +7 -7
package/package.json +3 -3
package/worker/sandbox-adapter-runner.cjs +2 -2
package/dist/subject-patch.d.ts +0 -8
package/dist/subject-patch.d.ts.map +0 -1

package/dist/index.js CHANGED Viewed

@@ -4,19 +4,19 @@ import path from "node:path";
 import { fileURLToPath } from "node:url";
 import YAML from "yaml";
 import { adapterCommandName, assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterAuthRequirements, parseWorkbenchAdapterManifest, readWorkbenchAdapterOperationResult, WORKBENCH_RUNTIME_CONTROL_TOKEN_ENV, WORKBENCH_RUNTIME_CONTROL_URL_ENV, workbenchAdapterOperationCommand, workbenchAdapterOperationExecutor, workbenchAdapterOperationResultPath, } from "@workbench-ai/workbench-protocol";
-import { BENCHMARK_SPEC_FILE, engineCasePrivateFiles, engineCaseFilesForRuntimeInput, engineCasePublicFiles, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml as resolveWorkbenchResolvedSourceYamlInternal, validateWorkbenchResolvedSourceYaml as validateWorkbenchResolvedSourceYamlInternal, isWorkbenchSubjectManifestPath, } from "./generic-spec.js";
+import { BENCHMARK_SPEC_FILE, DEFAULT_EXECUTION_RESOURCES, engineCasePrivateFiles, engineCaseFilesForRuntimeInput, engineCasePublicFiles, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml as resolveWorkbenchResolvedSourceYamlInternal, validateWorkbenchResolvedSourceYaml as validateWorkbenchResolvedSourceYamlInternal, isWorkbenchCandidateManifestPath, } from "./generic-spec.js";
 import { attachSandboxMetadataToJob, createWorkbenchSandboxFileStore, isSurfaceSnapshotFile, readWorkbenchExecutionSpec, } from "./sandbox-inputs.js";
 import { asRuntimeRecord, importNodeModule, isJsonPayload, jsonRecord, nodeBuiltin, quoteShellArg, resolveWorkbenchWorkerId, } from "./runtime-utils.js";
 import { createWorkbenchExecutionCapability, createWorkbenchSandboxAllocation, collectExecutionCapabilityScopeIssues, collectSandboxAllocationScopeIssues, collectSandboxHandleScopeIssues, assertSandboxBackendSupportsNetworkPolicy, executeValidatedSandboxExecution, } from "./sandbox-plane.js";
 import { createSandboxBackendPlaneForProvider, } from "./sandbox-backends/index.js";
-import { applyWorkbenchSubjectPatch } from "./subject-patch.js";
+import { applyWorkbenchCandidatePatch } from "./candidate-patch.js";
 import { assignUsageRole, completeUsageSummary, mergeUsageSummaries, normalizeUsageSummary, usageStats, } from "./execution-usage.js";
 import { traceFilePaths, workbenchTraceExecutionDirectory, } from "./trace-files.js";
 import { engineCaseForCase, } from "./execution-jobs.js";
 import { createWorkbenchExecutionEventPublisher, publishCommandStepEvent, } from "./execution-events.js";
 import { readWorkbenchExecutionPurpose } from "./execution-evidence.js";
 import { adapterAuthEnv, localWorkbenchAdapterAuthStore, normalizeWorkbenchAdapterAuthTarget, sanitizeWorkbenchAdapterAuthBundle, } from "./adapter-auth.js";
-export { BENCHMARK_SPEC_FILE, DEFAULT_EXECUTION_RESOURCES, engineCasePrivateFiles, engineCaseFilesForRuntimeInput, engineCasePublicFiles, engineResolveInvocationForSpec, engineResolveBindingForSpec, engineResolveBindingForSourceYaml, isWorkbenchSubjectManifestPath, parseWorkbenchSourceFiles, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, resolveWorkbenchSourceFiles, runtimeNetwork, runtimeResources, serializeWorkbenchResolvedSourceYaml, validateWorkbenchResolvedSourceYaml, } from "./generic-spec.js";
+export { BENCHMARK_SPEC_FILE, CANDIDATE_SPEC_FILE, DEFAULT_EXECUTION_RESOURCES, engineCasePrivateFiles, engineCaseFilesForRuntimeInput, engineCasePublicFiles, engineResolveInvocationForSpec, engineResolveBindingForSpec, engineResolveBindingForSourceYaml, isWorkbenchCandidateManifestPath, parseWorkbenchSourceFiles, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, resolveWorkbenchSourceFiles, runtimeNetwork, runtimeResources, serializeWorkbenchResolvedSourceYaml, validateWorkbenchResolvedSourceYaml, } from "./generic-spec.js";
 export { composeRuntimeDockerfileWithAdapterInstallers, } from "./runtime-dockerfile.js";
 export { adapterCommandName, cloneWorkbenchAdapterManifest, collectWorkbenchAdapterAuthRequirements, collectWorkbenchAdapterInvocations, parseWorkbenchAdapterManifest, workbenchAdapterManifestRequiresAuth, workbenchAdapterManifestSupportsOperation, workbenchAdapterOperationCommand, workbenchAdapterOperationExecutor, withDefaultWorkbenchAdapterAuth, withDefaultWorkbenchAdapterAuthProfiles, } from "@workbench-ai/workbench-protocol";
 export { adapterAuthEnv, createWorkbenchAdapterAuthBundle, defaultWorkbenchAdapterAuthStoreRoot, localWorkbenchAdapterAuthStore, normalizeWorkbenchAdapterAuthTarget, parseWorkbenchAdapterAuthTarget, sanitizeWorkbenchAdapterAuthBundle, } from "./adapter-auth.js";
@@ -26,16 +26,127 @@ export { createWorkbenchProgressStdoutParser, publishWorkbenchProgressStdoutEnve
 export { resolveSandboxTemplateImage, } from "./sandbox-backends/template-images.js";
 export { readOutputTraceFiles, workbenchTraceExecutionDirectory, workbenchTraceRunDirectory, workbenchTraceRunDirectoryName, } from "./trace-files.js";
 export { assertWorkbenchAdapterOperationSupport, assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterOperationIssues, collectWorkbenchAdapterOperationRequirements, ensureWorkbenchAdapterOutputDir, WORKBENCH_ADAPTER_RESULT_FILE, normalizeWorkbenchAdapterOperationRequest, normalizeWorkbenchAdapterOperationResult, readWorkbenchAdapterOperationRequest, readWorkbenchAdapterOperationResult, workbenchAdapterOperationResultPath, writeWorkbenchAdapterOperationResult, } from "@workbench-ai/workbench-protocol";
-export { applyWorkbenchSubjectPatch, } from "./subject-patch.js";
+export { applyWorkbenchCandidatePatch, } from "./candidate-patch.js";
 export { createWorkbenchSandboxFileStore, createSandboxAdapterRequest, executionResultFromCompletedSandboxJob, materializeWorkbenchSandboxInput, readWorkbenchExecutionSpec, sanitizeWorkbenchExecutionJobForSandbox, } from "./sandbox-inputs.js";
 export { compileWorkbenchExecutionGraph, } from "./execution-graph.js";
-export { createBaselineSubjectExecution, createBaselineSubjectJob, createWorkbenchExecutionJob, expectedWorkbenchRunJobCount, engineCaseForCase, engineCaseIds, attemptJobCountForRunSpec, workbenchExecutionJobPurpose, MAX_WORKBENCH_RUN_BUDGET, planWorkbenchExecutionJobsForPurpose, validateWorkbenchRunEnvelope, workbenchExecutionJobId, } from "./execution-jobs.js";
+export { createBaselineCandidateExecution, createBaselineCandidateJob, createWorkbenchExecutionJob, expectedWorkbenchRunJobCount, engineCaseForCase, engineCaseIds, attemptJobCountForRunSpec, workbenchExecutionJobPurpose, MAX_WORKBENCH_RUN_BUDGET, planWorkbenchExecutionJobsForPurpose, validateWorkbenchRunEnvelope, workbenchExecutionJobId, } from "./execution-jobs.js";
 export { addCapacity, capacityFits, runWorkbenchExecutionDag, subtractCapacity, workbenchJobDependencies, workbenchJobHostCost, workbenchJobResources, } from "./execution-scheduler.js";
 export { assertWorkbenchExecutionIsolation, collectWorkbenchExecutionIsolationIssues, validateWorkbenchExecutionOutputPayloads, } from "./execution-outputs.js";
 export { collectSandboxAllocationScopeIssues, collectExecutionCapabilityScopeIssues, collectSandboxHandleScopeIssues, createWorkbenchSandboxAllocation, createWorkbenchSandboxExecutionMetadata, createWorkbenchExecutionCapability, executeValidatedSandboxExecution, } from "./sandbox-plane.js";
-export { buildSubjectCaseExecutionRefs, buildWorkbenchExecutionEvidence, isWorkbenchExecutionActive, readWorkbenchExecutionId, readWorkbenchExecutionMetadataNumber, readWorkbenchExecutionMetadataString, readWorkbenchExecutionPurpose, resolveWorkbenchJobGroupStatus, } from "./execution-evidence.js";
+export { buildCandidateCaseExecutionRefs, buildWorkbenchExecutionEvidence, isWorkbenchExecutionActive, readWorkbenchExecutionId, readWorkbenchExecutionMetadataNumber, readWorkbenchExecutionMetadataString, readWorkbenchExecutionPurpose, resolveWorkbenchJobGroupStatus, } from "./execution-evidence.js";
 export { buildWorkbenchTraceSessionsFromFiles, combineWorkbenchTraceSessions, finalizeWorkbenchExecutionTraceForJob, mergeWorkbenchExecutionTracesByJob, readWorkbenchExecutionTraceFiles, traceSessionLabel, } from "./execution-traces.js";
 export { DOCKER_SANDBOX_BACKEND, assertSandboxHostHealthForProvider, createDockerSandboxBackendDescriptor, createDockerSandboxPlane, resolveWorkbenchSandboxProviderName, sandboxProviderAdmissionForResources, sandboxProviderDefaultMaxConcurrentJobs, sandboxProviderLeaseScope, sandboxHostHealthExpectationForProvider, } from "./sandbox-backends/index.js";
+/**
+ * Builds a copy of a runtime job without its lease, host/worker,
+ * claim-token and trace fields.
+ *
+ * @param {object} job - Runtime job record; it is left unmodified.
+ * @returns {object} A new object carrying every other own enumerable
+ *   property of `job`, in the original property order.
+ */
+export function sanitizeWorkbenchRuntimeJobForExchange(job) {
+    // Rest-destructuring (rather than spread) keeps the TypeError for a
+    // null/undefined job.
+    const { ...exchangeable } = job;
+    const omittedFields = [
+        "leaseUntil",
+        "wakeupLeaseUntil",
+        "hostId",
+        "workerId",
+        "claimTokenHash",
+        "trace",
+        "traceSessions",
+    ];
+    for (const field of omittedFields) {
+        delete exchangeable[field];
+    }
+    return exchangeable;
+}
+/**
+ * Builds a copy of a candidate record without its owner, visibility,
+ * metrics and candidate-run fields.
+ *
+ * @param {object} candidate - Candidate record; it is left unmodified.
+ * @returns {object} A new object carrying every other own enumerable
+ *   property of `candidate`, in the original property order.
+ */
+export function sanitizeWorkbenchRuntimeCandidateForExchange(candidate) {
+    // Rest-destructuring (rather than spread) keeps the TypeError for a
+    // null/undefined candidate.
+    const { ...exchangeable } = candidate;
+    const omittedFields = [
+        "ownerUserId",
+        "ownerUsername",
+        "visibility",
+        "metrics",
+        "candidateRunId",
+        "candidateRunName",
+    ];
+    for (const field of omittedFields) {
+        delete exchangeable[field];
+    }
+    return exchangeable;
+}
+/**
+ * Computes a hex SHA-256 fingerprint over a project's source inputs.
+ *
+ * Text inputs (`source`, `dockerfile`, `runtimeDockerfile`) have their line
+ * endings normalised, file lists are reduced to path/encoding/executable/
+ * content and sorted by path, and missing resource fields fall back to
+ * `DEFAULT_EXECUTION_RESOURCES`. The result is serialised with sorted object
+ * keys before hashing.
+ *
+ * @param {object} input - Object with `source`, `candidateFiles`,
+ *   `engineResolveFiles`, `engineResolveBinding`, `adapterFiles`,
+ *   `runtimeFiles`, `dockerfile`, `runtimeDockerfile`, `resources`, `network`.
+ * @returns {string} Hex-encoded SHA-256 digest.
+ */
+export function workbenchProjectSourceFingerprint(input) {
+    const sourceYaml = normalizeTextForProjectStateFingerprint(input.source);
+    const candidateFiles = canonicalFilesForProjectStateFingerprint(input.candidateFiles);
+    const engineResolveFiles = canonicalFilesForProjectStateFingerprint(input.engineResolveFiles);
+    const { engine, resolver } = input.engineResolveBinding;
+    // Only the resolver's `use` and `withFingerprint` take part in the hash.
+    const engineResolveBinding = {
+        engine,
+        resolver: {
+            use: resolver.use,
+            withFingerprint: resolver.withFingerprint,
+        },
+    };
+    const adapterFiles = canonicalFilesForProjectStateFingerprint(input.adapterFiles);
+    const runtimeFiles = canonicalFilesForProjectStateFingerprint(input.runtimeFiles);
+    const dockerfile = normalizeTextForProjectStateFingerprint(input.dockerfile);
+    const runtimeDockerfile = normalizeTextForProjectStateFingerprint(input.runtimeDockerfile);
+    const resources = normalizeProjectStateResources(input.resources);
+    const serialized = JSON.stringify(canonicalizeProjectState({
+        sourceYaml,
+        candidateFiles,
+        engineResolveFiles,
+        engineResolveBinding,
+        adapterFiles,
+        runtimeFiles,
+        dockerfile,
+        runtimeDockerfile,
+        resources,
+        network: input.network,
+    }));
+    return createHash("sha256").update(serialized).digest("hex");
+}
+/**
+ * Computes a hex SHA-256 fingerprint over a runtime bundle.
+ *
+ * Candidates and jobs are first reduced to their exchange form, file groups
+ * are canonicalised, and every collection is sorted by a stable key (ids for
+ * records, `runId#jobId#at#id` for events) before the bundle is serialised
+ * with sorted object keys and hashed.
+ *
+ * @param {object} bundle - Bundle with `schema`, `activeId`, `candidates`,
+ *   `candidateFiles`, `evaluations`, `runs`, `jobs`, `executionFiles`, `events`.
+ * @returns {string} Hex-encoded SHA-256 digest.
+ */
+export function workbenchRuntimeBundleFingerprint(bundle) {
+    const recordId = (record) => record.id;
+    // Missing run/job ids sort under the "_" placeholder.
+    const eventKey = (event) => [event.runId ?? "_", event.jobId ?? "_", event.at, event.id].join("#");
+    const canonicalGroups = (groups, idField) => {
+        const reduced = groups.map((group) => ({
+            [idField]: group[idField],
+            files: canonicalFilesForProjectStateFingerprint(group.files),
+        }));
+        return sortByStableKey(reduced, (group) => group[idField]);
+    };
+    const canonical = {
+        schema: bundle.schema,
+        activeId: bundle.activeId,
+        candidates: sortByStableKey(bundle.candidates.map((candidate) => sanitizeWorkbenchRuntimeCandidateForExchange(candidate)), recordId),
+        candidateFiles: canonicalGroups(bundle.candidateFiles, "candidateId"),
+        evaluations: sortByStableKey(bundle.evaluations, recordId),
+        runs: sortByStableKey(bundle.runs, recordId),
+        jobs: sortByStableKey(bundle.jobs.map((job) => runtimeJobForProjectStateFingerprint(job)), recordId),
+        executionFiles: canonicalGroups(bundle.executionFiles, "jobId"),
+        events: sortByStableKey(bundle.events, eventKey),
+    };
+    const serialized = JSON.stringify(canonicalizeProjectState(canonical));
+    return createHash("sha256").update(serialized).digest("hex");
+}
+/**
+ * Tells whether two file lists are equal once canonicalised (reduced to
+ * path/encoding/executable/content and sorted by path).
+ *
+ * @param {Array<object>} left - First file list.
+ * @param {Array<object>} right - Second file list.
+ * @returns {boolean} True when both canonical forms serialise to the same JSON.
+ */
+export function workbenchSurfaceFilesEqualForExchange(left, right) {
+    const serialize = (files) => JSON.stringify(canonicalFilesForProjectStateFingerprint(files));
+    const leftJson = serialize(left);
+    const rightJson = serialize(right);
+    return leftJson === rightJson;
+}
+/**
+ * Summarises a runtime bundle as a set of counts.
+ *
+ * @param {object} bundle - Bundle with `candidates`, `candidateFiles`,
+ *   `evaluations`, `runs`, `jobs`, `executionFiles`, `events`, `activeId`.
+ * @returns {object} Collection lengths, the total number of files across the
+ *   candidate and execution file groups, and the bundle's `activeId`.
+ */
+export function workbenchRuntimeBundleStats(bundle) {
+    // File groups are counted by their contained files, not by group.
+    const totalFiles = (groups) => groups.reduce((total, { files }) => total + files.length, 0);
+    const { candidates, candidateFiles, evaluations, runs, jobs, executionFiles, events, activeId } = bundle;
+    return {
+        candidates: candidates.length,
+        candidateFiles: totalFiles(candidateFiles),
+        evaluations: evaluations.length,
+        runs: runs.length,
+        jobs: jobs.length,
+        executionFiles: totalFiles(executionFiles),
+        events: events.length,
+        activeId,
+    };
+}
+/**
+ * Reduces a job to the form used for fingerprinting: the exchange-sanitised
+ * job, with `files` and `fileSet` dropped from its `output` when that output
+ * is a non-array object.
+ *
+ * @param {object} job - Runtime job record.
+ * @returns {object} Sanitised job; `output` is copied only when it is trimmed.
+ */
+function runtimeJobForProjectStateFingerprint(job) {
+    const portable = sanitizeWorkbenchRuntimeJobForExchange(job);
+    const { output } = portable;
+    const outputIsRecord = Boolean(output) && typeof output === "object" && !Array.isArray(output);
+    if (!outputIsRecord) {
+        // Nothing to trim from a missing, primitive or array output.
+        return portable;
+    }
+    const trimmedOutput = { ...output };
+    delete trimmedOutput.files;
+    delete trimmedOutput.fileSet;
+    return { ...portable, output: trimmedOutput };
+}
+/**
+ * Reduces each file to `path`, `encoding`, `executable` (coerced to a
+ * boolean) and `content`, then orders the list by path.
+ *
+ * @param {Array<object>} files - File records.
+ * @returns {Array<object>} New, path-sorted array of reduced file records.
+ */
+function canonicalFilesForProjectStateFingerprint(files) {
+    const reduced = files.map(({ path, encoding, executable, content }) => ({
+        path,
+        encoding,
+        executable: Boolean(executable),
+        content,
+    }));
+    return sortByStableKey(reduced, (entry) => entry.path);
+}
+/**
+ * Normalises line endings so that CRLF and lone CR both become LF.
+ *
+ * @param {string} value - Text to normalise.
+ * @returns {string} The text with `\n` as its only line terminator.
+ */
+function normalizeTextForProjectStateFingerprint(value) {
+    // A CR optionally followed by LF covers both "\r\n" and a bare "\r".
+    return value.replace(/\r\n?/gu, "\n");
+}
+/**
+ * Fills in missing resource fields from `DEFAULT_EXECUTION_RESOURCES`.
+ *
+ * @param {object} resources - May define `cpu`, `memoryGb`, `diskGb`,
+ *   `timeoutMinutes`; a null or undefined field takes the default.
+ * @returns {object} Object holding exactly those four fields.
+ */
+function normalizeProjectStateResources(resources) {
+    const orDefault = (field) => resources[field] ?? DEFAULT_EXECUTION_RESOURCES[field];
+    return {
+        cpu: orDefault("cpu"),
+        memoryGb: orDefault("memoryGb"),
+        diskGb: orDefault("diskGb"),
+        timeoutMinutes: orDefault("timeoutMinutes"),
+    };
+}
+/**
+ * Returns a sorted copy of `items`, ordered by the key `keyFor` yields.
+ *
+ * Keys are compared by UTF-16 code unit (`<` / `>`), not with
+ * `localeCompare`. The results feed SHA-256 fingerprints, so the order must
+ * not depend on the host's locale or ICU data, and distinct keys must never
+ * compare as equal (as canonically equivalent strings do under collation).
+ * Items with identical keys keep their input order.
+ *
+ * @param {Iterable<*>} items - Items to order; the input is not mutated.
+ * @param {function(*): string} keyFor - Derives the sort key; called once per item.
+ * @returns {Array<*>} New array in ascending key order.
+ */
+function sortByStableKey(items, keyFor) {
+    // Compute each key once instead of on every comparison.
+    const keyed = Array.from(items, (item) => ({ item, key: keyFor(item) }));
+    keyed.sort((left, right) => {
+        if (left.key < right.key) {
+            return -1;
+        }
+        return left.key > right.key ? 1 : 0;
+    });
+    return keyed.map(({ item }) => item);
+}
+/**
+ * Recursively rebuilds a value so that every object's keys appear in sorted
+ * order, giving `JSON.stringify` a stable output.
+ *
+ * @param {*} value - Any value; arrays keep their element order, and
+ *   primitives, null and undefined are returned unchanged.
+ * @returns {*} The value itself for non-objects, otherwise a new
+ *   array/object with canonicalised contents.
+ */
+function canonicalizeProjectState(value) {
+    if (Array.isArray(value)) {
+        return value.map((entry) => canonicalizeProjectState(entry));
+    }
+    if (value === null || typeof value !== "object") {
+        return value;
+    }
+    const sortedEntries = [];
+    for (const key of Object.keys(value).sort()) {
+        sortedEntries.push([key, canonicalizeProjectState(value[key])]);
+    }
+    return Object.fromEntries(sortedEntries);
+}
 export const DEFAULT_ENVIRONMENT_VERSIONS = [
     {
         id: "envv_python_3_12",
@@ -153,7 +264,7 @@ export const DEFAULT_ENVIRONMENTS = [
     {
         id: "env_node",
         name: "Node",
-        description: "Node runtime for JavaScript and TypeScript subjects.",
+        description: "Node runtime for JavaScript and TypeScript candidates.",
         currentVersionId: "envv_node_22",
         builtIn: true,
         createdAt: "2026-04-23T00:00:00.000Z",
@@ -191,8 +302,7 @@ function splitAuthoredSourceYaml(sourceYaml) {
     }
     const entries = [
         [BENCHMARK_SPEC_FILE, parsed.benchmark],
-        ["subjects/current/subject.yaml", splitSubjectSourceRecord(parsed.subject)],
-        ["optimizers/current.yaml", splitOptimizerSourceRecord(parsed.optimizer)],
+        ["candidates/current/candidate.yaml", splitCandidateSourceRecord(parsed.candidate)],
     ];
     return entries.flatMap(([filePath, value]) => {
         if (!value || typeof value !== "object" || Array.isArray(value)) {
@@ -204,23 +314,20 @@ function splitAuthoredSourceYaml(sourceYaml) {
             }];
     });
 }
-function splitSubjectSourceRecord(value) {
+function splitCandidateSourceRecord(value) {
     const record = cloneYamlRecord(value);
     if (!record) {
         return value;
     }
     delete record.benchmark;
     delete record.path;
-    rewriteAdapterSources(record, "subjects");
+    stripCandidateRuntimeSelection(record);
+    rewriteAdapterSources(record, "candidates/current");
     return record;
 }
-function splitOptimizerSourceRecord(value) {
-    const record = cloneYamlRecord(value);
-    if (!record) {
-        return value;
-    }
-    rewriteAdapterSources(record, "optimizers");
-    return record;
+/**
+ * Removes the `selectedRunId` and `selectedRunName` properties from a
+ * record, mutating it in place.
+ *
+ * @param {object} record - Record to strip.
+ * @returns {void}
+ */
+function stripCandidateRuntimeSelection(record) {
+    for (const field of ["selectedRunId", "selectedRunName"]) {
+        delete record[field];
+    }
 }
 function cloneYamlRecord(value) {
     return value && typeof value === "object" && !Array.isArray(value)
@@ -242,11 +349,10 @@ function sourcePathRelativeTo(yamlDir, sourcePath) {
 }
 function isAuthoredSourceYamlPath(filePath) {
     return filePath === BENCHMARK_SPEC_FILE ||
-        isWorkbenchSubjectManifestPath(filePath) ||
-        /^optimizers\/[^/]+\.ya?ml$/iu.test(filePath);
+        isWorkbenchCandidateManifestPath(filePath);
 }
-function formatOptimizerSummary(spec) {
-    return spec.improve ? `adapter:${spec.improve.use}` : "optimizer not configured";
+function formatImproveSummary(spec) {
+    return spec.improve ? `adapter:${spec.improve.use}` : "improve not configured";
 }
 function formatEngineRunSummary(spec) {
     return `adapter:${spec.engineRun.use}`;
@@ -287,10 +393,10 @@ function protocolStepForExecution(execution, manifests) {
     if (execution.purpose !== "improve") {
         throw new Error(`Protocol execution step only supports improve executions, not ${execution.purpose}.`);
     }
-    const operation = "optimizer.improve";
+    const operation = "candidate.improve";
     const command = adapterProtocolCommandSpec(execution.adapter, operation, manifests);
     return {
-        kind: "optimizer",
+        kind: "improver",
         label: execution.purpose,
         operation,
         executor: command.executor,
@@ -387,35 +493,32 @@ export function materializeWorkbenchRunResult(args) {
     const completed = args.jobs.filter((job) => job.status === "succeeded");
     const failedJobCount = args.jobs.filter((job) => job.status === "failed").length;
     const completedJobCount = args.jobs.filter((job) => job.status === "succeeded").length;
-    const subjectRevisions = completed
+    const candidateRevisions = completed
         .filter((job) => workbenchExecutionPurpose(job) === "improve")
-        .map((job) => normalizeSubjectRevisionJobOutput(job.output))
+        .map((job) => normalizeCandidateRevisionJobOutput(job.output))
         .filter((output) => output !== null)
         .sort((left, right) => left.attemptIndex - right.attemptIndex);
     const evaluationJobs = args.jobs.filter((job) => workbenchExecutionPurpose(job) === "attempt");
-    const evaluationsBySubject = new Map();
+    const evaluationsByCandidate = new Map();
     for (const job of evaluationJobs) {
-        const subjectId = readJobString(job.output, "subjectId") ??
-            readJobString(job.input, "subjectId") ??
-            job.subjectId;
-        if (subjectId) {
-            evaluationsBySubject.set(subjectId, [
-                ...(evaluationsBySubject.get(subjectId) ?? []),
+        const candidateId = readJobString(job.output, "candidateId") ??
+            readJobString(job.input, "candidateId") ??
+            job.candidateId;
+        if (candidateId) {
+            evaluationsByCandidate.set(candidateId, [
+                ...(evaluationsByCandidate.get(candidateId) ?? []),
                 job,
             ]);
         }
     }
-    const subjects = [];
-    const subjectFiles = {};
+    const candidates = [];
+    const candidateFiles = {};
     const evaluations = [];
-    for (const subjectRevision of subjectRevisions) {
-        const subjectId = subjectRevision.subjectId;
-        const subjectJobs = evaluationsBySubject.get(subjectId) ?? [];
-        const succeededEvaluationJobs = subjectJobs.filter((job) => job.status === "succeeded");
-        const outputs = normalizeEvaluationSampleOutputs({
-            jobs: succeededEvaluationJobs,
-            allJobs: completed,
-        })
+    for (const candidateRevision of candidateRevisions) {
+        const candidateId = candidateRevision.candidateId;
+        const candidateJobs = evaluationsByCandidate.get(candidateId) ?? [];
+        const succeededEvaluationJobs = candidateJobs.filter((job) => job.status === "succeeded");
+        const outputs = normalizeEvaluationSampleOutputs(succeededEvaluationJobs)
             .sort((left, right) => compareSampleOutputs(left.output, right.output));
         const outputJobIds = new Set(outputs.flatMap(({ jobs }) => jobs.map((job) => job.id)));
         const completedSampleKeys = new Set(outputs
@@ -425,39 +528,38 @@ export function materializeWorkbenchRunResult(args) {
         ])
             .filter((key) => key !== null));
         const errorSampleJobs = [
-            ...subjectJobs.filter((job) => job.status === "failed"),
+            ...candidateJobs.filter((job) => job.status === "failed"),
             ...succeededEvaluationJobs.filter((job) => !outputJobIds.has(job.id)),
         ];
-        const errorSamples = errorEvaluationSamplesFromJobs(errorSampleJobs, subjectId, subjectRevision.attemptIndex, completedSampleKeys);
+        const errorSamples = errorEvaluationSamplesFromJobs(errorSampleJobs, candidateId, candidateRevision.attemptIndex, completedSampleKeys);
         const samples = [
             ...outputs.map(({ jobs, output }) => withJobUsage(output.sample, completed, jobs[0])),
             ...errorSamples,
         ].sort((left, right) => left.index - right.index || left.id.localeCompare(right.id));
-        const subjectName = normalizedSubjectDisplayName(args.spec.subject.name);
-        const evalRecord = createEvaluationRecord(subjectId, subjectName, samples);
+        const candidateName = normalizedCandidateDisplayName(args.spec.candidate.name);
+        const evalRecord = createEvaluationRecord(candidateId, candidateName, samples);
         const usage = mergeUsageSummaries([
-            subjectRevision.usage,
+            candidateRevision.usage,
             ...samples.map((sample) => sample.usage),
         ]);
-        const metrics = evaluationMeanMetrics(evalRecord);
-        const attemptIndex = subjectRevision.attemptIndex;
+        const attemptIndex = candidateRevision.attemptIndex;
         const evaluationTraces = [
             ...outputs.flatMap(({ output }) => output.traces),
             ...errorSampleJobs.flatMap(jobTracePaths),
         ].sort();
-        const baseId = subjectRevision.baseId && subjectRevision.baseId !== subjectId
-            ? subjectRevision.baseId
+        const baseId = candidateRevision.baseId && candidateRevision.baseId !== candidateId
+            ? candidateRevision.baseId
             : null;
-        const sourceMeta = subjectSourceMetadata(args.subjectSourceFiles);
+        const sourceMeta = candidateSourceMetadata(args.candidateSourceFiles);
         const benchmarkMeta = benchmarkSourceMetadata(args.benchmarkSourceFiles);
         const meta = {
             attemptIndex,
             sampleCount: evalRecord.sampleCount,
-            optimizer: formatOptimizerSummary(args.spec),
+            improver: formatImproveSummary(args.spec),
             engineRun: formatEngineRunSummary(args.spec),
             strategy: "greedy",
             traces: {
-                improve: subjectRevision.traces,
+                improve: candidateRevision.traces,
                 evaluations: evaluationTraces,
             },
         };
@@ -467,52 +569,124 @@ export function materializeWorkbenchRunResult(args) {
         if (benchmarkMeta) {
             meta.benchmark = benchmarkMeta;
         }
-        const record = {
-            id: subjectId,
-            ...(subjectName ? { name: subjectName } : {}),
-            ordinal: args.existingSubjectCount + subjects.length,
-            benchmarkFingerprint: args.benchmarkFingerprint,
-            subjectFingerprint: args.subjectFingerprint ?? materializedSubjectFingerprint(args.spec, subjectRevision.files),
-            createdAt: args.startedAt,
-            ...(baseId ? { baseId } : {}),
-            referenceIds: [],
-            status: evalRecord.completedSampleCount > 0 ? "evaluated" : "eval_error",
-            fileChanges: subjectRevision.fileChanges,
-            ...(metrics ? { metrics } : {}),
-            ...(usage ? { usage } : {}),
-            eval: evalRecord,
-            ...(subjectRevision.prompt ? { prompt: subjectRevision.prompt } : {}),
-            meta,
-        };
-        subjects.push(record);
+        const record = preserveExistingCandidateIdentity({
+            candidate: {
+                id: candidateId,
+                ...(candidateName ? { name: candidateName } : {}),
+                version: args.existingCandidateCount + candidates.length + 1,
+                ordinal: args.existingCandidateCount + candidates.length + 1,
+                benchmarkFingerprint: args.benchmarkFingerprint,
+                candidateFingerprint: args.candidateFingerprint ?? materializedCandidateFingerprint(args.spec, candidateRevision.files),
+                createdAt: args.startedAt,
+                ...(baseId ? { baseId } : {}),
+                referenceIds: [],
+                status: evalRecord.completedSampleCount > 0 ? "evaluated" : "eval_error",
+                fileChanges: candidateRevision.fileChanges,
+                ...(usage ? { usage } : {}),
+                eval: evalRecord,
+                ...(candidateRevision.prompt ? { prompt: candidateRevision.prompt } : {}),
+                meta,
+            },
+            previousCandidate: args.previousCandidate ?? null,
+        });
+        candidates.push(record);
         evaluations.push(createEvaluationScorecard({
             runId: args.runId,
             benchmarkFingerprint: args.benchmarkFingerprint,
             createdAt: args.startedAt,
-            subject: record,
+            candidate: record,
+            candidateRunId: args.spec.candidate.selectedRunId,
+            candidateRunName: args.spec.candidate.selectedRunName,
             evaluation: evalRecord,
+            ...(args.selection
+                ? {
+                    selection: {
+                        metric: args.selection.metric,
+                        caseIds: args.selection.caseIds,
+                        ...(args.selection.label ? { label: args.selection.label } : {}),
+                    },
+                }
+                : {}),
         }));
-        subjectFiles[subjectId] = materializedSubjectFiles({
-            subjectRevisionFiles: subjectRevision.files,
+        candidateFiles[candidateId] = materializedCandidateFiles({
+            candidateRevisionFiles: candidateRevision.files,
         });
     }
-    const selectedSubject = selectSubject({
-        subjects,
-        previousSubject: args.previousSubject ?? null,
+    const selectedCandidate = selectCandidate({
+        candidates,
+        previousCandidate: args.previousCandidate ?? null,
+        selection: args.selection,
     });
     return {
-        subjects,
-        subjectFiles,
+        candidates,
+        candidateFiles,
         evaluations,
-        activeSubjectId: selectedSubject?.id ?? args.previousSubject?.id ?? null,
-        selectedSubject,
+        activeCandidateId: selectedCandidate?.id ?? args.previousCandidate?.id ?? null,
+        selectedCandidate,
         completedJobCount,
         failedJobCount,
     };
 }
-function subjectSourceMetadata(files) {
+/**
+ * Carries identity fields over from a previous candidate record when the
+ * new record has the same id.
+ *
+ * With no previous candidate, or a different id, the new candidate is
+ * returned as-is. Otherwise the result keeps the previous `version` and
+ * `createdAt`, sets `ordinal` to the previous `version`, prefers previous
+ * `referenceIds` when non-empty, and falls back to the previous `name`,
+ * `baseId`, `prompt` and `fileChanges` where the new record has none.
+ * `meta` is merged through `mergeExistingCandidateMeta`.
+ *
+ * @param {{candidate: object, previousCandidate: (object|null)}} args
+ * @returns {object} `args.candidate` itself, or a new merged record.
+ */
+function preserveExistingCandidateIdentity(args) {
+    const { candidate, previousCandidate: previous } = args;
+    if (!previous) {
+        return candidate;
+    }
+    if (previous.id !== candidate.id) {
+        return candidate;
+    }
+    // Emits the property only when the value is truthy.
+    const optional = (field, value) => (value ? { [field]: value } : {});
+    const baseId = candidate.baseId ?? previous.baseId;
+    const prompt = candidate.prompt ?? previous.prompt;
+    const meta = mergeExistingCandidateMeta(previous.meta, candidate.meta);
+    const name = candidate.name ?? previous.name;
+    const referenceIds = previous.referenceIds.length > 0
+        ? [...previous.referenceIds]
+        : candidate.referenceIds;
+    const fileChanges = candidate.fileChanges.length > 0
+        ? candidate.fileChanges
+        : [...previous.fileChanges];
+    return {
+        ...candidate,
+        version: previous.version,
+        // Note: ordinal mirrors the previous version, not previous.ordinal.
+        ordinal: previous.version,
+        createdAt: previous.createdAt,
+        ...optional("name", name),
+        ...optional("baseId", baseId),
+        referenceIds,
+        fileChanges,
+        ...optional("prompt", prompt),
+        ...optional("meta", meta),
+    };
+}
+/**
+ * Merges a previous candidate's meta with freshly built meta, with fresh
+ * values winning on key conflicts.
+ *
+ * Each input goes through `jsonRecord` (imported from ./runtime-utils.js;
+ * presumably it yields a falsy value for anything that is not a plain
+ * record — confirm against that module). When only one side is a record
+ * the other original argument is returned untouched. When both carry a
+ * `traces` record those are merged too, and the previous `improve` traces
+ * are kept if the fresh ones are empty or not an array.
+ *
+ * @param {*} previousMeta - Meta from the previous candidate record.
+ * @param {*} candidateMeta - Meta built for the new candidate record.
+ * @returns {*} Merged meta, or one of the inputs unchanged.
+ */
+function mergeExistingCandidateMeta(previousMeta, candidateMeta) {
+    const priorRecord = jsonRecord(previousMeta);
+    const freshRecord = jsonRecord(candidateMeta);
+    if (!priorRecord) {
+        return candidateMeta;
+    }
+    if (!freshRecord) {
+        return previousMeta;
+    }
+    const priorTraces = jsonRecord(priorRecord.traces);
+    const freshTraces = jsonRecord(freshRecord.traces);
+    const merged = { ...priorRecord, ...freshRecord };
+    if (!(priorTraces && freshTraces)) {
+        return merged;
+    }
+    const freshHasImprove = Array.isArray(freshTraces.improve) && freshTraces.improve.length > 0;
+    const keepPriorImprove = !freshHasImprove && priorTraces.improve !== undefined;
+    merged.traces = {
+        ...priorTraces,
+        ...freshTraces,
+        // An empty fresh improve list must not erase recorded improve traces.
+        ...(keepPriorImprove ? { improve: priorTraces.improve } : {}),
+    };
+    return merged;
+}
+function candidateSourceMetadata(files) {
     const sourceFiles = (files ?? [])
-        .filter((file) => /^subjects\/[^/]+\/subject\.ya?ml$/iu.test(file.path))
+        .filter((file) => /^candidates\/[^/]+\/candidate\.ya?ml$/iu.test(file.path))
         .sort((left, right) => left.path.localeCompare(right.path))
         .map((file) => ({
         path: file.path,
@@ -536,14 +710,13 @@ function benchmarkSourceMetadata(files) {
     }));
     return sourceFiles.length > 0 ? { files: sourceFiles } : null;
 }
-function materializedSubjectFingerprint(spec, files) {
+function materializedCandidateFingerprint(spec, files) {
     const hash = createHash("sha256");
-    hash.update("workbench-subject-v1\0");
-    hash.update("materialized\0runner\0");
-    hash.update(JSON.stringify(spec.run));
+    hash.update("workbench-candidate-v1\0");
+    hash.update("materialized\0");
     hash.update("prepare");
-    hash.update(JSON.stringify(spec.subject.prepare ?? null));
-    for (const file of filterSubjectSourceFiles(files).slice().sort((left, right) => left.path.localeCompare(right.path))) {
+    hash.update(JSON.stringify(spec.candidate.prepare ?? null));
+    for (const file of filterCandidateSourceFiles(files).slice().sort((left, right) => left.path.localeCompare(right.path))) {
         hash.update("\0file\0");
         hash.update(file.path);
         hash.update("\0");
@@ -555,22 +728,28 @@ function materializedSubjectFingerprint(spec, files) {
     }
     return hash.digest("hex");
 }
-function materializedSubjectFiles(args) {
+function materializedCandidateFiles(args) {
     const byPath = new Map();
-    for (const file of filterSubjectSourceFiles(args.subjectRevisionFiles)) {
+    for (const file of filterCandidateSourceFiles(args.candidateRevisionFiles)) {
         byPath.set(file.path, { ...file });
     }
     return [...byPath.values()].sort((left, right) => left.path.localeCompare(right.path));
 }
 function createEvaluationScorecard(args) {
     const evaluation = args.evaluation;
+    const selectionScore = args.selection
+        ? readEvaluationSelectionStats(evaluation, args.selection.metric, args.selection.caseIds)
+        : null;
     return {
-        id: evaluationScorecardId(args.runId, args.subject.id),
+        id: evaluationScorecardId(args.runId, args.candidate.id),
         runId: args.runId,
         benchmarkFingerprint: args.benchmarkFingerprint,
-        subjectFingerprint: args.subject.subjectFingerprint,
-        subjectId: args.subject.id,
-        ...(args.subject.name ? { subjectName: args.subject.name } : {}),
+        candidateFingerprint: args.candidate.candidateFingerprint,
+        candidateId: args.candidate.id,
+        ...(args.candidate.name ? { candidateName: args.candidate.name } : {}),
+        candidateVersion: args.candidate.version,
+        ...(args.candidateRunId ? { candidateRunId: args.candidateRunId } : {}),
+        ...(args.candidateRunName ? { candidateRunName: args.candidateRunName } : {}),
         createdAt: args.createdAt,
         updatedAt: evaluation.finishedAt ?? args.createdAt,
         status: evaluation.status,
@@ -578,16 +757,19 @@ function createEvaluationScorecard(args) {
         completedSampleCount: evaluation.completedSampleCount,
         errorSampleCount: evaluation.errorSampleCount,
         ...(evaluation.metrics ? { metrics: evaluation.metrics } : {}),
+        ...(args.selection ? { selectionMetric: args.selection.metric } : {}),
+        ...(args.selection ? { selectionLabel: args.selection.label ?? `${args.selection.metric} on selected cases` } : {}),
+        ...(selectionScore ? { selectionScore } : {}),
         ...(evaluation.durationMs ? { durationMs: evaluation.durationMs } : {}),
         ...(evaluation.usage ? { usage: evaluation.usage } : {}),
         ...(evaluation.error ? { error: evaluation.error } : {}),
         evaluation,
     };
 }
-export function evaluationScorecardId(runId, subjectId) {
+export function evaluationScorecardId(runId, candidateId) {
     const runPart = runId.replace(/[^a-z0-9]+/giu, "_").replace(/^_+|_+$/gu, "").slice(-24);
-    const subjectPart = subjectId.replace(/[^a-z0-9]+/giu, "_").replace(/^_+|_+$/gu, "").slice(-24);
-    return `eval_${runPart}_${subjectPart}`;
+    const candidatePart = candidateId.replace(/[^a-z0-9]+/giu, "_").replace(/^_+|_+$/gu, "").slice(-24);
+    return `eval_${runPart}_${candidatePart}`;
 }
 export function selectExecutionOutputFilesForInspection(args) {
     return args.files.filter((file) => !isWorkbenchInternalOutputPath(file.path));
@@ -602,56 +784,145 @@ export function isWorkbenchInternalOutputPath(filePath) {
         normalized === "exit_code" ||
         /^[a-z_-]+_(stdout\.log|stderr\.log|exit_code)$/u.test(normalized));
 }
-export function createSubjectRevisionTraceInputFiles(args) {
+export function createOptimizerTraceInputFiles(args) {
     const files = [];
-    const manifestJobs = [];
+    const executions = [];
     const jobs = args.jobs
-        .filter((job) => job.runId === args.runId && isTerminalExecutionJob(job))
+        .filter(isOptimizerTraceInputJob)
         .sort(compareTraceInputJobs);
-    for (const job of jobs) {
+    jobs.forEach((job, index) => {
+        const sequence = String(index + 1).padStart(6, "0");
+        const executionPath = `executions/${sequence}`;
+        const operation = "engine.run";
         const jobFiles = completedJobOutputFiles(job);
-        const rawTraceFiles = jobFiles.filter((file) => normalizeRelativePath(file.path).startsWith(".workbench/traces/"));
-        files.push(...rawTraceFiles.map((file) => ({ ...file })));
-        const events = args.events
-            .filter((event) => event.runId === args.runId && event.jobId === job.id)
-            .sort((left, right) => left.at.localeCompare(right.at));
-        const eventPath = `events/${job.id}.ndjson`;
-        if (events.length > 0) {
-            files.push(textSurfaceFile(eventPath, `${events.map((event) => JSON.stringify(event)).join("\n")}\n`));
-        }
-        const summaryPath = `jobs/${job.id}.json`;
-        const summary = subjectRevisionTraceJobSummary(job, {
-            eventPath: events.length > 0 ? eventPath : null,
-            rawTracePaths: rawTraceFiles.map((file) => file.path).sort(),
-        });
-        files.push(textSurfaceFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`));
-        manifestJobs.push({
-            ...summary,
-            summary_path: summaryPath,
+        const requestFile = traceInputRequestFile(jobFiles, operation);
+        const resultFile = traceInputResultFile(jobFiles, operation);
+        const requestPath = `${executionPath}/request.json`;
+        const resultPath = `${executionPath}/result.json`;
+        const filesPath = `${executionPath}/files`;
+        files.push(textSurfaceFile(requestPath, requestFile?.content ?? `${JSON.stringify(traceInputRequestFallback(job, operation), null, 2)}\n`));
+        files.push(textSurfaceFile(resultPath, resultFile?.content ?? `${JSON.stringify(traceInputResultFallback(job, operation), null, 2)}\n`));
+        files.push(...jobFiles.map((file) => ({
+            ...file,
+            path: normalizeRelativePath(`${filesPath}/${file.path}`),
+        })));
+        executions.push({
+            path: executionPath,
+            operation,
+            status: job.status,
+            candidateId: job.candidateId ?? readJobString(job.input, "candidateId") ?? null,
+            runId: job.runId,
+            jobId: job.id,
+            attemptIndex: readOptionalJobNumber(job.input, "attemptIndex") ?? null,
+            sampleIndex: readOptionalJobNumber(job.input, "sampleIndex") ?? null,
+            caseId: readJobString(job.input, "caseId") ?? null,
+            requestPath,
+            resultPath,
+            filesPath,
         });
-    }
-    files.push(textSurfaceFile("manifest.json", `${JSON.stringify({
-        run_id: args.runId,
-        jobs: manifestJobs,
+    });
+    files.push(textSurfaceFile("index.json", `${JSON.stringify({
+        schema: "workbench.optimizer-traces.v1",
+        executions,
     }, null, 2)}\n`));
     return dedupeSurfaceFiles(files);
 }
-export function createSubjectEvaluationTraceInputFiles(args) {
-    const subject = args.subject;
-    if (!subject?.eval && !subject?.metrics) {
+export function workbenchImproveOptimizeSelector(spec) {
+    return cloneWorkbenchCaseSelector(spec.candidate.improve?.optimizeOn ?? { all: true });
+}
+export function workbenchImproveSelectionPolicy(spec) {
+    const optimizeOn = workbenchImproveOptimizeSelector(spec);
+    const selectBy = spec.candidate.improve?.selectBy;
+    return {
+        metric: selectBy?.metric ?? "score",
+        selector: cloneWorkbenchCaseSelector(selectBy?.cases ?? optimizeOn),
+    };
+}
+export function workbenchEngineCaseIdsForSelector(engineCases, selector) {
+    return engineCases
+        .filter((engineCase) => workbenchEngineCaseMatchesSelector(engineCase, selector))
+        .map((engineCase) => engineCase.id);
+}
+export function workbenchEngineCaseIdsForImproveEvaluation(args) {
+    const optimizeIds = new Set(workbenchEngineCaseIdsForSelector(args.engineCases, workbenchImproveOptimizeSelector(args.spec)));
+    const selectionIds = new Set(workbenchEngineCaseIdsForSelector(args.engineCases, workbenchImproveSelectionPolicy(args.spec).selector));
+    return args.engineCases
+        .map((engineCase) => engineCase.id)
+        .filter((caseId) => optimizeIds.has(caseId) || selectionIds.has(caseId));
+}
+export function filterOptimizerTraceJobsForCaseIds(jobs, caseIds) {
+    const allowed = new Set(caseIds);
+    if (allowed.size === 0) {
         return [];
     }
-    const filePath = normalizeRelativePath(args.path ?? `base-subject/${subject.id}/evaluation.json`);
-    const payload = {
-        kind: "subject_evaluation",
-        subjectId: subject.id,
-        status: subject.status,
-        metrics: subject.metrics ?? null,
-        fileChanges: subject.fileChanges,
-        eval: subject.eval ?? null,
-        prompt: subject.prompt ?? null,
-    };
-    return [textSurfaceFile(filePath, `${JSON.stringify(payload, null, 2)}\n`)];
+    return jobs.filter((job) => {
+        if (workbenchExecutionPurpose(job) !== "attempt") {
+            return false;
+        }
+        const caseId = readJobString(job.input, "caseId");
+        return caseId !== null && allowed.has(caseId);
+    });
+}
+export function formatWorkbenchCaseSelector(selector) {
+    return workbenchCaseSelectorUsesAllCases(selector)
+        ? "all cases"
+        : `split=${selector.split}`;
+}
+export function formatWorkbenchSelectionPolicy(policy) {
+    return `${policy.metric} on ${formatWorkbenchCaseSelector(policy.selector)}`;
+}
+export function workbenchCaseSelectorUsesAllCases(selector) {
+    return !selector.split;
+}
+function workbenchEngineCaseMatchesSelector(engineCase, selector) {
+    if (workbenchCaseSelectorUsesAllCases(selector)) {
+        return true;
+    }
+    return engineCase.case.split === selector.split;
+}
+function cloneWorkbenchCaseSelector(selector) {
+    return selector.split ? { split: selector.split } : { all: true };
+}
+export function evaluationMeanMetrics(evaluation) {
+    const entries = Object.entries(evaluation?.metrics ?? {})
+        .filter((entry) => Number.isFinite(entry[1].mean));
+    return entries.length > 0
+        ? Object.fromEntries(entries.map(([key, stats]) => [key, stats.mean]))
+        : undefined;
+}
+export function candidateRecordWithoutDerivedFields(candidate) {
+    const { metrics: _metrics, candidateRunId: _candidateRunId, candidateRunName: _candidateRunName, ...record } = candidate;
+    return record;
+}
+export function candidateSummaryFromRecord(candidate) {
+    const { eval: _eval, prompt: _prompt, meta: _meta, ...summary } = candidateRecordWithoutDerivedFields(candidate);
+    return summary;
+}
+export function workbenchRunExecutionFingerprint(args) {
+    const hash = createHash("sha256");
+    hash.update("workbench-run-execution-v1\0");
+    hash.update(args.specVersionId ?? "");
+    hash.update("\0");
+    hash.update(args.environmentVersionId ?? "");
+    hash.update("\0");
+    hash.update(args.sourceYaml ?? "");
+    for (const file of (args.adapterFiles ?? []).slice().sort((left, right) => left.path.localeCompare(right.path))) {
+        hash.update("\0file\0");
+        hash.update(file.path);
+        hash.update("\0");
+        hash.update(file.kind);
+        hash.update("\0");
+        hash.update(file.encoding);
+        hash.update("\0");
+        hash.update(file.executable ? "1" : "0");
+        hash.update("\0");
+        hash.update(file.content);
+    }
+    return hash.digest("hex");
+}
+function isOptimizerTraceInputJob(job) {
+    return isTerminalExecutionJob(job) &&
+        workbenchExecutionPurpose(job) === "attempt";
 }
 function isTerminalExecutionJob(job) {
     return job.kind === "execute" && (job.status === "succeeded" ||
@@ -662,20 +933,10 @@ function compareTraceInputJobs(left, right) {
     const leftAttempt = readOptionalJobNumber(left.input, "attemptIndex") ?? -1;
     const rightAttempt = readOptionalJobNumber(right.input, "attemptIndex") ?? -1;
     return leftAttempt - rightAttempt ||
-        purposeSortKey(workbenchExecutionPurpose(left)) - purposeSortKey(workbenchExecutionPurpose(right)) ||
         (readOptionalJobNumber(left.input, "sampleIndex") ?? -1) - (readOptionalJobNumber(right.input, "sampleIndex") ?? -1) ||
         (readJobString(left.input, "caseId") ?? "").localeCompare(readJobString(right.input, "caseId") ?? "") ||
         left.id.localeCompare(right.id);
 }
-function purposeSortKey(purpose) {
-    if (purpose === "improve") {
-        return 0;
-    }
-    if (purpose === "attempt") {
-        return 1;
-    }
-    return 3;
-}
 function completedJobOutputFiles(job) {
     const output = jsonRecord(job.output);
     if (!Array.isArray(output.files)) {
@@ -689,35 +950,70 @@ function completedJobOutputFiles(job) {
     }
     return files;
 }
-function subjectRevisionTraceJobSummary(job, paths) {
-    const output = jsonRecord(job.output);
+function traceInputRequestFile(files, operation) {
+    return files.find((file) => {
+        const normalized = normalizeRelativePath(file.path);
+        return normalized.startsWith(".workbench/traces/") &&
+            normalized.endsWith("/request.json") &&
+            file.encoding === "utf8" &&
+            traceJsonOperation(file) === operation;
+    }) ?? null;
+}
+function traceInputResultFile(files, operation) {
+    return files.find((file) => {
+        const normalized = normalizeRelativePath(file.path);
+        return normalized.startsWith(".workbench/traces/") &&
+            normalized.endsWith("/result.json") &&
+            file.encoding === "utf8" &&
+            traceJsonOperation(file) === operation;
+    }) ?? null;
+}
+function traceJsonOperation(file) {
+    try {
+        const parsed = JSON.parse(file.content);
+        return typeof parsed?.operation === "string" ? parsed.operation : null;
+    }
+    catch {
+        return null;
+    }
+}
+function traceInputRequestFallback(job, operation) {
+    const execution = jsonRecord(jsonRecord(job.input).execution);
     return {
-        job_id: job.id,
-        purpose: workbenchExecutionPurpose(job) ?? "unknown",
-        status: job.status,
-        subject_id: job.subjectId ?? readJobString(job.input, "subjectId"),
-        attempt_index: readOptionalJobNumber(job.input, "attemptIndex"),
-        sample_index: readOptionalJobNumber(job.input, "sampleIndex"),
-        case_id: readJobString(job.input, "caseId"),
-        created_at: job.createdAt,
-        ...(job.startedAt ? { started_at: job.startedAt } : {}),
-        ...(job.finishedAt ? { finished_at: job.finishedAt } : {}),
-        ...(job.error ? { error: job.error } : {}),
-        traces: jobTracePaths(job),
-        event_path: paths.eventPath,
-        raw_trace_paths: [...paths.rawTracePaths],
-        output: summarizeJobOutputForTrace(output),
+        protocol: "workbench.adapter.v3",
+        id: typeof execution.id === "string" ? execution.id : job.id,
+        jobId: job.id,
+        operation,
+        invocation: jsonRecord(execution.adapter),
+        context: {
+            candidate: {
+                id: job.candidateId ?? readJobString(job.input, "candidateId") ?? null,
+            },
+            attempt: {
+                attemptIndex: readOptionalJobNumber(job.input, "attemptIndex") ?? null,
+                sampleIndex: readOptionalJobNumber(job.input, "sampleIndex") ?? null,
+                caseId: readJobString(job.input, "caseId") ?? null,
+            },
+        },
     };
 }
-function summarizeJobOutputForTrace(output) {
-    const { files: _files, fileSet: _fileSet, subjectPatch, ...rest } = output;
-    const patch = jsonRecord(subjectPatch);
-    const { files: _patchFiles, ...patchSummary } = patch;
+function traceInputResultFallback(job, operation) {
+    const output = jsonRecord(job.output);
+    const ok = job.status === "succeeded" && output.ok !== false;
+    const value = operation === "candidate.improve"
+        ? jsonRecord(output.candidatePatch)
+        : operation === "engine.run"
+            ? jsonRecord(output.result)
+            : {};
     return {
-        ...rest,
-        ...(Object.keys(patch).length > 0
-            ? { subjectPatch: patchSummary }
-            : {}),
+        protocol: "workbench.adapter-result.v1",
+        operation,
+        ok,
+        ...(Object.keys(value).length > 0 ? { value: value } : {}),
+        ...(typeof output.summary === "string" ? { summary: output.summary } : {}),
+        ...(output.feedback !== undefined ? { feedback: output.feedback } : {}),
+        ...(output.usage !== undefined ? { usage: output.usage } : {}),
+        ...(!ok ? { error: job.error ?? "Execution did not complete successfully." } : {}),
     };
 }
 function textSurfaceFile(path, content) {
@@ -744,7 +1040,7 @@ export function buildWorkbenchProjectSourceFiles(input) {
         ...(input.specFiles
             ? input.specFiles.map((file) => ({ ...file }))
             : [textSurfaceFile("benchmark.yaml", input.specSource ?? "")]),
-        ...prefixProjectSourceFiles(input.subjectFiles, input.subjectFilesPath),
+        ...prefixProjectSourceFiles(input.candidateFiles, input.candidateFilesPath),
         ...prefixProjectSourceFiles(input.engineResolveFiles, input.engineResolveFilesPath),
         ...(input.adapterFiles ?? []).map((file) => ({ ...file })),
         ...(input.dockerfiles ?? []).map((file) => ({ ...file })),
@@ -772,18 +1068,18 @@ function prefixProjectSourceFiles(files, rootPath) {
         };
     });
 }
-export function isSubjectSourceFilePath(filePath) {
+export function isCandidateSourceFilePath(filePath) {
     const normalized = normalizeRelativePath(filePath);
     return (normalized !== ".workbench" &&
         !normalized.startsWith(".workbench/") &&
         normalized !== "workbench-result.json");
 }
-export function filterSubjectSourceFiles(files) {
+export function filterCandidateSourceFiles(files) {
     return files
-        .filter((file) => isSubjectSourceFilePath(file.path))
+        .filter((file) => isCandidateSourceFilePath(file.path))
         .map((file) => ({ ...file }));
 }
-export function buildSubjectLineage(args) {
+export function buildCandidateLineage(args) {
     const orderedSummaries = args.summaries.slice().sort((left, right) => {
         const createdAt = left.createdAt.localeCompare(right.createdAt);
         return createdAt !== 0 ? createdAt : left.id.localeCompare(right.id);
@@ -856,7 +1152,7 @@ function globPatternToRegExp(pattern) {
 function escapeRegExp(value) {
     return value.replace(/[\\^$.*+?()[\]{}|]/gu, "\\$&");
 }
-export function summarizeSubjectFiles(files, changedPaths = files.map((file) => file.path)) {
+export function summarizeCandidateFiles(files, changedPaths = files.map((file) => file.path)) {
     const changed = new Set(changedPaths);
     return [...files]
         .sort((left, right) => left.path.localeCompare(right.path))
@@ -875,7 +1171,7 @@ export function summarizeSubjectFiles(files, changedPaths = files.map((file) =>
         };
     });
 }
-export function createSubjectFilePreview(args) {
+export function createCandidateFilePreview(args) {
     if (args.view === "diff") {
         throw new Error("Diff previews require explicit before and after file content.");
     }
@@ -901,14 +1197,14 @@ export function createSubjectFilePreview(args) {
 export function createCaseReview(args) {
     const preferredSampleIndex = uniqueExecutionSampleIndex(args.executions ?? []);
     const sampleMatchesCase = (sample) => (sample.cases ?? []).some((entry) => entry.id === args.caseId);
-    const samples = args.subject.eval?.samples ?? [];
+    const samples = args.candidate.eval?.samples ?? [];
     const sampleResult = samples.find((sample) => typeof preferredSampleIndex === "number" &&
         sample.index === preferredSampleIndex &&
         sampleMatchesCase(sample)) ?? samples.find(sampleMatchesCase);
     const caseResult = sampleResult?.cases?.find((entry) => entry.id === args.caseId);
     if (!sampleResult && (args.executions?.length ?? 0) > 0) {
         return {
-            subjectId: args.subject.id,
+            candidateId: args.candidate.id,
             caseId: args.caseId,
             caseLabel: args.caseId,
             ...(typeof preferredSampleIndex === "number"
@@ -920,13 +1216,13 @@ export function createCaseReview(args) {
         };
     }
     if (!sampleResult) {
-        throw new Error(`Case ${args.caseId} was not found on subject ${args.subject.id}.`);
+        throw new Error(`Case ${args.caseId} was not found on candidate ${args.candidate.id}.`);
     }
     const durationMs = typeof caseResult?.durationMs === "number"
         ? caseResult.durationMs
         : undefined;
     return {
-        subjectId: args.subject.id,
+        candidateId: args.candidate.id,
         caseId: caseResult?.id ?? args.caseId,
         caseLabel: caseResult?.label ?? args.caseId,
         sampleId: sampleResult.id,
@@ -965,37 +1261,45 @@ function parseAuthoredWorkbenchSourceSpec(source) {
     }
     const resolved = resolveWorkbenchResolvedSourceYamlInternal(source);
     return {
-        version: 3,
+        version: 4,
         benchmark: {
             name: resolved.benchmark.name,
             description: resolved.benchmark.description,
             engine: authoredAdapterSpecFromInvocation(resolved.engine),
         },
-        subject: {
-            name: resolved.subject.name,
-            description: resolved.subject.description,
-            files: { path: resolved.subject.files.path },
-            ...(resolved.subject.prepare ? { prepare: { ...resolved.subject.prepare } } : {}),
-            run: runSpecFromInvocation(resolved.run),
-        },
-        ...(resolved.optimizer
-            ? {
-                optimizer: {
-                    name: resolved.optimizer.name,
-                    ...(resolved.optimizer.description ? { description: resolved.optimizer.description } : {}),
-                    edits: [...resolved.optimizer.edits],
-                    improve: improveSpecFromInvocation(resolved.improve),
+        candidate: {
+            name: resolved.candidate.name,
+            description: resolved.candidate.description,
+            files: { path: resolved.candidate.files.path },
+            ...(resolved.candidate.prepare ? { prepare: { ...resolved.candidate.prepare } } : {}),
+            defaultRun: resolved.candidate.defaultRun,
+            runs: Object.fromEntries(Object.entries(resolved.candidate.runs).map(([runId, run]) => [
+                runId,
+                {
+                    name: run.name,
+                    ...authoredAdapterSpecFromInvocation(run),
                 },
-            }
-            : {}),
+            ])),
+            ...(resolved.candidate.improve
+                ? {
+                    improve: {
+                        edits: [...resolved.candidate.improve.edits],
+                        ...(resolved.candidate.improve.optimizeOn
+                            ? { optimizeOn: resolved.candidate.improve.optimizeOn }
+                            : {}),
+                        ...(resolved.candidate.improve.selectBy
+                            ? { selectBy: resolved.candidate.improve.selectBy }
+                            : {}),
+                        ...improveSpecFromInvocation(resolved.improve),
+                    },
+                }
+                : {}),
+        },
     };
 }
 function improveSpecFromInvocation(invocation) {
     return authoredAdapterSpecFromInvocation(invocation);
 }
-function runSpecFromInvocation(invocation) {
-    return authoredAdapterSpecFromInvocation(invocation);
-}
 function authoredAdapterSpecFromInvocation(invocation) {
     const config = jsonRecord(invocation.with);
     return {
@@ -1048,9 +1352,9 @@ export function createWorkbenchRunWorkload(args) {
     if (!purpose) {
         throw new Error(`Unsupported runtime job kind: ${args.job.kind}`);
     }
-    const subjectId = readJobString(args.job.input, "subjectId") ?? args.job.subjectId;
-    if (!subjectId) {
-        throw new Error(`${purpose} execution job is missing subjectId.`);
+    const candidateId = readJobString(args.job.input, "candidateId") ?? args.job.candidateId;
+    if (!candidateId) {
+        throw new Error(`${purpose} execution job is missing candidateId.`);
     }
     const attemptIndex = readRequiredJobNumber(args.job.input, "attemptIndex", `${purpose} execution job`);
     const sampleIndex = purpose === "improve"
@@ -1066,7 +1370,7 @@ export function createWorkbenchRunWorkload(args) {
         ? engineCaseFilesForRuntimeInput({ spec: args.spec, engineCase })
         : [];
     const engineCaseSpec = engineCase?.case;
-    const initial = createInitialSubjectFiles({
+    const initial = createInitialCandidateFiles({
         baseFiles: args.baseFiles,
         spec: args.spec,
         attemptIndex,
@@ -1074,10 +1378,10 @@ export function createWorkbenchRunWorkload(args) {
     return {
         job: args.job,
         spec: args.spec,
-        subjectId,
+        candidateId,
         attemptIndex,
         sampleIndex,
-        subjectFiles: initial.files,
+        candidateFiles: initial.files,
         caseId,
         engineResolveFiles: selectedEngineResolveFiles,
         traceFiles: (args.traceFiles ?? []).map((file) => ({ ...file })),
@@ -1088,22 +1392,22 @@ export function createWorkbenchRunWorkload(args) {
         baseId: readJobString(args.job.input, "baseId"),
     };
 }
-function createInitialSubjectFiles(args) {
-    const editablePaths = optimizerEdits(args.spec).map(normalizeRelativePath);
+function createInitialCandidateFiles(args) {
+    const editablePaths = improveEdits(args.spec).map(normalizeRelativePath);
     const editPath = editablePaths[0];
-    const subjectPaths = editPath ? [editPath] : [];
+    const candidatePaths = editPath ? [editPath] : [];
     const files = args.baseFiles.length > 0
         ? args.baseFiles.map((file) => ({ ...file }))
         : editPath
             ? normalizeSurfaceFiles([{ path: editPath, content: "" }])
             : [];
     const prompt = [
-        `Run the subject workload for benchmark: ${args.spec.benchmark.description}`,
-        `Attempt ${args.attemptIndex + 1} uses ${formatOptimizerSummary(args.spec)}; the improve adapter may edit the subject before Workbench scores it.`,
+        `Run the candidate workload for benchmark: ${args.spec.benchmark.description}`,
+        `Attempt ${args.attemptIndex + 1} uses ${formatImproveSummary(args.spec)}; the improve adapter may edit the candidate before Workbench scores it.`,
     ].join("\n");
     const byPath = new Map(files.map((file) => [file.path, file]));
     if (editPath &&
-        ![...byPath.keys()].some((filePath) => subjectPaths.includes(filePath))) {
+        ![...byPath.keys()].some((filePath) => candidatePaths.includes(filePath))) {
         byPath.set(editPath, {
             path: editPath,
             kind: "text",
@@ -1167,7 +1471,7 @@ export function workbenchExecutionExecutorForRuntimeInput(args) {
 }
 function adapterOperationForExecutionPurpose(purpose) {
     if (purpose === "improve") {
-        return "optimizer.improve";
+        return "candidate.improve";
     }
     if (purpose === "attempt") {
         return "engine.run";
@@ -1281,8 +1585,8 @@ function normalizeRuntimeControlInputs(value) {
     }
     const record = value;
     const inputs = {};
-    if (hasOwn(record, "subject")) {
-        inputs.subject = normalizeRuntimeControlFiles(record.subject, "inputs.subject");
+    if (hasOwn(record, "candidate")) {
+        inputs.candidate = normalizeRuntimeControlFiles(record.candidate, "inputs.candidate");
     }
     if (hasOwn(record, "case")) {
         inputs.case = normalizeRuntimeControlFiles(record.case, "inputs.case");
@@ -1326,8 +1630,8 @@ function normalizeRuntimeControlOperation(value, label) {
     const operation = record.operation;
     if (operation !== "engine.resolve" &&
         operation !== "engine.run" &&
-        operation !== "subject.run" &&
-        operation !== "optimizer.improve") {
+        operation !== "candidate.run" &&
+        operation !== "candidate.improve") {
         throw new Error(`Workbench runtime-control ${label}.operation is invalid.`);
     }
     const invocation = record.invocation;
@@ -1415,7 +1719,7 @@ export async function executeAdapterInCurrentRuntime(args, execution, startedAt,
     };
     try {
         if (execution.purpose === "improve") {
-            return await executeSubjectRevisionExecutionInCurrentRuntime(runtimeInput, execution, startedAt, capability, eventPublisher);
+            return await executeCandidateRevisionExecutionInCurrentRuntime(runtimeInput, execution, startedAt, capability, eventPublisher);
         }
         if (execution.purpose === "attempt") {
             return await executeAttemptExecutionInCurrentRuntime(runtimeInput, execution, startedAt, capability, eventPublisher);
@@ -1589,22 +1893,22 @@ function completedJobFromSandboxResult(fallbackJob, startedAt, result) {
     }
     return attachSandboxMetadataToJob(failWorkbenchRunJob(fallbackJob, result.startedAt || startedAt, result.error ?? `Sandbox execution ${result.status}.`, result.finishedAt), asRuntimeRecord(result.metadata).sandbox);
 }
-async function executeSubjectRevisionExecutionInCurrentRuntime(args, execution, startedAt, capability, eventPublisher) {
+async function executeCandidateRevisionExecutionInCurrentRuntime(args, execution, startedAt, capability, eventPublisher) {
     const { workload, result } = await runHostedProtocolExecutionResult(args, execution, startedAt, capability, eventPublisher);
     if (result.error || (result.exitCode ?? 0) !== 0) {
         return failWorkbenchRunJob(args.job, startedAt, result.error ?? `Adapter ${execution.adapter.use} exited with status ${result.exitCode}.`, result.finishedAt, result);
     }
     const finishedAt = result.finishedAt ?? new Date().toISOString();
-    const subjectPatch = createSubjectPatchFromResult(result, args.spec);
-    if (subjectPatch.fileChanges.length === 0) {
-        return failWorkbenchRunJob(args.job, startedAt, `${execution.adapter.use === "command" ? "Command improve adapter" : `Adapter ${execution.adapter.use}`} completed without changing subject files covered by optimizer edits.`, finishedAt, result);
-    }
-    const subjectRevisionFiles = applyWorkbenchSubjectPatch({
-        baseFiles: workload.subjectFiles,
-        patch: subjectPatch,
-        edits: requireOptimizerEdits(args.spec),
+    const candidatePatch = createCandidatePatchFromResult(result, args.spec);
+    if (candidatePatch.fileChanges.length === 0) {
+        return failWorkbenchRunJob(args.job, startedAt, `${execution.adapter.use === "command" ? "Command improve adapter" : `Adapter ${execution.adapter.use}`} completed without changing candidate files covered by improve edits.`, finishedAt, result);
+    }
+    const candidateRevisionFiles = applyWorkbenchCandidatePatch({
+        baseFiles: workload.candidateFiles,
+        patch: candidatePatch,
+        edits: requireImproveEdits(args.spec),
     });
-    const usage = assignUsageRole("optimizer", result.usage);
+    const usage = assignUsageRole("improver", result.usage);
     return {
         ...args.job,
         status: "succeeded",
@@ -1616,13 +1920,13 @@ async function executeSubjectRevisionExecutionInCurrentRuntime(args, execution,
             ok: true,
             executionId: execution.id,
             purpose: execution.purpose,
-            subjectId: workload.subjectId,
+            candidateId: workload.candidateId,
             attemptIndex: workload.attemptIndex,
             baseId: workload.baseId,
             prompt: workload.prompt,
-            subjectPatch,
-            fileChanges: subjectPatch.fileChanges,
-            files: subjectRevisionFiles,
+            candidatePatch,
+            fileChanges: candidatePatch.fileChanges,
+            files: candidateRevisionFiles,
             traces: traceFilePaths(result.files),
             ...(usage ? { usage } : {}),
             ...(result.summary !== undefined ? { summary: result.summary } : {}),
@@ -1655,13 +1959,14 @@ async function executeAttemptExecutionInCurrentRuntime(args, execution, startedA
     const finishedAt = workloadResult.finishedAt ?? new Date().toISOString();
     const usage = attemptUsageSummary(workloadResult.usage, engineResult.usage);
     const sample = evaluateSample({
-        subjectId: workload.subjectId,
+        candidateId: workload.candidateId,
         files: workloadResult.files,
         engineResolveFiles: workload.engineResolveFiles,
         spec: workload.spec,
         attemptIndex: workload.attemptIndex,
         sampleIndex: workload.sampleIndex,
         caseId: workload.caseId,
+        split: workload.engineCaseSpec?.split,
         startedAt,
         finishedAt,
         durationMs: workloadResult.durationMs,
@@ -1682,7 +1987,7 @@ async function executeAttemptExecutionInCurrentRuntime(args, execution, startedA
             ok: true,
             executionId: execution.id,
             purpose: execution.purpose,
-            subjectId: workload.subjectId,
+            candidateId: workload.candidateId,
             attemptIndex: workload.attemptIndex,
             sampleIndex: workload.sampleIndex,
             caseId: workload.caseId,
@@ -1725,7 +2030,7 @@ export async function executeRuntimeControlOperationSequenceInCurrentRuntime(arg
                 ? { adapterAuthEnv: adapterAuth.env }
                 : {}),
         }, workload, args.runtimeControlOperation.operations.map((operation, index) => runtimeControlStepForOperation(operation, index, args.adapterManifests)), startedAt, {
-            runSubjectPrepare: args.runtimeControlOperation.prepare ?? false,
+            runCandidatePrepare: args.runtimeControlOperation.prepare ?? false,
             workspaceFiles: args.runtimeControlOperation.inputs?.workspace ?? [],
             outputFiles: args.runtimeControlOperation.inputs?.output ?? [],
             collectWorkspace: args.runtimeControlOperation.collectWorkspace ?? false,
@@ -1823,7 +2128,7 @@ function createRuntimeControlSandboxInput(args, request) {
     const parentInput = asRuntimeRecord(args.job.input);
     const publicFiles = runtimeControlInputFiles(request.inputs, "case", parentWorkload.engineCase ? engineCasePublicFiles(parentWorkload.engineCase) : []);
     const privateFiles = runtimeControlInputFiles(request.inputs, "enginePrivate", parentWorkload.engineCase ? engineCasePrivateFiles(parentWorkload.engineCase) : []);
-    const subjectFiles = runtimeControlInputFiles(request.inputs, "subject", parentWorkload.subjectFiles);
+    const candidateFiles = runtimeControlInputFiles(request.inputs, "candidate", parentWorkload.candidateFiles);
     const traceFiles = runtimeControlInputFiles(request.inputs, "traces", parentWorkload.traceFiles);
     const adapter = request.operations[request.operations.length - 1]?.invocation;
     const childExecution = {
@@ -1866,7 +2171,7 @@ function createRuntimeControlSandboxInput(args, request) {
     const childArgs = {
         ...args,
         job: childJob,
-        baseFiles: subjectFiles,
+        baseFiles: candidateFiles,
         engineResolveFiles: [...publicFiles, ...privateFiles],
         engineCases: [engineCase],
         traceFiles,
@@ -1890,10 +2195,10 @@ function runtimeControlStepForOperation(operation, index, manifests = []) {
             ...(operation.invocation.auth !== undefined ? { auth: operation.invocation.auth } : {}),
         }, operation.operation, manifests).command;
     return {
-        kind: operation.operation === "subject.run"
-            ? "subject"
-            : operation.operation === "optimizer.improve"
-                ? "optimizer"
+        kind: operation.operation === "candidate.run"
+            ? "candidate"
+            : operation.operation === "candidate.improve"
+                ? "improver"
                 : "engine",
         label: operation.label ?? `${operation.operation.replace(".", "_")}_${index + 1}`,
         operation: operation.operation,
@@ -1960,8 +2265,8 @@ function isWorkbenchAdapterOperationResult(value) {
     return record.protocol === "workbench.adapter-result.v1" &&
         (record.operation === "engine.resolve" ||
             record.operation === "engine.run" ||
-            record.operation === "subject.run" ||
-            record.operation === "optimizer.improve");
+            record.operation === "candidate.run" ||
+            record.operation === "candidate.improve");
 }
 function cloneSurfaceFiles(files) {
     return files.map((file) => ({ ...file, path: normalizeRelativePath(file.path) }));
@@ -2040,9 +2345,11 @@ async function runHostedCommandExecutionSteps(args, workload, steps, startedAt,
             const stepTimeoutMs = environmentVersion
                 ? environmentVersionTimeoutMs(environmentVersion)
                 : 5 * 60 * 1000;
-            const shouldRunSubjectPrepare = options.runSubjectPrepare ?? steps.some((step) => step.executor === "sandbox");
-            if (shouldRunSubjectPrepare) {
-                await runSubjectPrepareCommand({
+            const shouldRunCandidatePrepare = options.runCandidatePrepare ??
+                (readWorkloadExecutionPurpose(workload) === "attempt" &&
+                    steps.some((step) => step.executor === "sandbox"));
+            if (shouldRunCandidatePrepare) {
+                await runCandidatePrepareCommand({
                     root: workspace.root,
                     workload,
                     execution,
@@ -2081,6 +2388,9 @@ async function runHostedCommandExecutionSteps(args, workload, steps, startedAt,
                     });
                     const operationResult = await readWorkbenchAdapterOperationResult(outputDir(workspace.root), step.operation);
                     assertWorkbenchAdapterOperationResultOk(operationResult, `Adapter ${step.adapter?.use ?? execution.adapter.use} ${step.operation}`);
+                    await writeSurfaceFiles(outputDir(workspace.root), [
+                        textSurfaceFile(`.workbench/traces/${workload.job.id}/${step.label}/result.json`, `${JSON.stringify(operationResult, null, 2)}\n`),
+                    ]);
                     operationResults.push(operationResult);
                     await publishCommandStepEvent(options.eventPublisher, {
                         step: step.label,
@@ -2132,19 +2442,19 @@ async function runHostedCommandExecutionSteps(args, workload, steps, startedAt,
         await workspace.cleanup();
     }
 }
-async function runSubjectPrepareCommand(args) {
-    const command = args.workload.spec.subject.prepare?.command;
+async function runCandidatePrepareCommand(args) {
+    const command = args.workload.spec.candidate.prepare?.command;
     if (!command) {
         return;
     }
-    const role = args.execution.purpose === "improve" ? "optimizer" : "runner";
+    const role = args.execution.purpose === "improve" ? "improver" : "runner";
     await publishCommandStepEvent(args.eventPublisher, {
-        step: "subject_prepare",
+        step: "candidate_prepare",
         status: "started",
         role,
     });
     try {
-        const shellCommand = createHostedWorkloadShellCommand(args.root, command, "subject_prepare");
+        const shellCommand = createHostedWorkloadShellCommand(args.root, command, "candidate_prepare");
         await args.execFileAsync("sh", ["-c", shellCommand], {
             cwd: args.root,
             env: createHostedWorkloadPrepareEnv(args.root),
@@ -2152,20 +2462,20 @@ async function runSubjectPrepareCommand(args) {
             timeout: args.timeoutMs,
         });
         await publishCommandStepEvent(args.eventPublisher, {
-            step: "subject_prepare",
+            step: "candidate_prepare",
             status: "succeeded",
             role,
         });
     }
     catch (error) {
         await publishCommandStepEvent(args.eventPublisher, {
-            step: "subject_prepare",
+            step: "candidate_prepare",
             status: "failed",
             exitCode: readExitCode(error),
             error: error instanceof Error ? error.message : String(error),
             role,
         });
-        throw new Error(`Subject prepare command failed: ${error instanceof Error ? error.message : String(error)}`);
+        throw new Error(`Candidate prepare command failed: ${error instanceof Error ? error.message : String(error)}`);
     }
 }
 async function createRuntimeWorkspaceRoot(args, fs, os, path, prefix) {
@@ -2204,10 +2514,10 @@ async function createRuntimeWorkspaceRoot(args, fs, os, path, prefix) {
     };
 }
 function stepEventRole(step) {
-    if (step.kind === "optimizer") {
-        return "optimizer";
+    if (step.kind === "improver") {
+        return "improver";
     }
-    if (step.kind === "subject") {
+    if (step.kind === "candidate") {
         return "runner";
     }
     if (step.kind === "engine") {
@@ -2219,10 +2529,10 @@ function adapterOperationUsageSummary(result) {
     if (hasExplicitUsageRole(result.usage)) {
         return completeUsageSummary(result.usage);
     }
-    if (result.operation === "optimizer.improve") {
-        return assignUsageRole("optimizer", result.usage);
+    if (result.operation === "candidate.improve") {
+        return assignUsageRole("improver", result.usage);
     }
-    if (result.operation === "subject.run") {
+    if (result.operation === "candidate.run") {
         return assignUsageRole("runner", result.usage);
     }
     if (result.operation === "engine.run") {
@@ -2239,16 +2549,16 @@ function attemptUsageSummary(workloadUsage, resultUsage) {
 }
 function hasExplicitUsageRole(usage) {
     const normalized = completeUsageSummary(usage);
-    return Boolean(normalized?.optimizer || normalized?.runner || normalized?.engine);
+    return Boolean(normalized?.improver || normalized?.runner || normalized?.engine);
 }
-function createSubjectPatchFromResult(result, spec) {
-    if (result.subjectPatch) {
-        return result.subjectPatch;
+function createCandidatePatchFromResult(result, spec) {
+    if (result.candidatePatch) {
+        return result.candidatePatch;
     }
     const changedEditPaths = result.fileChanges
         .map(normalizeRelativePath)
         .filter((filePath) => !filePath.startsWith(".workbench/") &&
-        isSubjectEditPath(filePath, optimizerEdits(spec)));
+        isCandidateEditPath(filePath, improveEdits(spec)));
     const changedSet = new Set(changedEditPaths);
     const files = result.files
         .filter((file) => changedSet.has(normalizeRelativePath(file.path)))
@@ -2260,7 +2570,7 @@ function createSubjectPatchFromResult(result, spec) {
         ...(result.feedback !== undefined ? { feedback: result.feedback } : {}),
     };
 }
-function isSubjectEditPath(filePath, edits) {
+function isCandidateEditPath(filePath, edits) {
     const normalized = normalizeRelativePath(filePath);
     return edits.some((entry) => {
         const editPath = normalizeRelativePath(entry).replace(/\/+$/u, "");
@@ -2320,21 +2630,33 @@ export async function stageWorkbenchRunWorkload(root, workload) {
     ]);
     await fs.mkdir(inputDir(root), { recursive: true });
     await fs.mkdir(outputDir(root), { recursive: true });
+    await clearMutableWorkspaceFiles(root);
     if (purpose === "attempt") {
-        await fs.mkdir(subjectDir(root), { recursive: true });
+        await fs.mkdir(candidateDir(root), { recursive: true });
         await fs.mkdir(caseDir(root), { recursive: true });
         const engineCase = requireWorkloadEngineCase(workload, "Attempt staging");
-        await writeSurfaceFiles(subjectDir(root), workload.subjectFiles);
+        await writeSurfaceFiles(candidateDir(root), workload.candidateFiles);
         await writeSurfaceFiles(caseDir(root), engineCasePublicFiles(engineCase));
         return;
     }
     if (purpose === "improve") {
-        await fs.mkdir(subjectDir(root), { recursive: true });
-        await writeSurfaceFiles(subjectDir(root), workload.subjectFiles);
+        await writeSurfaceFiles(root, workload.candidateFiles.filter((file) => isMutableWorkspaceSnapshotPath(file.path)));
         await fs.mkdir(tracesDir(root), { recursive: true });
         await writeSurfaceFiles(tracesDir(root), workload.traceFiles);
     }
 }
+async function clearMutableWorkspaceFiles(root) {
+    const fs = await importNodeModule(nodeBuiltin("fs/promises"));
+    const path = await importNodeModule(nodeBuiltin("path"));
+    const entries = await fs.readdir(root, { withFileTypes: true }).catch(() => []);
+    await Promise.all(entries.map(async (entry) => {
+        const relativePath = normalizeRelativePath(entry.name);
+        if (!isMutableWorkspaceSnapshotPath(relativePath)) {
+            return;
+        }
+        await fs.rm(path.join(root, entry.name), { recursive: true, force: true });
+    }));
+}
 async function stageWorkbenchEnginePrivateFiles(root, workload) {
     if (readWorkloadExecutionPurpose(workload) !== "attempt") {
         return;
@@ -2417,7 +2739,7 @@ function adapterFilePathWithinRoot(filePath, sourceRoot) {
 }
 async function readHostedRunFailureResult(root, workload, options) {
     const traceFiles = await readRuntimeTraceFiles(root, workload);
-    const outputFiles = filterRuntimeOutputFiles(await readSurfaceFiles(outputDir(root)));
+    const outputFiles = filterRuntimeOutputFiles(await readSurfaceFiles(outputDir(root), { ignorePath: isWorkbenchInternalOutputPath }));
     const startedAt = options.startedAt ?? new Date().toISOString();
     const finishedAt = new Date().toISOString();
     const files = [...outputFiles, ...traceFiles].sort((left, right) => left.path.localeCompare(right.path));
@@ -2433,13 +2755,13 @@ async function readHostedRunFailureResult(root, workload, options) {
 async function readWorkbenchRunWorkloadResult(root, workload, options = {}) {
     const path = await importNodeModule(nodeBuiltin("path"));
     const traceFiles = await readRuntimeTraceFiles(root, workload);
-    const outputFiles = filterRuntimeOutputFiles(await readSurfaceFiles(outputDir(root)));
+    const outputFiles = filterRuntimeOutputFiles(await readSurfaceFiles(outputDir(root), { ignorePath: isWorkbenchInternalOutputPath }));
     const outputExitCode = await readOptionalNumber(path.join(outputDir(root), "exit_code"));
     const startedAt = options.startedAt ?? new Date().toISOString();
     const finishedAt = new Date().toISOString();
     const purpose = readWorkloadExecutionPurpose(workload);
     const primaryOperation = purpose === "improve"
-        ? "optimizer.improve"
+        ? "candidate.improve"
         : "engine.run";
     const primaryResult = [...(options.operationResults ?? [])]
         .reverse()
@@ -2453,9 +2775,9 @@ async function readWorkbenchRunWorkloadResult(root, workload, options = {}) {
     const cases = normalizeResultCases(resultPayload.cases);
     const includeResultScoring = purpose === "attempt";
     const files = [...outputFiles, ...traceFiles].sort((left, right) => left.path.localeCompare(right.path));
-    const subjectPatch = purpose === "improve" ? primaryResult?.value : undefined;
+    const candidatePatch = purpose === "improve" ? primaryResult?.value : undefined;
     const engineResult = purpose === "attempt" ? primaryResult?.value : undefined;
-    const declaredChanges = subjectPatch?.fileChanges ??
+    const declaredChanges = candidatePatch?.fileChanges ??
         (Array.isArray(resultPayload.fileChanges)
             ? resultPayload.fileChanges.filter((entry) => typeof entry === "string")
             : files.map((file) => file.path));
@@ -2463,7 +2785,7 @@ async function readWorkbenchRunWorkloadResult(root, workload, options = {}) {
         files,
         fileChanges: declaredChanges,
         ...(options.operationResults ? { operationResults: [...options.operationResults] } : {}),
-        ...(subjectPatch ? { subjectPatch } : {}),
+        ...(candidatePatch ? { candidatePatch } : {}),
         ...(engineResult ? { result: engineResult } : {}),
         ...(includeResultScoring && metrics ? { metrics } : {}),
         ...(includeResultScoring && cases ? { cases } : {}),
@@ -2536,9 +2858,10 @@ async function writeWorkbenchAdapterRequest(root, workload, execution, step, aut
     const requestPath = path.join(root, ".workbench", "request.json");
     await fs.mkdir(path.dirname(requestPath), { recursive: true });
     const casePrompt = workload.engineCaseSpec?.prompt;
+    const caseSplit = workload.engineCaseSpec?.split;
     const adapter = step.adapter ?? execution.adapter;
-    const subjectCommand = adapterProtocolCommandSpec(workload.spec.run, "subject.run", manifests).command;
-    await fs.writeFile(requestPath, `${JSON.stringify({
+    const candidateCommand = adapterProtocolCommandSpec(workload.spec.run, "candidate.run", manifests).command;
+    const payload = {
         protocol: "workbench.adapter.v3",
         id: execution.id,
         jobId: workload.job.id,
@@ -2554,17 +2877,17 @@ async function writeWorkbenchAdapterRequest(root, workload, execution, step, aut
                 name: workload.spec.benchmark.name,
                 description: workload.spec.benchmark.description,
             },
-            subject: {
-                id: workload.subjectId,
-                path: workload.spec.subject.files.path,
-                ...(workload.spec.subject.prepare ? { prepare: { ...workload.spec.subject.prepare } } : {}),
+            candidate: {
+                id: workload.candidateId,
+                path: workload.spec.candidate.files.path,
+                ...(workload.spec.candidate.prepare ? { prepare: { ...workload.spec.candidate.prepare } } : {}),
                 run: {
                     ...workload.spec.run,
-                    command: subjectCommand,
+                    command: candidateCommand,
                 },
             },
-            ...(workload.spec.optimizer
-                ? { optimizer: { edits: [...workload.spec.optimizer.edits] } }
+            ...(workload.spec.candidate.improve
+                ? { improve: { edits: [...workload.spec.candidate.improve.edits] } }
                 : {}),
             attempt: {
                 attemptIndex: workload.attemptIndex,
@@ -2574,27 +2897,48 @@ async function writeWorkbenchAdapterRequest(root, workload, execution, step, aut
             case: {
                 id: workload.caseId,
                 ...(casePrompt ? { prompt: casePrompt } : {}),
+                ...(caseSplit ? { split: caseSplit } : {}),
             },
         },
         paths: {
             workspace: root,
             output: outputDir(root),
             result: workbenchAdapterOperationResultPath(outputDir(root)),
-            subject: subjectDir(root),
+            ...(readWorkloadExecutionPurpose(workload) === "attempt" ? { candidate: candidateDir(root) } : {}),
             ...(workload.engineCaseSpec ? { case: caseDir(root) } : {}),
             traces: tracesDir(root),
             ...(step.kind === "engine" ? { enginePrivate: runtimeEnginePrivateDir(root) } : {}),
         },
-    }, null, 2)}\n`);
+    };
+    await fs.writeFile(requestPath, `${JSON.stringify(payload, null, 2)}\n`);
+    await writeSurfaceFiles(outputDir(root), [
+        textSurfaceFile(`.workbench/traces/${workload.job.id}/${step.label}/request.json`, `${JSON.stringify(sanitizeAdapterRequestTracePayload(payload), null, 2)}\n`),
+    ]);
     return requestPath;
 }
-function optimizerEdits(spec) {
-    return spec.optimizer?.edits ?? [];
+function sanitizeAdapterRequestTracePayload(value) {
+    if (Array.isArray(value)) {
+        return value.map((entry) => sanitizeAdapterRequestTracePayload(entry));
+    }
+    if (!value || typeof value !== "object") {
+        return (value ?? null);
+    }
+    const sanitized = {};
+    for (const [key, entry] of Object.entries(value)) {
+        if (key === "auth" || key === "enginePrivate") {
+            continue;
+        }
+        sanitized[key] = sanitizeAdapterRequestTracePayload(entry);
+    }
+    return sanitized;
+}
+function improveEdits(spec) {
+    return spec.candidate.improve?.edits ?? [];
 }
-function requireOptimizerEdits(spec) {
-    const edits = optimizerEdits(spec);
+function requireImproveEdits(spec) {
+    const edits = improveEdits(spec);
     if (edits.length === 0) {
-        throw new Error("Optimizer YAML must declare at least one entry in edits.");
+        throw new Error("Candidate improve configuration must declare at least one entry in edits.");
     }
     return edits;
 }
@@ -2691,8 +3035,8 @@ function requireWorkloadEngineCase(workload, label) {
     }
     return workload.engineCase;
 }
-function subjectDir(root) {
-    return `${inputDir(root)}/subject`;
+function candidateDir(root) {
+    return `${inputDir(root)}/candidate`;
 }
 function caseDir(root) {
     return `${inputDir(root)}/case`;
@@ -2727,7 +3071,7 @@ async function writeSurfaceFiles(root, files) {
         }
     }
 }
-async function readSurfaceFiles(root) {
+async function readSurfaceFiles(root, options = {}) {
     const fs = await importNodeModule(nodeBuiltin("fs/promises"));
     const path = await importNodeModule(nodeBuiltin("path"));
     const utf8Decoder = new TextDecoder("utf-8", { fatal: true });
@@ -2738,6 +3082,10 @@ async function readSurfaceFiles(root) {
             .catch(() => []);
         for (const entry of entries) {
             const absolutePath = path.join(directory, entry.name);
+            const relativePath = normalizeRelativePath(path.relative(root, absolutePath).replace(/\\/gu, "/"));
+            if (options.ignorePath?.(relativePath)) {
+                continue;
+            }
             if (entry.isDirectory()) {
                 await walk(absolutePath);
                 continue;
@@ -2745,9 +3093,18 @@ async function readSurfaceFiles(root) {
             if (!entry.isFile()) {
                 continue;
             }
-            const relativePath = normalizeRelativePath(path.relative(root, absolutePath).replace(/\\/gu, "/"));
-            const body = await fs.readFile(absolutePath);
-            const stats = await fs.stat(absolutePath);
+            let body;
+            let stats;
+            try {
+                body = await fs.readFile(absolutePath);
+                stats = await fs.stat(absolutePath);
+            }
+            catch (error) {
+                if (isVanishedWalkEntry(error)) {
+                    continue;
+                }
+                throw error;
+            }
             const content = encodeSurfaceSnapshotContent(body, utf8Decoder);
             files.push({
                 path: relativePath,
@@ -2761,6 +3118,10 @@ async function readSurfaceFiles(root) {
     await walk(root);
     return files.sort((left, right) => left.path.localeCompare(right.path));
 }
+function isVanishedWalkEntry(error) {
+    const code = error?.code;
+    return code === "ENOENT" || code === "ENOTDIR";
+}
 function encodeSurfaceSnapshotContent(body, utf8Decoder) {
     try {
         return {
@@ -2943,7 +3304,14 @@ function evaluateSample(args) {
     if (metrics.score === undefined) {
         metrics.score = sampleScore;
     }
-    const cases = args.workload.cases?.length ? args.workload.cases : undefined;
+    const cases = runtimeTimedCaseResults({
+        caseId: args.caseId,
+        split: args.split,
+        status: "completed",
+        durationMs,
+        metrics,
+        cases: args.workload.cases,
+    });
     const feedback = {
         ...(args.workload.summary !== undefined
             ? { summary: args.workload.summary }
@@ -2956,10 +3324,10 @@ function evaluateSample(args) {
     return {
         id: `${args.caseId}__sample_${String(args.sampleIndex + 1).padStart(3, "0")}`,
         index: args.sampleIndex,
-        subject: {
-            id: args.subjectId,
-            kind: "subject",
-            label: args.subjectId,
+        candidate: {
+            id: args.candidateId,
+            kind: "candidate",
+            label: args.candidateId,
         },
         status: "completed",
         startedAt: args.startedAt,
@@ -2967,7 +3335,7 @@ function evaluateSample(args) {
         durationMs,
         metrics,
         ...(usage ? { usage } : {}),
-        ...(cases ? { cases } : {}),
+        cases,
         feedback,
     };
 }
@@ -2976,7 +3344,7 @@ function normalizeSampleJobOutput(value) {
         return null;
     }
     const record = value;
-    if (record.ok !== true || typeof record.subjectId !== "string") {
+    if (record.ok !== true || typeof record.candidateId !== "string") {
         return null;
     }
     const files = Array.isArray(record.files)
@@ -2991,7 +3359,7 @@ function normalizeSampleJobOutput(value) {
         return null;
     }
     return {
-        subjectId: record.subjectId,
+        candidateId: record.candidateId,
         attemptIndex: record.attemptIndex,
         sample,
         fileChanges: Array.isArray(record.fileChanges)
@@ -3003,12 +3371,72 @@ function normalizeSampleJobOutput(value) {
             : traceFilePaths(files),
     };
 }
-function normalizeEvaluationSampleOutputs(args) {
-    return args.jobs.flatMap((job) => {
+function normalizeEvaluationSampleOutputs(jobs) {
+    return jobs.flatMap((job) => {
         const output = normalizeSampleJobOutput(job.output);
-        return output ? [{ jobs: [job], output }] : [];
+        if (!output) {
+            return [];
+        }
+        const caseId = readJobString(job.input, "caseId") ?? output.sample.cases?.[0]?.id ?? null;
+        const durationMs = runtimeJobDurationMs(job) ?? output.sample.durationMs;
+        const sample = caseId && typeof durationMs === "number" && Number.isFinite(durationMs)
+            ? {
+                ...output.sample,
+                cases: runtimeTimedCaseResults({
+                    caseId,
+                    split: readJobEngineCaseSplit(job),
+                    status: output.sample.status === "error" ? "error" : "completed",
+                    durationMs,
+                    metrics: output.sample.metrics ?? {},
+                    cases: output.sample.cases,
+                }),
+            }
+            : output.sample;
+        return [{
+                jobs: [job],
+                output: {
+                    ...output,
+                    sample,
+                },
+            }];
     });
 }
+function runtimeTimedCaseResults(args) {
+    const cases = args.cases?.length
+        ? args.cases
+        : [{
+                id: args.caseId,
+                status: args.status,
+                metrics: args.metrics,
+            }];
+    return cases.map((entry) => ({
+        ...entry,
+        ...(!entry.split && args.split && entry.id === args.caseId ? { split: args.split } : {}),
+        status: entry.status ?? args.status,
+        metrics: entry.metrics ?? args.metrics,
+        durationMs: args.durationMs,
+    }));
+}
+function readJobEngineCaseSplit(job) {
+    const input = jsonRecord(job.input);
+    const execution = jsonRecord(input.execution);
+    const metadata = jsonRecord(execution.metadata);
+    const engineCase = jsonRecord(metadata.engineCase);
+    const split = engineCase.split;
+    return typeof split === "string" && split.trim().length > 0
+        ? split.trim()
+        : undefined;
+}
+function runtimeJobDurationMs(job) {
+    if (typeof job.startedAt !== "string" || typeof job.finishedAt !== "string") {
+        return undefined;
+    }
+    const startedMs = Date.parse(job.startedAt);
+    const finishedMs = Date.parse(job.finishedAt);
+    return Number.isFinite(startedMs) && Number.isFinite(finishedMs)
+        ? Math.max(0, finishedMs - startedMs)
+        : undefined;
+}
 function meanFinite(values) {
     const finite = values.filter((value) => typeof value === "number" && Number.isFinite(value));
     if (finite.length === 0) {
@@ -3039,12 +3467,12 @@ function withJobUsage(sample, _jobs, attemptJob) {
         usage,
     };
 }
-function normalizeSubjectRevisionJobOutput(value) {
+function normalizeCandidateRevisionJobOutput(value) {
     if (!value || typeof value !== "object" || Array.isArray(value)) {
         return null;
     }
     const record = value;
-    if (record.ok !== true || typeof record.subjectId !== "string") {
+    if (record.ok !== true || typeof record.candidateId !== "string") {
         return null;
     }
     const files = Array.isArray(record.files)
@@ -3056,7 +3484,7 @@ function normalizeSubjectRevisionJobOutput(value) {
     }
     const usage = normalizeUsageSummary(record.usage);
     return {
-        subjectId: record.subjectId,
+        candidateId: record.candidateId,
         attemptIndex: record.attemptIndex,
         baseId: typeof record.baseId === "string" && record.baseId.length > 0
             ? record.baseId
@@ -3072,7 +3500,7 @@ function normalizeSubjectRevisionJobOutput(value) {
         ...(usage ? { usage } : {}),
     };
 }
-function errorEvaluationSamplesFromJobs(jobs, subjectId, attemptIndex, completedSampleKeys) {
+function errorEvaluationSamplesFromJobs(jobs, candidateId, attemptIndex, completedSampleKeys) {
     const groups = new Map();
     for (const job of jobs) {
         const key = evaluationSampleGroupKeyFromJob(job);
@@ -3082,40 +3510,44 @@ function errorEvaluationSamplesFromJobs(jobs, subjectId, attemptIndex, completed
         groups.set(key, [...(groups.get(key) ?? []), job]);
     }
     return [...groups.values()]
-        .map((group) => errorEvaluationSampleFromJobGroup(group, subjectId, attemptIndex))
+        .map((group) => errorEvaluationSampleFromJobGroup(group, candidateId, attemptIndex))
         .filter((sample) => sample !== null);
 }
-function errorEvaluationSampleFromJobGroup(jobs, subjectId, attemptIndex) {
+function errorEvaluationSampleFromJobGroup(jobs, candidateId, attemptIndex) {
     const job = jobs[0];
     if (!job) {
         return null;
     }
     const sampleIndex = readOptionalJobNumber(job.input, "sampleIndex");
     const caseId = readJobString(job.input, "caseId");
+    const split = readJobEngineCaseSplit(job);
     if (sampleIndex === null || !caseId) {
         return null;
     }
     const startedAt = minIsoTimestamp(jobs.map((entry) => entry.startedAt ?? entry.createdAt));
     const finishedAt = maxIsoTimestamp(jobs.map((entry) => entry.finishedAt ?? entry.updatedAt ?? entry.startedAt));
+    const durationMs = startedAt && finishedAt
+        ? Math.max(0, Date.parse(finishedAt) - Date.parse(startedAt))
+        : undefined;
     const error = summarizeEvaluationJobErrors(jobs) ?? "Evaluation job did not produce a valid sample.";
     return {
         id: `${caseId}__sample_${String(sampleIndex + 1).padStart(3, "0")}`,
         index: sampleIndex,
-        subject: {
-            id: subjectId,
-            kind: "subject",
-            label: subjectId,
+        candidate: {
+            id: candidateId,
+            kind: "candidate",
+            label: candidateId,
         },
         status: "error",
         ...(startedAt ? { startedAt } : {}),
         ...(finishedAt ? { finishedAt } : {}),
-        ...(startedAt && finishedAt
-            ? { durationMs: Math.max(0, Date.parse(finishedAt) - Date.parse(startedAt)) }
-            : {}),
+        ...(durationMs !== undefined ? { durationMs } : {}),
         ...(error ? { error } : {}),
         cases: [{
                 id: caseId,
+                ...(split ? { split } : {}),
                 status: "error",
+                ...(durationMs !== undefined ? { durationMs } : {}),
                 metrics: {},
                 ...(error ? { feedback: { summary: error } } : {}),
             }],
@@ -3171,13 +3603,13 @@ function compareSampleOutputs(left, right) {
     }
     return left.sample.id.localeCompare(right.sample.id);
 }
-function createEvaluationRecord(subjectId, subjectName, rawSamples) {
-    const samples = mergeEvaluationSampleRecords(rawSamples).map((sample) => subjectName
+function createEvaluationRecord(candidateId, candidateName, rawSamples) {
+    const samples = mergeEvaluationSampleRecords(rawSamples).map((sample) => candidateName
         ? {
             ...sample,
-            subject: {
-                ...sample.subject,
-                label: subjectName,
+            candidate: {
+                ...sample.candidate,
+                label: candidateName,
             },
         }
         : sample);
@@ -3191,10 +3623,10 @@ function createEvaluationRecord(subjectId, subjectName, rawSamples) {
     const errorSampleCount = samples.filter((sample) => sample.status === "error")
         .length;
     return {
-        subject: {
-            id: subjectId,
-            kind: "subject",
-            ...(subjectName ? { label: subjectName } : {}),
+        candidate: {
+            id: candidateId,
+            kind: "candidate",
+            ...(candidateName ? { label: candidateName } : {}),
         },
         status: samples.length > 0 && completedSampleCount === samples.length
             ? "completed"
@@ -3215,7 +3647,7 @@ function createEvaluationRecord(subjectId, subjectName, rawSamples) {
         samples,
     };
 }
-function normalizedSubjectDisplayName(value) {
+function normalizedCandidateDisplayName(value) {
     const normalized = value?.trim();
     return normalized ? normalized : null;
 }
@@ -3263,7 +3695,7 @@ function mergeEvaluationSampleGroup(group) {
     return {
         id: `sample_${String(first.index + 1).padStart(3, "0")}`,
         index: first.index,
-        subject: first.subject,
+        candidate: first.candidate,
         status: mergeEvaluationSampleStatus(group),
         ...(startedAt ? { startedAt } : {}),
         ...(finishedAt ? { finishedAt } : {}),
@@ -3355,35 +3787,49 @@ function aggregateCaseStatus(results) {
     }
     return undefined;
 }
-function evaluationMeanMetrics(evaluation) {
-    const entries = Object.entries(evaluation.metrics ?? {}).filter((entry) => Number.isFinite(entry[1].mean));
-    return entries.length > 0
-        ? Object.fromEntries(entries.map(([key, stats]) => [key, Number(stats.mean.toFixed(3))]))
-        : undefined;
-}
-function selectSubject(args) {
-    let selected = args.previousSubject;
-    for (const subject of args.subjects) {
-        if (!selected || hasHigherScore(subject, selected)) {
-            selected = subject;
+function selectCandidate(args) {
+    let selected = args.previousCandidate;
+    for (const candidate of args.candidates) {
+        if (!selected || hasHigherEvaluationMetric(candidate, selected, args.selection)) {
+            selected = candidate;
         }
     }
     return selected;
 }
-function hasHigherScore(subject, incumbent) {
-    const subjectValue = readMetric(subject, "score");
-    const incumbentValue = readMetric(incumbent, "score");
-    if (subjectValue == null) {
+function hasHigherEvaluationMetric(candidate, incumbent, selection) {
+    const metric = selection?.metric ?? "score";
+    const candidateValue = readEvaluationSelectionMean(candidate.eval, metric, selection?.caseIds);
+    const incumbentValue = readEvaluationSelectionMean(incumbent.eval, metric, selection?.caseIds);
+    if (candidateValue == null) {
         return false;
     }
     if (incumbentValue == null) {
         return true;
     }
-    return subjectValue > incumbentValue;
+    return candidateValue > incumbentValue;
+}
+function readEvaluationSelectionMean(evaluation, metric, caseIds) {
+    const stats = readEvaluationSelectionStats(evaluation, metric, caseIds);
+    return stats ? stats.mean : null;
 }
-function readMetric(subject, metric) {
-    const direct = subject.metrics?.[metric];
-    return typeof direct === "number" && Number.isFinite(direct) ? direct : null;
+function readEvaluationSelectionStats(evaluation, metric, caseIds) {
+    if (!caseIds) {
+        const direct = evaluation?.metrics?.[metric];
+        return direct && Number.isFinite(direct.mean) ? direct : null;
+    }
+    if (caseIds.length === 0) {
+        return null;
+    }
+    const allowed = new Set(caseIds);
+    const values = (evaluation?.samples ?? [])
+        .flatMap((sample) => sample.cases ?? [])
+        .flatMap((caseResult) => {
+        const metricValue = caseResult.metrics[metric];
+        return allowed.has(caseResult.id) && typeof metricValue === "number" && Number.isFinite(metricValue)
+            ? [metricValue]
+            : [];
+    });
+    return values.length > 0 ? metricStats(values) : null;
 }
 function metricStats(values) {
     const count = values.length;
@@ -3501,7 +3947,7 @@ function isEvaluationSampleRecord(value) {
         !Array.isArray(value) &&
         typeof record.id === "string" &&
         typeof record.index === "number" &&
-        typeof record.subject === "object" &&
+        typeof record.candidate === "object" &&
         isEvaluationSampleStatus(record.status) &&
         hasOperationalCaseStatuses(record.cases));
 }