npm - @workbench-ai/workbench - Version diff - 0.0.49 → 0.0.51 - Mend

@workbench-ai/workbench 0.0.49 → 0.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/dist/adapter-project.js +3 -3
package/dist/benchmark-fingerprint.d.ts +1 -1
package/dist/benchmark-fingerprint.d.ts.map +1 -1
package/dist/benchmark-fingerprint.js +4 -6
package/dist/command-model.d.ts.map +1 -1
package/dist/command-model.js +95 -453
package/dist/dev-open/client.css +42 -43
package/dist/dev-open/client.js +145 -145
package/dist/dev-open-server.d.ts +12 -22
package/dist/dev-open-server.d.ts.map +1 -1
package/dist/dev-open-server.js +82 -42
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1758 -1245
package/dist/init-scaffold.d.ts +4 -4
package/dist/init-scaffold.d.ts.map +1 -1
package/dist/init-scaffold.js +2 -2
package/dist/init-template-pack.d.ts +4 -4
package/dist/init-template-pack.d.ts.map +1 -1
package/dist/init-template-pack.js +47 -59
package/dist/local-archive.d.ts +15 -11
package/dist/local-archive.d.ts.map +1 -1
package/dist/local-archive.js +325 -83
package/dist/project-source.d.ts +14 -17
package/dist/project-source.d.ts.map +1 -1
package/dist/project-source.js +80 -151
package/package.json +4 -4

package/dist/index.js CHANGED Viewed

@@ -5,7 +5,7 @@ import { createRequire } from "node:module";
 import os from "node:os";
 import path from "node:path";
 import { Writable } from "node:stream";
-import { createSubjectFilePreview, createBaselineSubjectJob as createRuntimeBaselineSubjectJob, evaluationScorecardId, executeWorkbenchExecutionJob, engineResolveBindingForSpec, filterSubjectSourceFiles, workbenchExecutionPurpose, createWorkbenchAdapterAuthBundle, createSubjectEvaluationTraceInputFiles, createSubjectRevisionTraceInputFiles, DOCKER_SANDBOX_BACKEND, localWorkbenchAdapterAuthStore, materializeWorkbenchRunResult, normalizeSurfaceFiles, planWorkbenchExecutionJobsForPurpose, runWorkbenchExecutionDag, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, summarizeSubjectFiles, validateWorkbenchRunEnvelope, parseWorkbenchAdapterAuthTarget, } from "@workbench-ai/workbench-core";
+import { createCandidateFilePreview, createBaselineCandidateJob as createRuntimeBaselineCandidateJob, evaluationScorecardId, evaluationMeanMetrics, executeWorkbenchExecutionJob, engineResolveBindingForSpec, filterOptimizerTraceJobsForCaseIds, filterCandidateSourceFiles, formatWorkbenchCaseSelector, formatWorkbenchSelectionPolicy, workbenchCaseSelectorUsesAllCases, workbenchExecutionPurpose, workbenchRunExecutionFingerprint, createWorkbenchAdapterAuthBundle, createOptimizerTraceInputFiles, DOCKER_SANDBOX_BACKEND, localWorkbenchAdapterAuthStore, materializeWorkbenchRunResult, normalizeSurfaceFiles, planWorkbenchExecutionJobsForPurpose, runWorkbenchExecutionDag, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, summarizeCandidateFiles, validateWorkbenchRunEnvelope, parseWorkbenchAdapterAuthTarget, workbenchEngineCaseIdsForImproveEvaluation, workbenchEngineCaseIdsForSelector, workbenchImproveOptimizeSelector, workbenchImproveSelectionPolicy, workbenchProjectSourceFingerprint, workbenchRuntimeBundleFingerprint, } from "@workbench-ai/workbench-core";
 import { assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterAuthRequirements, WORKBENCH_ADAPTER_RESULT_FILE, WORKBENCH_ADAPTER_RESULT_PROTOCOL, normalizeWorkbenchAdapterOperationRequest, readWorkbenchAdapterOperationResult, workbenchAdapterOperationCommand, workbenchAdapterOperationResultPath, withDefaultWorkbenchAdapterAuthProfiles as applyDefaultWorkbenchAdapterAuthProfiles, } from "@workbench-ai/workbench-protocol";
 import { builtinLocalTraceAdapter, builtinLocalTraceAdapters, sortLocalTraceRefs, } from "@workbench-ai/workbench-built-in-adapters/local-traces";
 import { commandUsage, HOSTED_WATCH_LIFECYCLE_NOTE, LOCAL_DEV_OPEN_LIFECYCLE_NOTE, rootUsage, } from "./command-model.js";
@@ -13,10 +13,10 @@ import { startLocalWorkbenchDevServer } from "./dev-open-server.js";
 import { createWorkbenchInitScaffold, } from "./init-scaffold.js";
 import { defaultAdapterManifests, composeRuntimeDockerfileWithAdapters, resolveDefaultWorkbenchAdapter, resolveProjectAdapterSource, resolveWorkbenchAdaptersForProject, WORKBENCH_ADAPTER_MANIFEST_FILE, } from "./adapter-project.js";
 import { createAdapterCommandEnv } from "./adapter-command-env.js";
-import { appendLocalRun, loadLocalArchive, loadLocalArchiveIndex, materializeSubjectRoot, readLocalSubject, readLocalSubjectFiles, saveLocalArchive, saveLocalJobs, setLocalActive, upsertLocalSubject, upsertLocalEvaluation, } from "./local-archive.js";
+import { loadLocalArchive, loadLocalArchiveIndex, exportLocalRuntimeBundle, importLocalRuntimeBundle, runtimeBundleStats, materializeCandidateRoot, readLocalCandidate, readLocalCandidateFiles, readLocalJobs, saveLocalArchive, saveLocalJobs, setLocalActive, upsertLocalRun, upsertLocalCandidate, upsertLocalEvaluation, } from "./local-archive.js";
 import { WorkspaceSnapshotError, } from "./workspace-snapshot.js";
 import { readLocalProjectSource, WORKBENCH_BENCHMARK_FILE, } from "./project-source.js";
-import { localBenchmarkFingerprint, localSubjectFingerprint, } from "./benchmark-fingerprint.js";
+import { localBenchmarkFingerprint, localCandidateFingerprint, } from "./benchmark-fingerprint.js";
 const require = createRequire(import.meta.url);
 function getCliVersion() {
     const manifest = require("../package.json");
@@ -74,29 +74,38 @@ export async function runCli(argv, io = {
         if (argv[0] === "clone") {
             return await cloneProject(argv.slice(1), io);
         }
-        if (argv[0] === "fetch") {
-            return await fetchProject(argv.slice(1), io);
-        }
         if (argv[0] === "pull") {
             return await pullProject(argv.slice(1), io);
         }
         if (argv[0] === "push") {
             return await pushBenchmark(argv.slice(1), io);
         }
-        if (argv[0] === "remote") {
-            return await runRemoteCommand(argv.slice(1), io);
-        }
         if (argv[0] === "eval") {
-            return await localEvaluateSubject(argv.slice(1), io, runtimeOptions);
+            const hosted = extractHostedFlag(argv.slice(1));
+            return hosted.enabled
+                ? await startHostedWorkflow("eval", hosted.argv, io)
+                : await localEvaluateCandidate(hosted.argv, io, runtimeOptions);
+        }
+        if (argv[0] === "retry") {
+            const hosted = extractHostedFlag(argv.slice(1));
+            return hosted.enabled
+                ? await retryHostedWorkflow(hosted.argv, io)
+                : await localRetry(hosted.argv, io, runtimeOptions);
         }
         if (argv[0] === "improve") {
-            return await localRun(argv.slice(1), io, runtimeOptions);
+            const hosted = extractHostedFlag(argv.slice(1));
+            return hosted.enabled
+                ? await startHostedWorkflow("improve", hosted.argv, io)
+                : await localRun(hosted.argv, io, runtimeOptions);
         }
         if (argv[0] === "restore") {
             return await localRestore(argv.slice(1), io);
         }
         if (argv[0] === "open") {
-            return await localDevOpen(argv.slice(1), io);
+            const hosted = extractHostedFlag(argv.slice(1));
+            return hosted.enabled
+                ? await openWorkbench(hosted.argv, io)
+                : await localDevOpen(hosted.argv, io);
         }
         if (argv[0] === "auth") {
             return await runAuthCommand(argv.slice(1), io);
@@ -107,9 +116,6 @@ export async function runCli(argv, io = {
         if (argv[0] === "traces") {
             return await runTracesCommand(argv.slice(1), io);
         }
-        if (argv[0] === "cloud") {
-            return await runCloudCommand(argv.slice(1), io);
-        }
         const commandPath = argv.slice(0, 2).join(" ");
         const rest = argv.slice(2);
         switch (commandPath) {
@@ -117,14 +123,14 @@ export async function runCli(argv, io = {
                 return await localRunList(rest, io);
             case "runs show":
                 return await localRunShow(rest, io);
-            case "subjects list":
-                return await localSubjectList(rest, io);
-            case "subjects show":
-                return await localSubjectShow(rest, io);
-            case "subjects files":
-                return await localSubjectFiles(rest, io);
-            case "subjects preview":
-                return await localSubjectPreview(rest, io);
+            case "candidates list":
+                return await localCandidateList(rest, io);
+            case "candidates show":
+                return await localCandidateShow(rest, io);
+            case "candidates files":
+                return await localCandidateFiles(rest, io);
+            case "candidates preview":
+                return await localCandidatePreview(rest, io);
             default:
                 break;
         }
@@ -145,9 +151,6 @@ export async function runCli(argv, io = {
 }
 function commandPathForHelp(argv) {
     const positionals = argv.filter((arg) => arg !== "--help" && arg !== "-h" && !arg.startsWith("--"));
-    if (positionals[0] === "cloud") {
-        return positionals.slice(0, 3).join(" ");
-    }
     if (positionals[0] === "adapters" &&
         ["create", "list", "inspect", "test"].includes(positionals[1] ?? "")) {
         return positionals.slice(0, 2).join(" ");
@@ -156,76 +159,31 @@ function commandPathForHelp(argv) {
         ["collect", "list", "show"].includes(positionals[1] ?? "")) {
         return positionals.slice(0, 2).join(" ");
     }
-    if (positionals[0] === "auth" || positionals[0] === "remote") {
+    if (positionals[0] === "auth") {
         return positionals.slice(0, 2).join(" ");
     }
     if (positionals[0] === "runs" &&
         ["list", "show"].includes(positionals[1] ?? "")) {
         return positionals.slice(0, 2).join(" ");
     }
-    if (positionals[0] === "subjects" &&
+    if (positionals[0] === "candidates" &&
         ["list", "show", "files", "preview"].includes(positionals[1] ?? "")) {
         return positionals.slice(0, 2).join(" ");
     }
     return positionals[0] ?? "";
 }
-async function runCloudCommand(argv, io) {
-    const command = argv[0];
-    const rest = argv.slice(1);
-    switch (command) {
-        case "eval":
-            return await startHostedWorkflow("eval", rest, io);
-        case "improve":
-            return await startHostedWorkflow("improve", rest, io);
-        case "open":
-            return await openWorkbench(rest, io);
-        case "watch":
-            return await runWatch(rest, io);
-        case "logs":
-            return await runLogs(rest, io);
-        case "star":
-            return await starProject(rest, io, true);
-        case "unstar":
-            return await starProject(rest, io, false);
-        default:
-            break;
-    }
-    const commandPath = argv.slice(0, 2).join(" ");
-    const subRest = argv.slice(2);
-    switch (commandPath) {
-        case "benchmarks list":
-            return await benchmarkList(subRest, io);
-        case "benchmarks show":
-            return await benchmarkShow(subRest, io);
-        case "benchmarks versions":
-            return await benchmarkVersions(subRest, io);
-        case "benchmarks starred":
-            return await benchmarkStarred(subRest, io);
-        case "benchmarks delete":
-            return await benchmarkDelete(subRest, io);
-        case "runs list":
-            return await runList(subRest, io);
-        case "runs show":
-            return await runShow(subRest, io);
-        case "runs cancel":
-            return await runCancel(subRest, io);
-        case "subjects list":
-            return await subjectList(subRest, io);
-        case "subjects show":
-            return await subjectShow(subRest, io);
-        case "subjects files":
-            return await subjectFiles(subRest, io);
-        case "subjects preview":
-            return await subjectPreview(subRest, io);
-        case "subjects pull":
-            return await subjectExport(subRest, io);
-        case "subjects publish":
-            return await subjectVisibility(subRest, io, "public");
-        case "subjects unpublish":
-            return await subjectVisibility(subRest, io, "private");
-        default:
-            throw new UsageError(`Unknown command: cloud ${argv.join(" ")}`);
+function extractHostedFlag(argv) {
+    let enabled = false;
+    const next = [];
+    for (const arg of argv) {
+        if (arg === "--hosted") {
+            enabled = true;
+        }
+        else {
+            next.push(arg);
+        }
     }
+    return { enabled, argv: next };
 }
 async function localDevOpen(argv, io) {
     const parsed = parseArgs(argv);
@@ -313,7 +271,7 @@ async function localInit(argv, io) {
         specPath,
         kind: scaffold.kind,
         name: scaffold.name,
-        subjectRoot: scaffold.subjectRoot,
+        candidateRoot: scaffold.candidateRoot,
     }, parsed, io, () => `Initialized ${scaffold.kind} Workbench source directory at ${workspace}`);
     return 0;
 }
@@ -358,20 +316,20 @@ function buildWorkbenchCheckPlan(source) {
             files: sourceFileCount(source),
             yaml: [
                 path.relative(source.dir, source.benchmarkPath) || "benchmark.yaml",
-                path.relative(source.dir, source.subjectSpecPath) || "subject YAML",
-                ...(source.optimizerSource !== undefined
-                    ? [path.relative(source.dir, source.optimizerPath ?? "") || "optimizer YAML"]
-                    : []),
+                path.relative(source.dir, source.candidateSpecPath) || "candidate YAML",
             ],
             dockerfile: source.dockerfilePath,
         },
-        subject: {
-            filesPath: source.spec.subject.files.path,
-            files: source.subjectFiles.length,
+        candidate: {
+            name: source.spec.candidate.name,
+            selectedRunId: source.spec.candidate.selectedRunId,
+            runCount: Object.keys(source.spec.candidate.runs).length,
+            filesPath: source.spec.candidate.files.path,
+            files: source.candidateFiles.length,
         },
-        optimizer: source.spec.optimizer
+        improve: source.spec.candidate.improve
             ? {
-                edits: [...source.spec.optimizer.edits],
+                edits: [...source.spec.candidate.improve.edits],
             }
             : null,
         engine: {
@@ -394,8 +352,8 @@ function buildWorkbenchCheckPlan(source) {
     };
 }
 function formatWorkbenchCheckPlan(plan, warningSuffix) {
-    const edits = plan.optimizer?.edits.length
-        ? plan.optimizer.edits.join(", ")
+    const edits = plan.improve?.edits.length
+        ? plan.improve.edits.join(", ")
         : "-";
     const network = plan.environment.network.egress;
     const resources = plan.environment.resources;
@@ -404,11 +362,12 @@ function formatWorkbenchCheckPlan(plan, warningSuffix) {
         `Benchmark: ${plan.benchmarkName}`,
         `Description: ${plan.benchmarkDescription}`,
         `Source: ${plan.source.files} file(s) (${plan.source.yaml.join(", ")}, ${plan.source.dockerfile})`,
-        `Subject files: ${plan.subject.filesPath} (${plan.subject.files} file(s))`,
-        `Optimizer edits: ${edits}`,
+        `Candidate: ${plan.candidate.name} (${plan.candidate.runCount} run(s), selected ${plan.candidate.selectedRunId})`,
+        `Candidate files: ${plan.candidate.filesPath} (${plan.candidate.files} file(s))`,
+        `Improve edits: ${edits}`,
         `Engine cases: ${plan.engine.cases} case(s) from ${formatAdapterSummary(plan.engine.resolver)} at ${plan.engine.path} (${plan.engine.files} file(s))`,
         `Environment: ${plan.environment.dockerfile}, network ${network}, ${resources.cpu} CPU, ${resources.memoryGb}GB RAM, ${resources.timeoutMinutes}m timeout`,
-        `Execution: improve ${plan.adapters.improve ? formatAdapterSummary(plan.adapters.improve) : "not configured"}, subject ${formatAdapterSummary(plan.adapters.run)}, engine ${formatAdapterSummary(plan.adapters.engine)}`,
+        `Execution: improve ${plan.adapters.improve ? formatAdapterSummary(plan.adapters.improve) : "not configured"}, candidate run ${formatAdapterSummary(plan.adapters.run)}, engine ${formatAdapterSummary(plan.adapters.engine)}`,
         ...adapterSourceLines(plan.adapters.sources),
     ].join("\n");
 }
@@ -493,18 +452,206 @@ function splitWorkspaceError(error) {
     const message = error instanceof Error ? error.message : String(error);
     return message.split(/\n+/u).map((entry) => entry.trim()).filter(Boolean);
 }
+async function localRetry(argv, io, runtimeOptions) {
+    const parsed = parseArgs(argv);
+    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
+    rejectUnexpectedPositionals(parsed, "workbench retry", 1);
+    const targetId = parsed.positionals[0];
+    if (!targetId) {
+        throw new UsageError("Missing required TARGET_ID.");
+    }
+    const workspace = resolveDir(parsed);
+    const target = await resolveLocalRetryTarget(workspace, targetId);
+    const captured = createCapturingIo(io);
+    const code = target.workflow === "eval"
+        ? await localEvaluateCandidate([
+            "--dir",
+            workspace,
+            "--candidate",
+            target.candidateId,
+            "--runs",
+            target.candidateRunId,
+            "--samples",
+            String(target.samples),
+            "--json",
+        ], captured.io, runtimeOptions)
+        : await localRun([
+            "--dir",
+            workspace,
+            "--from",
+            target.candidateId,
+            "--runs",
+            target.candidateRunId,
+            "--budget",
+            String(target.budget ?? 1),
+            "--samples",
+            String(target.samples),
+            "--json",
+        ], captured.io, runtimeOptions);
+    const commandOutput = parseCapturedJson(captured.stdoutText());
+    await preserveLocalActiveCandidate(workspace, target.preserveActiveId);
+    const outputRecord = readRecord(commandOutput) ?? {};
+    const result = {
+        ok: code === 0 && outputRecord.ok !== false,
+        retried: {
+            id: target.sourceId,
+            kind: target.sourceKind,
+            workflow: target.workflow,
+        },
+    };
+    assignRetryResultString(result, "runId", outputRecord.runId);
+    assignRetryResultString(result, "evaluationId", outputRecord.evaluationId);
+    assignRetryResultString(result, "candidateId", outputRecord.candidateId);
+    assignRetryResultString(result, "activeCandidateId", outputRecord.activeCandidateId);
+    const localView = localRetryViewHint(outputRecord.localView);
+    if (localView) {
+        result.localView = localView;
+    }
+    const failedJobCount = numberValue(outputRecord.failedJobCount);
+    if (failedJobCount !== null) {
+        result.failedJobCount = failedJobCount;
+    }
+    const error = stringValue(outputRecord.error);
+    if (error) {
+        result.error = error;
+    }
+    writeOutput(result, parsed, io, formatRetryCommandResult);
+    return code;
+}
+async function resolveLocalRetryTarget(workspace, targetId) {
+    const snapshot = await loadLocalArchive(workspace);
+    const evaluation = snapshot.evaluations.find((entry) => entry.id === targetId);
+    if (evaluation) {
+        const run = snapshot.runs.find((entry) => entry.id === evaluation.runId) ?? null;
+        return localEvaluationRetryTarget(snapshot, evaluation, run, "evaluation", targetId);
+    }
+    const run = snapshot.runs.find((entry) => entry.id === targetId);
+    if (!run) {
+        throw new UsageError(`Run or evaluation not found: ${targetId}`);
+    }
+    if (run.status !== "finished") {
+        throw new UsageError(`Run ${run.id} is ${run.status}; wait for it to finish before retrying.`);
+    }
+    if (!runSummaryFailed(run)) {
+        throw new UsageError(`Run ${run.id} did not fail; use workbench ${run.workflow} to intentionally run it again.`);
+    }
+    if (run.workflow === "eval") {
+        const evaluations = snapshot.evaluations.filter((entry) => entry.runId === run.id);
+        if (evaluations.length !== 1) {
+            throw new UsageError(evaluations.length === 0
+                ? `Run ${run.id} has no evaluation record to retry.`
+                : `Run ${run.id} has multiple evaluations; retry a specific evaluation id instead.`);
+        }
+        return localEvaluationRetryTarget(snapshot, evaluations[0], run, "run", targetId);
+    }
+    const candidateRunId = run.candidateRunId;
+    if (!run.candidateId || !candidateRunId) {
+        throw new UsageError(`Run ${run.id} is missing retry metadata; use workbench improve --from with an explicit candidate id.`);
+    }
+    return {
+        sourceId: targetId,
+        sourceKind: "run",
+        workflow: "improve",
+        candidateId: run.candidateId,
+        candidateRunId,
+        samples: run.samples,
+        budget: run.budget,
+        preserveActiveId: snapshot.activeId,
+    };
+}
+function localEvaluationRetryTarget(snapshot, evaluation, run, sourceKind, sourceId) {
+    if (!evaluationScorecardFailed(evaluation, run)) {
+        throw new UsageError(`Evaluation ${evaluation.id} did not fail; use workbench eval to intentionally run it again.`);
+    }
+    if (!snapshot.candidates.some((entry) => entry.id === evaluation.candidateId)) {
+        throw new UsageError(`Candidate not found for evaluation ${evaluation.id}: ${evaluation.candidateId}`);
+    }
+    const candidateRunId = evaluation.candidateRunId ?? run?.candidateRunId;
+    if (!candidateRunId) {
+        throw new UsageError(`Evaluation ${evaluation.id} is missing its candidate run configuration.`);
+    }
+    return {
+        sourceId,
+        sourceKind,
+        workflow: "eval",
+        candidateId: evaluation.candidateId,
+        candidateRunId,
+        samples: evaluation.sampleCount || run?.samples || 1,
+        preserveActiveId: snapshot.activeId,
+    };
+}
+async function preserveLocalActiveCandidate(workspace, activeId) {
+    let snapshot = await loadLocalArchive(workspace);
+    if (activeId && !snapshot.candidates.some((candidate) => candidate.id === activeId)) {
+        return;
+    }
+    if (snapshot.activeId === activeId) {
+        return;
+    }
+    snapshot = setLocalActive(snapshot, activeId);
+    await saveLocalArchive(workspace, snapshot);
+}
+function evaluationScorecardFailed(evaluation, run) {
+    return evaluation.errorSampleCount > 0 ||
+        evaluation.status !== "completed" ||
+        runSummaryFailed(run);
+}
+function runSummaryFailed(run) {
+    return run?.outcome === "error" || run?.outcome === "cancelled";
+}
+function createCapturingIo(io) {
+    const chunks = [];
+    const stdout = new class extends Writable {
+        _write(chunk, _encoding, callback) {
+            chunks.push(Buffer.isBuffer(chunk) ? chunk.toString("utf8") : String(chunk));
+            callback();
+        }
+    }();
+    return {
+        io: {
+            stdin: io.stdin,
+            stdout,
+            stderr: io.stderr,
+        },
+        stdoutText: () => chunks.join(""),
+    };
+}
+function parseCapturedJson(value) {
+    const trimmed = value.trim();
+    if (!trimmed) {
+        return {};
+    }
+    try {
+        return JSON.parse(trimmed);
+    }
+    catch {
+        return { output: trimmed };
+    }
+}
+function localRetryViewHint(value) {
+    const record = readRecord(value);
+    const command = stringValue(record?.command);
+    const note = stringValue(record?.note);
+    return command && note ? { command, note } : undefined;
+}
+function assignRetryResultString(result, key, value) {
+    const normalized = stringValue(value);
+    if (normalized) {
+        result[key] = normalized;
+    }
+}
 async function localRun(argv, io, runtimeOptions) {
     const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "optimizer", "from", "budget", "samples", "json"]));
+    rejectUnknownFlags(parsed, new Set(["dir", "runs", "from", "budget", "samples", "rerun", "json"]));
     const budget = parsePositiveInt(parsed.flags.budget, 1, "budget");
     const samples = parsePositiveInt(parsed.flags.samples, 1, "samples");
     const sourceArg = resolveSourceDir(parsed);
     const projectSource = await readLocalProjectSource(sourceArg, {
-        optimizerPath: asOptionalString(parsed.flags.optimizer),
+        runId: singleRequestedRunId(asOptionalString(parsed.flags.runs), "workbench improve"),
     });
     const workspace = projectSource.dir;
-    if (!projectSource.spec.optimizer) {
-        throw new UsageError("Optimizer YAML is required for workbench improve.");
+    if (!projectSource.spec.improve || !projectSource.spec.candidate.improve) {
+        throw new UsageError("Candidate improve configuration is required for workbench improve.");
     }
     const executionProject = await resolveLocalProjectForExecution(workspace, projectSource.specSource);
     const { spec, adapterManifests } = executionProject;
@@ -514,18 +661,32 @@ async function localRun(argv, io, runtimeOptions) {
     if (caseIds.length === 0) {
         throw new UsageError("Engine resolver must emit at least one case.");
     }
+    const optimizeSelector = workbenchImproveOptimizeSelector(spec);
+    const selectionPolicy = workbenchImproveSelectionPolicy(spec);
+    const optimizeCaseIds = workbenchEngineCaseIdsForSelector(engineCases, optimizeSelector);
+    if (optimizeCaseIds.length === 0) {
+        throw new UsageError(`Improve optimizeOn selector matched no cases: ${formatWorkbenchCaseSelector(optimizeSelector)}.`);
+    }
+    const selectionCaseIds = workbenchEngineCaseIdsForSelector(engineCases, selectionPolicy.selector);
+    if (selectionCaseIds.length === 0) {
+        throw new UsageError(`Improve selectBy selector matched no cases: ${formatWorkbenchCaseSelector(selectionPolicy.selector)}.`);
+    }
+    const selectionScoreCaseIds = workbenchCaseSelectorUsesAllCases(selectionPolicy.selector)
+        ? undefined
+        : selectionCaseIds;
+    const evaluationCaseIds = workbenchEngineCaseIdsForImproveEvaluation({ spec, engineCases });
     requireValidRunEnvelope({
         workflow: "improve",
         budget,
         samples,
-        caseCount: caseIds.length,
+        caseCount: evaluationCaseIds.length,
     });
+    const optimizeOnLabel = formatWorkbenchCaseSelector(optimizeSelector);
+    const selectByLabel = formatWorkbenchSelectionPolicy(selectionPolicy);
     const environmentRefs = await ensureLocalDockerfileEnvironments(workspace, spec, engineCases);
     const benchmarkFingerprint = await readLocalBenchmarkFingerprint(workspace);
-    const runId = `run_local_${Date.now().toString(36)}`;
-    const startedAt = new Date().toISOString();
-    let snapshot = await loadLocalArchive(workspace);
-    const baseSubject = await ensureLocalImproveBaseSubject({
+    const executionFingerprint = localRunExecutionFingerprint(projectSource);
+    const baseCandidate = await ensureLocalImproveBaseCandidate({
         parsed,
         sourceArg,
         workspace,
@@ -534,242 +695,370 @@ async function localRun(argv, io, runtimeOptions) {
         io,
         runtimeOptions,
     });
-    let currentBaseId = baseSubject.id;
+    let snapshot = await loadLocalArchive(workspace);
+    if (parsed.flags.rerun !== true) {
+        const reusableRun = findReusableLocalImproveRun(snapshot.runs, {
+            benchmarkFingerprint,
+            candidateId: baseCandidate.id,
+            candidateRunId: projectSource.spec.candidate.selectedRunId,
+            executionFingerprint,
+            budget,
+            samples,
+        });
+        if (reusableRun) {
+            const evaluation = snapshot.evaluations.find((entry) => entry.runId === reusableRun.id) ?? null;
+            const outputCandidateId = reusableRun.outputCandidateId ?? reusableRun.candidateId ?? baseCandidate.id;
+            const outputCandidate = readLocalCandidate(snapshot, outputCandidateId);
+            const activeCandidate = snapshot.activeId
+                ? readLocalCandidate(snapshot, snapshot.activeId)
+                : null;
+            const result = {
+                ok: true,
+                reused: true,
+                runId: reusableRun.id,
+                evaluationId: evaluation?.id ?? null,
+                outputCandidateId,
+                outputCandidate,
+                activeCandidateId: snapshot.activeId,
+                activeCandidate,
+                completedJobCount: 0,
+                failedJobCount: 0,
+                localView: localDevViewHint(workspace, reusableRun.id),
+            };
+            writeOutput(result, parsed, io, () => `Reused improve run ${reusableRun.id}. Use --rerun to intentionally run it again.`);
+            return 0;
+        }
+    }
+    const runId = `run_local_${Date.now().toString(36)}`;
+    const startedAt = new Date().toISOString();
+    let currentBaseId = baseCandidate.id;
+    let outputCandidateId = null;
     let completedJobCount = 0;
     let failedJobCount = 0;
+    let attemptsExecuted = 0;
     const failedJobs = [];
     const events = [
         createLocalEvent("run_started", startedAt, {
             runId,
-            detail: { budget, samples, strategy: "greedy" },
+            detail: { budget, samples, strategy: "greedy", optimizeOn: optimizeOnLabel, selectBy: selectByLabel },
         }),
     ];
-    const devCapacity = await localDevelopmentCapacity(workspace);
-    const runTraceJobs = [];
-    const attempts = budget;
-    for (let attemptIndex = 0; attemptIndex < attempts; attemptIndex += 1) {
-        snapshot = await loadLocalArchive(workspace);
-        const activeSubject = readLocalSubject(snapshot, currentBaseId);
-        const baseFiles = filterSubjectSourceFiles(readLocalSubjectFiles(snapshot, activeSubject.id));
-        if (baseFiles.length === 0) {
-            throw new UsageError("Subject snapshot must include at least one file.");
-        }
-        const subjectRevisionTraceFiles = [
-            ...createSubjectEvaluationTraceInputFiles({ subject: activeSubject }),
-            ...createSubjectRevisionTraceInputFiles({
-                runId,
-                jobs: runTraceJobs,
-                events,
-            }),
-        ];
-        const subjectId = `subject_${runId.replace(/^run_/u, "")}_${String(attemptIndex + 1).padStart(3, "0")}`;
-        const plannedSubjectRevision = planWorkbenchExecutionJobsForPurpose({
-            ownerUserId: "local",
-            projectId: "local",
-            runId,
-            subjectId,
-            attemptIndex,
-            samples,
-            caseIds,
-            engineCases,
-            spec,
-            workflow: "improve",
-            purpose: "improve",
-            now: new Date().toISOString(),
-            baseFiles,
-            traceFiles: subjectRevisionTraceFiles,
-            ...(environmentRefs.defaultRef ? { environmentRef: environmentRefs.defaultRef } : {}),
-            baseId: activeSubject.id,
-        })[0];
-        const subjectRevisionJobs = await executeLocalDevelopmentDag({
-            jobs: [plannedSubjectRevision],
-            spec,
-            adapterManifests,
-            adapterFiles: normalizeSurfaceFiles(projectSource.adapterFiles),
-            baseFiles,
-            engineResolveFiles,
-            engineCases,
-            traceFiles: subjectRevisionTraceFiles,
-            capacity: devCapacity,
+    const runningRun = {
+        id: runId,
+        workflow: "improve",
+        benchmarkFingerprint,
+        status: "running",
+        candidateId: baseCandidate.id,
+        candidateRunId: projectSource.spec.candidate.selectedRunId,
+        candidateRunName: projectSource.spec.candidate.selectedRunName,
+        startedAt,
+        improver: formatSpecImprover(spec),
+        engineRun: spec.engineRun.use,
+        strategy: "greedy",
+        optimizeOn: optimizeOnLabel,
+        selectBy: selectByLabel,
+        budget,
+        repairBudget: 0,
+        attemptsRequested: budget,
+        attemptsExecuted: 0,
+        samples,
+        executionFingerprint,
+        activeCandidateId: snapshot.activeId,
+        outputCandidateId: null,
+    };
+    snapshot = upsertLocalRun(snapshot, runningRun, events);
+    await saveLocalArchive(workspace, snapshot);
+    try {
+        const devCapacity = await localDevelopmentCapacity(workspace);
+        const baselineTraceJobs = selectLocalOptimizerBaselineTraceJobs(snapshot, await readLocalJobs(workspace), {
+            benchmarkFingerprint,
+            candidateId: baseCandidate.id,
+            candidateRunId: projectSource.spec.candidate.selectedRunId,
+            executionFingerprint,
         });
-        const subjectRevision = subjectRevisionJobs[0];
-        const completedJobs = [subjectRevision];
-        if (subjectRevision.status === "succeeded") {
-            const subjectRevisionFiles = completedJobOutputFiles(subjectRevision).length > 0
-                ? normalizeSurfaceFiles(completedJobOutputFiles(subjectRevision).filter((file) => !file.path.startsWith(".workbench/")))
-                : baseFiles;
-            const attemptJobs = planWorkbenchExecutionJobsForPurpose({
+        const runTraceJobs = [];
+        const attempts = budget;
+        for (let attemptIndex = 0; attemptIndex < attempts; attemptIndex += 1) {
+            snapshot = await loadLocalArchive(workspace);
+            const activeCandidate = readLocalCandidate(snapshot, currentBaseId);
+            const baseFiles = filterCandidateSourceFiles(readLocalCandidateFiles(snapshot, activeCandidate.id));
+            if (baseFiles.length === 0) {
+                throw new UsageError("Candidate snapshot must include at least one file.");
+            }
+            const candidateRevisionTraceFiles = createOptimizerTraceInputFiles({
+                jobs: filterOptimizerTraceJobsForCaseIds([...baselineTraceJobs, ...runTraceJobs], optimizeCaseIds),
+            });
+            const candidateId = `candidate_${runId.replace(/^run_/u, "")}_${String(attemptIndex + 1).padStart(3, "0")}`;
+            const plannedCandidateRevision = planWorkbenchExecutionJobsForPurpose({
                 ownerUserId: "local",
                 projectId: "local",
                 runId,
-                subjectId,
+                candidateId,
                 attemptIndex,
                 samples,
-                now: new Date().toISOString(),
-                caseIds,
+                caseIds: optimizeCaseIds,
                 engineCases,
                 spec,
-                environmentRefsByCase: environmentRefs.byCase,
                 workflow: "improve",
-                purpose: "attempt",
-            });
-            const dagJobs = await executeLocalDevelopmentDag({
-                jobs: [subjectRevision, ...attemptJobs],
+                purpose: "improve",
+                now: new Date().toISOString(),
+                baseFiles,
+                traceFiles: candidateRevisionTraceFiles,
+                ...(environmentRefs.defaultRef ? { environmentRef: environmentRefs.defaultRef } : {}),
+                baseId: activeCandidate.id,
+            })[0];
+            const candidateRevisionJobs = await executeLocalDevelopmentDag({
+                jobs: [plannedCandidateRevision],
                 spec,
                 adapterManifests,
                 adapterFiles: normalizeSurfaceFiles(projectSource.adapterFiles),
-                baseFiles: subjectRevisionFiles,
+                baseFiles,
                 engineResolveFiles,
                 engineCases,
+                traceFiles: candidateRevisionTraceFiles,
                 capacity: devCapacity,
             });
-            completedJobs.splice(0, completedJobs.length, ...dagJobs);
-        }
-        runTraceJobs.push(...completedJobs);
-        const materialized = materializeWorkbenchRunResult({
-            runId,
-            benchmarkFingerprint,
-            sourceYaml: projectSource.specSource,
-            benchmarkSourceFiles: authoredBenchmarkSourceFiles(projectSource),
-            startedAt,
-            spec,
-            jobs: completedJobs,
-            previousSubject: activeSubject,
-            existingSubjectCount: snapshot.subjects.length,
-        });
-        for (const subject of materialized.subjects) {
-            snapshot = upsertLocalSubject(snapshot, subject, materialized.subjectFiles[subject.id] ?? []);
-            events.push(createLocalEvent("subject_created", subject.createdAt, {
+            const candidateRevision = candidateRevisionJobs[0];
+            const completedJobs = [candidateRevision];
+            if (candidateRevision.status === "succeeded") {
+                const candidateRevisionFiles = completedJobOutputFiles(candidateRevision).length > 0
+                    ? normalizeSurfaceFiles(completedJobOutputFiles(candidateRevision).filter((file) => !file.path.startsWith(".workbench/")))
+                    : baseFiles;
+                const attemptJobs = planWorkbenchExecutionJobsForPurpose({
+                    ownerUserId: "local",
+                    projectId: "local",
+                    runId,
+                    candidateId,
+                    attemptIndex,
+                    samples,
+                    now: new Date().toISOString(),
+                    caseIds: evaluationCaseIds,
+                    engineCases,
+                    spec,
+                    environmentRefsByCase: environmentRefs.byCase,
+                    workflow: "improve",
+                    purpose: "attempt",
+                });
+                const dagJobs = await executeLocalDevelopmentDag({
+                    jobs: [candidateRevision, ...attemptJobs],
+                    spec,
+                    adapterManifests,
+                    adapterFiles: normalizeSurfaceFiles(projectSource.adapterFiles),
+                    baseFiles: candidateRevisionFiles,
+                    engineResolveFiles,
+                    engineCases,
+                    capacity: devCapacity,
+                });
+                completedJobs.splice(0, completedJobs.length, ...dagJobs);
+            }
+            runTraceJobs.push(...completedJobs);
+            const materialized = materializeWorkbenchRunResult({
                 runId,
-                subjectId: subject.id,
-                baseId: subject.baseId,
-                status: subject.status,
-                metrics: subject.metrics,
+                benchmarkFingerprint,
+                sourceYaml: projectSource.specSource,
+                benchmarkSourceFiles: authoredBenchmarkSourceFiles(projectSource),
+                startedAt,
+                spec,
+                jobs: completedJobs,
+                previousCandidate: activeCandidate,
+                existingCandidateCount: snapshot.candidates.length,
+                selection: {
+                    metric: selectionPolicy.metric,
+                    ...(selectionScoreCaseIds ? { caseIds: selectionScoreCaseIds } : {}),
+                    label: selectByLabel,
+                },
+            });
+            for (const candidate of materialized.candidates) {
+                outputCandidateId = candidate.id;
+                snapshot = upsertLocalCandidate(snapshot, candidate, materialized.candidateFiles[candidate.id] ?? []);
+                events.push(createLocalEvent("candidate_created", candidate.createdAt, {
+                    runId,
+                    candidateId: candidate.id,
+                    baseId: candidate.baseId,
+                    status: candidate.status,
+                    metrics: evaluationMeanMetrics(candidate.eval),
+                }));
+            }
+            for (const evaluation of materialized.evaluations) {
+                snapshot = upsertLocalEvaluation(snapshot, evaluation);
+            }
+            snapshot = setLocalActive(snapshot, materialized.activeCandidateId);
+            currentBaseId = materialized.activeCandidateId ?? currentBaseId;
+            completedJobCount += materialized.completedJobCount;
+            failedJobCount += materialized.failedJobCount;
+            failedJobs.push(...completedJobs
+                .filter((job) => job.status === "failed")
+                .map((job) => ({
+                id: job.id,
+                purpose: workbenchExecutionPurpose(job),
+                error: job.error ?? "Job failed without an error message.",
+            })));
+            events.push(createLocalEvent("active_changed", new Date().toISOString(), {
+                runId,
+                candidateId: materialized.activeCandidateId ?? undefined,
+                activeId: materialized.activeCandidateId ?? undefined,
+                status: materialized.selectedCandidate?.status,
+                metrics: evaluationMeanMetrics(materialized.selectedCandidate?.eval),
             }));
+            await saveLocalJobs(workspace, completedJobs);
+            await saveLocalArchive(workspace, snapshot);
+            attemptsExecuted += 1;
         }
-        for (const evaluation of materialized.evaluations) {
-            snapshot = upsertLocalEvaluation(snapshot, evaluation);
-        }
-        snapshot = setLocalActive(snapshot, materialized.activeSubjectId);
-        currentBaseId = materialized.activeSubjectId ?? currentBaseId;
-        completedJobCount += materialized.completedJobCount;
-        failedJobCount += materialized.failedJobCount;
-        failedJobs.push(...completedJobs
-            .filter((job) => job.status === "failed")
-            .map((job) => ({
-            id: job.id,
-            purpose: workbenchExecutionPurpose(job),
-            error: job.error ?? "Job failed without an error message.",
-        })));
-        events.push(createLocalEvent("active_changed", new Date().toISOString(), {
+        snapshot = await loadLocalArchive(workspace);
+        const finishedAt = new Date().toISOString();
+        const run = {
+            id: runId,
+            workflow: "improve",
+            benchmarkFingerprint,
+            status: "finished",
+            candidateId: baseCandidate.id,
+            candidateRunId: projectSource.spec.candidate.selectedRunId,
+            candidateRunName: projectSource.spec.candidate.selectedRunName,
+            startedAt,
+            finishedAt,
+            durationMs: Math.max(0, Date.parse(finishedAt) - Date.parse(startedAt)),
+            improver: formatSpecImprover(spec),
+            engineRun: spec.engineRun.use,
+            strategy: "greedy",
+            optimizeOn: optimizeOnLabel,
+            selectBy: selectByLabel,
+            budget,
+            repairBudget: 0,
+            attemptsRequested: budget,
+            attemptsExecuted,
+            samples,
+            executionFingerprint,
+            stoppedReason: "budget_exhausted",
+            outcome: failedJobCount > 0 ? "error" : "ok",
+            activeCandidateId: snapshot.activeId,
+            outputCandidateId: outputCandidateId ?? snapshot.activeId,
+        };
+        events.push(createLocalEvent("run_finished", finishedAt, {
             runId,
-            subjectId: materialized.activeSubjectId ?? undefined,
-            activeId: materialized.activeSubjectId ?? undefined,
-            status: materialized.selectedSubject?.status,
-            metrics: materialized.selectedSubject?.metrics,
+            detail: {
+                outcome: run.outcome ?? null,
+                attemptsExecuted: run.attemptsExecuted,
+                durationMs: run.durationMs ?? null,
+            },
         }));
-        await saveLocalJobs(workspace, completedJobs);
+        snapshot = upsertLocalRun(snapshot, run, events.slice(1));
         await saveLocalArchive(workspace, snapshot);
+        const outputCandidate = run.outputCandidateId
+            ? readLocalCandidate(snapshot, run.outputCandidateId)
+            : null;
+        const activeCandidate = snapshot.activeId
+            ? readLocalCandidate(snapshot, snapshot.activeId)
+            : null;
+        const result = {
+            ok: failedJobCount === 0,
+            runId,
+            outputCandidateId: run.outputCandidateId,
+            outputCandidate,
+            activeCandidateId: snapshot.activeId,
+            activeCandidate,
+            completedJobCount,
+            failedJobCount,
+            failedJobs,
+            localView: localDevViewHint(workspace, runId),
+        };
+        writeOutput(result, parsed, io, () => {
+            const outputMetricValue = outputCandidate ? formatCandidateEvaluationScore(outputCandidate) : "n/a";
+            const activeMetricValue = activeCandidate ? formatCandidateEvaluationScore(activeCandidate) : "n/a";
+            const firstFailure = result.failedJobs[0];
+            const failureDetail = firstFailure
+                ? `\nFirst failed job ${firstFailure.id}${firstFailure.purpose ? ` (${firstFailure.purpose})` : ""}: ${firstFailure.error}`
+                : "";
+            const viewDetail = failedJobCount === 0
+                ? `\nOpen local view: ${result.localView.command}\n${result.localView.note}`
+                : "";
+            return `Run ${runId} finished. Output candidate: ${formatLocalCandidateLabel(outputCandidate)} (score: ${outputMetricValue}). Active candidate: ${formatLocalCandidateLabel(activeCandidate)} (score: ${activeMetricValue}).${failureDetail}${viewDetail}`;
+        });
+        return failedJobCount === 0 ? 0 : 1;
+    }
+    catch (error) {
+        await markLocalRunFailed({
+            workspace,
+            run: {
+                ...runningRun,
+                attemptsExecuted,
+                outputCandidateId,
+            },
+            startedAt,
+            error,
+        }).catch(() => undefined);
+        throw error;
     }
-    snapshot = await loadLocalArchive(workspace);
-    const finishedAt = new Date().toISOString();
-    const run = {
-        id: runId,
-        workflow: "improve",
-        benchmarkFingerprint,
-        status: "finished",
-        startedAt,
-        finishedAt,
-        durationMs: Math.max(0, Date.parse(finishedAt) - Date.parse(startedAt)),
-        optimizer: formatSpecOptimizer(spec),
-        engineRun: spec.engineRun.use,
-        strategy: "greedy",
-        budget,
-        repairBudget: 0,
-        attemptsRequested: budget,
-        attemptsExecuted: budget,
-        samples,
-        stoppedReason: "budget_exhausted",
-        outcome: failedJobCount > 0 ? "error" : "ok",
-    };
-    events.push(createLocalEvent("run_finished", finishedAt, {
-        runId,
-        detail: {
-            outcome: run.outcome ?? null,
-            attemptsExecuted: run.attemptsExecuted,
-            durationMs: run.durationMs ?? null,
-        },
-    }));
-    snapshot = appendLocalRun(snapshot, run, events);
-    await saveLocalArchive(workspace, snapshot);
-    const selected = snapshot.activeId
-        ? readLocalSubject(snapshot, snapshot.activeId)
-        : null;
-    const result = {
-        ok: failedJobCount === 0,
-        runId,
-        activeSubjectId: snapshot.activeId,
-        selectedSubject: selected,
-        completedJobCount,
-        failedJobCount,
-        failedJobs,
-        localView: localDevViewHint(workspace, runId),
-    };
-    writeOutput(result, parsed, io, () => {
-        const metricValue = selected?.metrics?.score ?? "n/a";
-        const firstFailure = result.failedJobs[0];
-        const failureDetail = firstFailure
-            ? `\nFirst failed job ${firstFailure.id}${firstFailure.purpose ? ` (${firstFailure.purpose})` : ""}: ${firstFailure.error}`
-            : "";
-        const viewDetail = failedJobCount === 0
-            ? `\nOpen local view: ${result.localView.command}\n${result.localView.note}`
-            : "";
-        return `Run ${runId} finished. Active subject: ${snapshot.activeId ?? "none"} (score: ${metricValue}).${failureDetail}${viewDetail}`;
-    });
-    return failedJobCount === 0 ? 0 : 1;
 }
-async function ensureLocalImproveBaseSubject(args) {
+async function ensureLocalImproveBaseCandidate(args) {
     let snapshot = await loadLocalArchive(args.workspace);
     const explicitBase = asOptionalString(args.parsed.flags.from);
     const benchmarkFingerprint = await readLocalBenchmarkFingerprint(args.workspace);
     if (explicitBase) {
-        let subject = readLocalSubject(snapshot, explicitBase);
-        if (subject.benchmarkFingerprint !== benchmarkFingerprint) {
-            throw new UsageError(`Base subject ${explicitBase} belongs to benchmark ${subject.benchmarkFingerprint}, not ${benchmarkFingerprint}.`);
+        let candidate = readLocalCandidate(snapshot, explicitBase);
+        if (candidate.benchmarkFingerprint !== benchmarkFingerprint) {
+            throw new UsageError(`Base candidate ${explicitBase} belongs to benchmark ${candidate.benchmarkFingerprint}, not ${benchmarkFingerprint}.`);
         }
-        if (!subject.subjectFingerprint) {
-            throw new UsageError(`Base subject ${explicitBase} is missing a subject fingerprint.`);
+        if (!candidate.candidateFingerprint) {
+            throw new UsageError(`Base candidate ${explicitBase} is missing a candidate fingerprint.`);
         }
-        if (subject.status !== "evaluated" && !subject.eval) {
-            const code = await localEvaluateSubject(["--dir", args.workspace, "--subject", explicitBase, "--samples", String(args.samples), "--json"], createSilentIo(args.io), args.runtimeOptions);
+        if (candidate.status !== "evaluated" && !candidate.eval) {
+            const code = await localEvaluateCandidate([
+                "--dir",
+                args.workspace,
+                "--candidate",
+                explicitBase,
+                "--runs",
+                args.projectSource.spec.candidate.selectedRunId,
+                "--samples",
+                String(args.samples),
+                ...(args.parsed.flags.rerun === true ? ["--rerun"] : []),
+                "--json",
+            ], createSilentIo(args.io), args.runtimeOptions);
             if (code !== 0) {
-                throw new UsageError(`Base subject ${explicitBase} eval failed; improve was not started.`);
+                throw new UsageError(`Base candidate ${explicitBase} eval failed; improve was not started.`);
             }
             snapshot = await loadLocalArchive(args.workspace);
-            subject = readLocalSubject(snapshot, explicitBase);
+            candidate = readLocalCandidate(snapshot, explicitBase);
         }
-        return subject;
+        return candidate;
     }
-    const subjectFingerprint = localSubjectFingerprint(args.projectSource);
-    const existing = snapshot.subjects.find((subject) => subject.benchmarkFingerprint === benchmarkFingerprint &&
-        subject.subjectFingerprint === subjectFingerprint &&
-        (subject.status === "evaluated" || Boolean(subject.eval)));
+    const candidateFingerprint = localCandidateFingerprint(args.projectSource);
+    const existing = snapshot.candidates.find((candidate) => candidate.benchmarkFingerprint === benchmarkFingerprint &&
+        candidate.candidateFingerprint === candidateFingerprint &&
+        (candidate.status === "evaluated" || Boolean(candidate.eval)));
     if (existing) {
         return existing;
     }
     const evalArgs = args.parsed.positionals.length > 0
-        ? [args.sourceArg, "--samples", String(args.samples), "--json"]
-        : ["--dir", args.workspace, "--samples", String(args.samples), "--json"];
-    const code = await localEvaluateSubject(evalArgs, createSilentIo(args.io), args.runtimeOptions);
+        ? [
+            args.sourceArg,
+            "--runs",
+            args.projectSource.spec.candidate.selectedRunId,
+            "--samples",
+            String(args.samples),
+            ...(args.parsed.flags.rerun === true ? ["--rerun"] : []),
+            "--json",
+        ]
+        : [
+            "--dir",
+            args.workspace,
+            "--runs",
+            args.projectSource.spec.candidate.selectedRunId,
+            "--samples",
+            String(args.samples),
+            ...(args.parsed.flags.rerun === true ? ["--rerun"] : []),
+            "--json",
+        ];
+    const code = await localEvaluateCandidate(evalArgs, createSilentIo(args.io), args.runtimeOptions);
     if (code !== 0) {
-        throw new UsageError("Parent subject eval failed; improve was not started.");
+        throw new UsageError("Parent candidate eval failed; improve was not started.");
     }
     snapshot = await loadLocalArchive(args.workspace);
-    const evaluated = snapshot.subjects.find((subject) => subject.benchmarkFingerprint === benchmarkFingerprint &&
-        subject.subjectFingerprint === subjectFingerprint &&
-        (subject.status === "evaluated" || Boolean(subject.eval)));
+    const evaluated = snapshot.candidates.find((candidate) => candidate.benchmarkFingerprint === benchmarkFingerprint &&
+        candidate.candidateFingerprint === candidateFingerprint &&
+        (candidate.status === "evaluated" || Boolean(candidate.eval)));
     if (!evaluated) {
-        throw new UsageError("Parent subject eval did not produce an evaluated subject.");
+        throw new UsageError("Parent candidate eval did not produce an evaluated candidate.");
     }
     return evaluated;
 }
@@ -785,13 +1074,62 @@ function createSilentIo(io) {
         stderr: io.stderr,
     };
 }
-async function localEvaluateSubject(argv, io, runtimeOptions) {
+function selectLocalOptimizerBaselineTraceJobs(snapshot, jobs, target) {
+    const runById = new Map(snapshot.runs.map((run) => [run.id, run]));
+    const evaluation = snapshot.evaluations
+        .filter((entry) => {
+        const run = runById.get(entry.runId);
+        return entry.benchmarkFingerprint === target.benchmarkFingerprint &&
+            entry.candidateId === target.candidateId &&
+            entry.candidateRunId === target.candidateRunId &&
+            run?.executionFingerprint === target.executionFingerprint;
+    })
+        .sort((left, right) => right.updatedAt.localeCompare(left.updatedAt) ||
+        right.runId.localeCompare(left.runId))[0] ?? null;
+    if (!evaluation) {
+        return [];
+    }
+    return jobs.filter((job) => job.runId === evaluation.runId);
+}
+async function localEvaluateCandidate(argv, io, runtimeOptions) {
     void runtimeOptions;
     const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "subject", "samples", "json"]));
+    rejectUnknownFlags(parsed, new Set(["dir", "candidate", "runs", "samples", "rerun", "json"]));
     const samples = parsePositiveInt(parsed.flags.samples, 1, "samples");
     const sourceArg = resolveSourceDir(parsed);
-    const projectSource = await readLocalProjectSource(sourceArg);
+    const runsFlag = asOptionalString(parsed.flags.runs);
+    const defaultProjectSource = await readLocalProjectSource(sourceArg);
+    const selectedRunIds = resolveCandidateRunSelection(defaultProjectSource, runsFlag);
+    if (selectedRunIds.length > 1) {
+        let failed = 0;
+        for (const runId of selectedRunIds) {
+            const args = [
+                "--dir",
+                defaultProjectSource.dir,
+                "--runs",
+                runId,
+                "--samples",
+                String(samples),
+                ...(readOptionalCandidateFlag(parsed) ? ["--candidate", readOptionalCandidateFlag(parsed)] : []),
+                ...(parsed.flags.rerun === true ? ["--rerun"] : []),
+                "--json",
+            ];
+            const code = await localEvaluateCandidate(args, createSilentIo(io), runtimeOptions);
+            if (code !== 0) {
+                failed += 1;
+            }
+        }
+        writeOutput({
+            ok: failed === 0,
+            candidateId: defaultProjectSource.candidateName,
+            candidateRunIds: selectedRunIds,
+            failedRunCount: failed,
+        }, parsed, io, () => `Evaluated ${selectedRunIds.length} candidate run(s); ${failed} failed.`);
+        return failed === 0 ? 0 : 1;
+    }
+    const projectSource = selectedRunIds[0] === defaultProjectSource.candidateRunId
+        ? defaultProjectSource
+        : await readLocalProjectSource(sourceArg, { runId: selectedRunIds[0] });
     const workspace = projectSource.dir;
     const executionProject = await resolveLocalProjectForExecution(workspace, projectSource.specSource);
     const { spec, adapterManifests } = executionProject;
@@ -810,114 +1148,367 @@ async function localEvaluateSubject(argv, io, runtimeOptions) {
     const environmentRefs = await ensureLocalDockerfileEnvironments(workspace, spec, engineCases);
     let snapshot = await loadLocalArchive(workspace);
     const benchmarkFingerprint = await readLocalBenchmarkFingerprint(workspace);
-    const sourceSubjectFingerprint = localSubjectFingerprint(projectSource);
-    const explicitSubjectId = asOptionalString(parsed.flags.subject);
-    const existingSourceSubject = snapshot.subjects.find((subject) => subject.benchmarkFingerprint === benchmarkFingerprint &&
-        subject.subjectFingerprint === sourceSubjectFingerprint);
-    const subjectId = explicitSubjectId ?? existingSourceSubject?.id ?? `subject_${sourceSubjectFingerprint.slice(0, 12)}`;
-    const existingSubject = snapshot.subjects.find((subject) => subject.id === subjectId);
-    const files = filterSubjectSourceFiles(existingSubject
-        ? readLocalSubjectFiles(snapshot, subjectId)
-        : normalizeSurfaceFiles(projectSource.subjectFiles));
+    const executionFingerprint = localRunExecutionFingerprint(projectSource);
+    const sourceCandidateFingerprint = localCandidateFingerprint(projectSource);
+    const explicitCandidateId = readOptionalCandidateFlag(parsed);
+    const existingSourceCandidate = snapshot.candidates.find((candidate) => candidate.benchmarkFingerprint === benchmarkFingerprint &&
+        candidate.candidateFingerprint === sourceCandidateFingerprint);
+    const candidateId = explicitCandidateId ?? existingSourceCandidate?.id ?? `candidate_${sourceCandidateFingerprint.slice(0, 12)}`;
+    const existingCandidate = snapshot.candidates.find((candidate) => candidate.id === candidateId);
+    const activeCandidateIdBeforeEval = snapshot.activeId;
+    const selectedCandidateRunId = projectSource.spec.candidate.selectedRunId;
+    const files = filterCandidateSourceFiles(existingCandidate
+        ? readLocalCandidateFiles(snapshot, candidateId)
+        : normalizeSurfaceFiles(projectSource.candidateFiles));
+    const evaluationWork = parsed.flags.rerun !== true
+        ? await resolveLocalEvaluationWork(workspace, snapshot, {
+            benchmarkFingerprint,
+            candidateId,
+            candidateFingerprint: existingCandidate?.candidateFingerprint ?? sourceCandidateFingerprint,
+            candidateRunId: selectedCandidateRunId,
+            executionFingerprint,
+            samples,
+            caseIds,
+        })
+        : null;
+    const reusableEvaluation = evaluationWork?.reusableEvaluation ?? null;
+    if (reusableEvaluation) {
+        const result = {
+            ok: true,
+            reused: true,
+            runId: reusableEvaluation.runId,
+            evaluation: reusableEvaluation,
+            evaluationId: reusableEvaluation.id,
+            candidateId,
+            completedJobCount: 0,
+            failedJobCount: 0,
+            localView: localDevViewHint(workspace, reusableEvaluation.runId),
+        };
+        writeOutput(result, parsed, io, () => `Reused evaluation ${reusableEvaluation.id}. Use --rerun to intentionally run it again.`);
+        return 0;
+    }
+    const selectedPairs = evaluationWork?.missingPairs.length
+        ? evaluationWork.missingPairs
+        : allCaseSamplePairs(caseIds, samples);
     const runId = `eval_local_${Date.now().toString(36)}`;
-    const evaluatedSubjectId = subjectId;
+    const evaluatedCandidateId = candidateId;
     const startedAt = new Date().toISOString();
-    const baseline = createRuntimeBaselineSubjectJob({
-        ownerUserId: "local",
-        projectId: "local",
-        runId,
-        subjectId: evaluatedSubjectId,
-        attemptIndex: 0,
-        files,
-        now: startedAt,
-        baseId: null,
-    });
-    const completedJobs = [baseline];
-    const attemptJobs = planWorkbenchExecutionJobsForPurpose({
-        ownerUserId: "local",
-        projectId: "local",
-        runId,
-        subjectId: evaluatedSubjectId,
-        attemptIndex: 0,
-        samples,
-        now: startedAt,
-        caseIds,
-        engineCases,
-        spec,
-        environmentRefsByCase: environmentRefs.byCase,
-        workflow: "eval",
-        purpose: "attempt",
-    });
-    const dagJobs = await executeLocalDevelopmentDag({
-        jobs: [baseline, ...attemptJobs],
-        spec,
-        adapterManifests,
-        adapterFiles: normalizeSurfaceFiles(projectSource.adapterFiles),
-        baseFiles: files,
-        engineResolveFiles,
-        engineCases,
-        capacity: await localDevelopmentCapacity(workspace),
-    });
-    completedJobs.splice(0, completedJobs.length, ...dagJobs);
-    const materialized = materializeWorkbenchRunResult({
+    const runStartedEvent = createLocalEvent("run_started", startedAt, {
         runId,
-        benchmarkFingerprint,
-        sourceYaml: projectSource.specSource,
-        benchmarkSourceFiles: authoredBenchmarkSourceFiles(projectSource),
-        subjectFingerprint: existingSubject?.subjectFingerprint ?? sourceSubjectFingerprint,
-        ...(!existingSubject || existingSubject.subjectFingerprint === sourceSubjectFingerprint
-            ? { subjectSourceFiles: authoredSubjectSourceFiles(projectSource) }
-            : {}),
-        startedAt,
-        spec,
-        jobs: completedJobs,
-        previousSubject: null,
-        existingSubjectCount: snapshot.subjects.length,
+        candidateId: evaluatedCandidateId,
+        detail: { samples, strategy: "direct" },
     });
-    for (const subjectRecord of materialized.subjects) {
-        snapshot = upsertLocalSubject(snapshot, subjectRecord, materialized.subjectFiles[subjectRecord.id] ?? []);
-    }
-    if (materialized.activeSubjectId) {
-        snapshot = setLocalActive(snapshot, materialized.activeSubjectId);
-    }
-    for (const evaluation of materialized.evaluations) {
-        snapshot = upsertLocalEvaluation(snapshot, evaluation);
-    }
-    const finishedAt = new Date().toISOString();
-    snapshot = appendLocalRun(snapshot, {
+    const runningRun = {
         id: runId,
         workflow: "eval",
         benchmarkFingerprint,
-        status: "finished",
+        status: "running",
+        candidateId: evaluatedCandidateId,
+        candidateRunId: projectSource.spec.candidate.selectedRunId,
+        candidateRunName: projectSource.spec.candidate.selectedRunName,
         startedAt,
-        finishedAt,
-        durationMs: Math.max(0, Date.parse(finishedAt) - Date.parse(startedAt)),
-        optimizer: "none",
+        improver: "none",
         engineRun: spec.engineRun.use,
         strategy: "direct",
         budget: 1,
         repairBudget: 0,
         attemptsRequested: 1,
-        attemptsExecuted: 1,
+        attemptsExecuted: 0,
         samples,
-        stoppedReason: "completed",
-        outcome: materialized.failedJobCount > 0 ? "error" : "ok",
-    }, []);
-    await saveLocalJobs(workspace, completedJobs);
+        executionFingerprint,
+        activeCandidateId: activeCandidateIdBeforeEval,
+        outputCandidateId: evaluatedCandidateId,
+    };
+    snapshot = upsertLocalRun(snapshot, runningRun, [runStartedEvent]);
     await saveLocalArchive(workspace, snapshot);
-    const evaluation = materialized.evaluations[0] ?? null;
-    const result = {
-        ok: materialized.failedJobCount === 0,
-        runId,
-        evaluation,
-        evaluationId: evaluation?.id ?? null,
-        subjectId: evaluatedSubjectId,
-        completedJobCount: materialized.completedJobCount,
-        failedJobCount: materialized.failedJobCount,
-        localView: localDevViewHint(workspace, runId),
+    try {
+        const baseline = createRuntimeBaselineCandidateJob({
+            ownerUserId: "local",
+            projectId: "local",
+            runId,
+            candidateId: evaluatedCandidateId,
+            attemptIndex: 0,
+            files,
+            now: startedAt,
+            baseId: null,
+        });
+        const attemptJobs = planWorkbenchExecutionJobsForPurpose({
+            ownerUserId: "local",
+            projectId: "local",
+            runId,
+            candidateId: evaluatedCandidateId,
+            attemptIndex: 0,
+            samples,
+            now: startedAt,
+            caseIds: orderedCaseIdsForPairs(caseIds, selectedPairs),
+            sampleIndexesByCase: sampleIndexesByCase(selectedPairs),
+            engineCases,
+            spec,
+            environmentRefsByCase: environmentRefs.byCase,
+            workflow: "eval",
+            purpose: "attempt",
+        });
+        const dagJobs = await executeLocalDevelopmentDag({
+            jobs: [baseline, ...attemptJobs],
+            spec,
+            adapterManifests,
+            adapterFiles: normalizeSurfaceFiles(projectSource.adapterFiles),
+            baseFiles: files,
+            engineResolveFiles,
+            engineCases,
+            capacity: await localDevelopmentCapacity(workspace),
+        });
+        const materializationJobs = [
+            ...(evaluationWork?.priorAttemptJobs ?? []),
+            ...dagJobs,
+        ];
+        const currentRunJobs = dagJobs.filter((job) => job.runId === runId);
+        const currentRunCompletedJobCount = currentRunJobs.filter((job) => job.status === "succeeded").length;
+        const currentRunFailedJobCount = currentRunJobs.filter((job) => job.status === "failed").length;
+        const materialized = materializeWorkbenchRunResult({
+            runId,
+            benchmarkFingerprint,
+            sourceYaml: projectSource.specSource,
+            benchmarkSourceFiles: authoredBenchmarkSourceFiles(projectSource),
+            candidateFingerprint: existingCandidate?.candidateFingerprint ?? sourceCandidateFingerprint,
+            ...(!existingCandidate || existingCandidate.candidateFingerprint === sourceCandidateFingerprint
+                ? { candidateSourceFiles: authoredCandidateSourceFiles(projectSource) }
+                : {}),
+            startedAt,
+            spec,
+            jobs: materializationJobs,
+            previousCandidate: existingCandidate ?? null,
+            existingCandidateCount: snapshot.candidates.length,
+        });
+        for (const candidateRecord of materialized.candidates) {
+            snapshot = upsertLocalCandidate(snapshot, candidateRecord, materialized.candidateFiles[candidateRecord.id] ?? []);
+        }
+        if (materialized.activeCandidateId) {
+            snapshot = setLocalActive(snapshot, materialized.activeCandidateId);
+        }
+        for (const evaluation of materialized.evaluations) {
+            snapshot = upsertLocalEvaluation(snapshot, evaluation);
+        }
+        const activeCandidateId = activeCandidateIdBeforeEval ?? materialized.activeCandidateId ?? null;
+        const finishedAt = new Date().toISOString();
+        if (activeCandidateId) {
+            snapshot = setLocalActive(snapshot, activeCandidateId);
+        }
+        const runFinishedEvent = createLocalEvent("run_finished", finishedAt, {
+            runId,
+            candidateId: evaluatedCandidateId,
+            detail: {
+                outcome: currentRunFailedJobCount > 0 ? "error" : "ok",
+                attemptsExecuted: 1,
+                durationMs: Math.max(0, Date.parse(finishedAt) - Date.parse(startedAt)),
+            },
+        });
+        snapshot = upsertLocalRun(snapshot, {
+            id: runId,
+            workflow: "eval",
+            benchmarkFingerprint,
+            status: "finished",
+            candidateId: evaluatedCandidateId,
+            candidateRunId: projectSource.spec.candidate.selectedRunId,
+            candidateRunName: projectSource.spec.candidate.selectedRunName,
+            startedAt,
+            finishedAt,
+            durationMs: Math.max(0, Date.parse(finishedAt) - Date.parse(startedAt)),
+            improver: "none",
+            engineRun: spec.engineRun.use,
+            strategy: "direct",
+            budget: 1,
+            repairBudget: 0,
+            attemptsRequested: 1,
+            attemptsExecuted: 1,
+            samples,
+            executionFingerprint,
+            stoppedReason: "completed",
+            outcome: currentRunFailedJobCount > 0 ? "error" : "ok",
+            activeCandidateId,
+            outputCandidateId: evaluatedCandidateId,
+        }, [runFinishedEvent]);
+        await saveLocalJobs(workspace, currentRunJobs);
+        await saveLocalArchive(workspace, snapshot);
+        const evaluation = materialized.evaluations[0] ?? null;
+        const result = {
+            ok: currentRunFailedJobCount === 0,
+            runId,
+            evaluation,
+            evaluationId: evaluation?.id ?? null,
+            candidateId: evaluatedCandidateId,
+            activeCandidateId,
+            completedJobCount: currentRunCompletedJobCount,
+            failedJobCount: currentRunFailedJobCount,
+            localView: localDevViewHint(workspace, runId),
+        };
+        writeOutput(result, parsed, io, ({ evaluationId, candidateId }) => `Evaluation ${evaluationId ?? runId} finished for candidate ${candidateId}.\nOpen local view: ${result.localView.command}\n${result.localView.note}`);
+        return currentRunFailedJobCount === 0 ? 0 : 1;
+    }
+    catch (error) {
+        await markLocalRunFailed({
+            workspace,
+            run: runningRun,
+            startedAt,
+            error,
+        }).catch(() => undefined);
+        throw error;
+    }
+}
+async function resolveLocalEvaluationWork(workspace, snapshot, target) { // Decides how much prior evaluation work can be reused for target: a whole evaluation, some case/sample pairs, or nothing (null).
+    const runById = new Map(snapshot.runs.map((run) => [run.id, run])); // Index runs by id so each evaluation can be checked against its run's executionFingerprint.
+    const matchingEvaluations = snapshot.evaluations.filter((evaluation) => {
+        const run = runById.get(evaluation.runId);
+        return evaluation.benchmarkFingerprint === target.benchmarkFingerprint && // All identity fields of the evaluation must equal the target's.
+            evaluation.candidateId === target.candidateId &&
+            evaluation.candidateFingerprint === target.candidateFingerprint &&
+            evaluation.candidateRunId === target.candidateRunId &&
+            run?.executionFingerprint === target.executionFingerprint; // With no run in the snapshot, the left side is undefined and only matches an undefined target fingerprint.
+    });
+    const reusableEvaluation = matchingEvaluations
+        .filter((evaluation) => evaluation.status === "completed" && // Fully reusable: completed, zero errored samples, and at least target.samples completed samples.
+        evaluation.errorSampleCount === 0 &&
+        evaluation.completedSampleCount >= target.samples)
+        .sort((left, right) => right.updatedAt.localeCompare(left.updatedAt) || // Descending updatedAt; descending id breaks ties.
+        right.id.localeCompare(left.id))[0] ?? null;
+    if (reusableEvaluation) {
+        return { // Full reuse: no missing pairs and no prior jobs are reported.
+            reusableEvaluation,
+            missingPairs: [],
+            priorAttemptJobs: [],
+        };
+    }
+    const matchingRunIds = new Set(matchingEvaluations.map((evaluation) => evaluation.runId));
+    if (matchingRunIds.size === 0) {
+        return null; // No matching evaluation at all.
+    }
+    const allPairs = allCaseSamplePairs(target.caseIds, target.samples);
+    const desiredKeys = new Set(allPairs.map(caseSamplePairKey));
+    const previousJobs = await readLocalJobs(workspace);
+    const priorAttemptJobsByPair = latestCompletedAttemptJobsByPair(previousJobs.filter((job) => matchingRunIds.has(job.runId) && // Only jobs from matching runs that belong to the target candidate are considered.
+        job.candidateId === target.candidateId), desiredKeys);
+    const missingPairs = allPairs.filter((pair) => !priorAttemptJobsByPair.has(caseSamplePairKey(pair))); // Requested pairs that no selected prior job covers.
+    if (missingPairs.length === allPairs.length) {
+        return null; // No requested pair is covered by a prior job.
+    }
+    return { // Partial reuse: the pairs still missing plus the prior jobs covering the rest.
+        reusableEvaluation: null,
+        missingPairs,
+        priorAttemptJobs: [...priorAttemptJobsByPair.values()],
     };
-    writeOutput(result, parsed, io, ({ evaluationId, subjectId: evaluatedSubjectId }) => `Evaluation ${evaluationId ?? runId} finished for ${evaluatedSubjectId}.\nOpen local view: ${result.localView.command}\n${result.localView.note}`);
-    return materialized.failedJobCount === 0 ? 0 : 1;
+}
+async function markLocalRunFailed(args) { // Saves args.run as finished with outcome "error", unless the archived copy of that run is already finished.
+    const latest = await loadLocalArchive(args.workspace); // Work from a freshly loaded archive.
+    const current = latest.runs.find((run) => run.id === args.run.id);
+    if (current?.status === "finished") {
+        return; // The archived run is already finished; leave it untouched.
+    }
+    const finishedAt = new Date().toISOString();
+    const message = errorMessage(args.error);
+    const failedRun = {
+        ...args.run, // Start from the run's own fields; the terminal fields below override them.
+        status: "finished",
+        finishedAt,
+        durationMs: Math.max(0, Date.parse(finishedAt) - Date.parse(args.startedAt)), // Math.max(0, ...) keeps the duration from going negative.
+        outcome: "error",
+        error: message,
+    };
+    await saveLocalArchive(args.workspace, upsertLocalRun(latest, failedRun, [ // Save the failed run together with one run_finished event.
+        createLocalEvent("run_finished", finishedAt, {
+            runId: args.run.id,
+            candidateId: args.run.candidateId ?? undefined, // A null candidateId is passed as undefined.
+            detail: {
+                outcome: "error",
+                error: message,
+                attemptsExecuted: failedRun.attemptsExecuted,
+                durationMs: failedRun.durationMs ?? null,
+            },
+        }),
+    ]));
+}
+function errorMessage(error) { // Turns any thrown value into a message string.
+    return error instanceof Error ? error.message : String(error); // Error instances yield .message; anything else goes through String().
+}
+function allCaseSamplePairs(caseIds, samples) { // Builds every { caseId, sampleIndex } combination, in caseIds order.
+    return caseIds.flatMap((caseId) => Array.from({ length: samples }, (_, sampleIndex) => ({ // For each case, sampleIndex runs from 0 to samples - 1.
+        caseId,
+        sampleIndex,
+    })));
+}
+function orderedCaseIdsForPairs(caseIds, pairs) { // Returns the entries of caseIds that occur in pairs, keeping the order of caseIds.
+    const selected = new Set(pairs.map((pair) => pair.caseId)); // Set of case ids referenced by pairs, for constant-time membership checks.
+    return caseIds.filter((caseId) => selected.has(caseId));
+}
+function sampleIndexesByCase(pairs) { // Groups pairs into a Map of caseId -> sample indexes, de-duplicated and sorted ascending.
+    const byCase = new Map();
+    for (const pair of pairs) {
+        byCase.set(pair.caseId, [...(byCase.get(pair.caseId) ?? []), pair.sampleIndex]); // Append to the case's list, starting a new list on first sight.
+    }
+    for (const [caseId, indexes] of byCase.entries()) {
+        byCase.set(caseId, [...new Set(indexes)].sort((left, right) => left - right)); // Drop duplicates, then sort numerically.
+    }
+    return byCase;
+}
+function latestCompletedAttemptJobsByPair(jobs, desiredKeys) { // Returns a Map of case/sample key -> the most recent succeeded "attempt" job, limited to keys in desiredKeys.
+    const byPair = new Map();
+    for (const job of jobs) {
+        if (job.status !== "succeeded" || executionPurposeFromJobInput(job.input) !== "attempt") {
+            continue; // Only succeeded jobs whose execution purpose is "attempt" qualify.
+        }
+        const pair = caseSamplePairFromJob(job);
+        if (!pair) {
+            continue; // The job carries no usable caseId/sampleIndex.
+        }
+        const key = caseSamplePairKey(pair);
+        if (!desiredKeys.has(key)) {
+            continue; // The pair is outside the requested set.
+        }
+        const previous = byPair.get(key);
+        if (!previous || compareJobRecency(job, previous) > 0) { // Keep this job when it is the first for the key or ranks after the stored one.
+            byPair.set(key, job);
+        }
+    }
+    return byPair;
+}
+function caseSamplePairFromJob(job) { // Extracts { caseId, sampleIndex } from a job's input, or returns null when either is unavailable.
+    const input = readRecord(job.input);
+    const execution = readRecord(input?.execution);
+    const metadata = readRecord(execution?.metadata);
+    const caseId = stringValue(input?.caseId) ?? stringValue(metadata?.caseId); // Prefer input.caseId; fall back to input.execution.metadata.caseId.
+    const sampleIndex = integerValue(input?.sampleIndex) ?? integerValue(metadata?.sampleIndex); // Same fallback order for sampleIndex.
+    return caseId && sampleIndex !== null // NOTE(review): relies on integerValue yielding null (not undefined) when nothing is found - confirm against its definition.
+        ? { caseId, sampleIndex }
+        : null;
+}
+function executionPurposeFromJobInput(inputValue) { // Reads execution.purpose from a job input and returns whatever stringValue makes of it.
+    const input = readRecord(inputValue);
+    const execution = readRecord(input?.execution);
+    return stringValue(execution?.purpose);
+}
+function caseSamplePairKey(pair) { // Builds the string key used for a case/sample pair in Maps and Sets.
+    return `${pair.caseId}\0${pair.sampleIndex}`; // A NUL character separates caseId from sampleIndex.
+}
+function compareJobRecency(left, right) { // Comparator over jobs: by recency timestamp, then by id; a positive result means left sorts after right.
+    return jobRecencyTimestamp(left).localeCompare(jobRecencyTimestamp(right)) ||
+        left.id.localeCompare(right.id); // Only consulted when the timestamp comparison returns 0.
+}
+function jobRecencyTimestamp(job) { // Timestamp string used to rank a job's recency.
+    return job.finishedAt ?? job.updatedAt ?? job.startedAt ?? job.createdAt ?? ""; // First non-nullish of the four fields; "" when all are nullish.
+}
+function findReusableLocalImproveRun(runs, target) { // Returns the newest finished, successful "improve" run that matches target and has an output candidate, or null.
+    return runs
+        .filter((run) => run.workflow === "improve" &&
+        run.benchmarkFingerprint === target.benchmarkFingerprint && // Identity fields must equal the target's.
+        run.candidateId === target.candidateId &&
+        run.candidateRunId === target.candidateRunId &&
+        run.executionFingerprint === target.executionFingerprint &&
+        run.budget === target.budget && // Budget and sample count must match exactly.
+        run.samples === target.samples &&
+        run.status === "finished" &&
+        run.outcome === "ok" &&
+        Boolean(run.outputCandidateId)) // Requires a truthy outputCandidateId.
+        .sort((left, right) => (right.finishedAt ?? right.startedAt).localeCompare(left.finishedAt ?? left.startedAt) || // Descending by finishedAt (startedAt when finishedAt is nullish); descending id breaks ties.
+        right.id.localeCompare(left.id))[0] ?? null;
 }
 function localDevViewHint(workspace, runId) {
     const runFlag = runId ? ` --run ${shellQuote(runId)}` : "";
@@ -935,20 +1526,26 @@ function localDevOpenUrl(baseUrl, snapshot, runId) {
         .reverse()
         .find((entry) => entry.runId === runId);
     if (!evaluation) {
-        return new URL("subjects", baseUrl).toString();
+        return new URL("candidates", baseUrl).toString();
     }
     const params = new URLSearchParams({ evaluation: evaluation.id });
-    return new URL(`subjects/${encodeURIComponent(evaluation.subjectId)}?${params.toString()}`, baseUrl).toString();
+    return new URL(`candidates/${encodeURIComponent(evaluation.candidateId)}?${params.toString()}`, baseUrl).toString();
 }
 async function readLocalBenchmarkFingerprint(workspace) { // Reads the project source at workspace and returns its localBenchmarkFingerprint.
     return localBenchmarkFingerprint(await readLocalProjectSource(workspace));
 }
-function authoredSubjectSourceFiles(projectSource) {
+function localRunExecutionFingerprint(projectSource) { // Returns workbenchRunExecutionFingerprint for this project source.
+    return workbenchRunExecutionFingerprint({
+        sourceYaml: projectSource.specSource, // The spec source text is passed as sourceYaml.
+        adapterFiles: normalizeSurfaceFiles(projectSource.adapterFiles), // Adapter files are normalized before being passed in.
+    });
+}
+function authoredCandidateSourceFiles(projectSource) { // Describes the candidate spec file as a one-element list of file records.
     return [{
-            path: path.relative(projectSource.dir, projectSource.subjectSpecPath).split(path.sep).join("/"),
+            path: path.relative(projectSource.dir, projectSource.candidateSpecPath).split(path.sep).join("/"), // Path relative to the project dir, with platform separators replaced by "/".
             kind: "text",
             encoding: "utf8",
-            content: projectSource.subjectSource,
+            content: projectSource.candidateSource,
             executable: false,
         }];
 }
@@ -1155,72 +1752,72 @@ function requireValidRunEnvelope(args) {
 }
 async function localRestore(argv, io) { // "restore" command: writes an archived candidate's files into the spec's candidate root; returns 0 on success.
     const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "subject", "dry-run", "yes", "json"]));
+    rejectUnknownFlags(parsed, new Set(["dir", "candidate", "dry-run", "yes", "json"]));
     const workspace = resolveDir(parsed);
     const spec = await readLocalSpecIfValid(workspace);
     if (!spec) {
         throw new UsageError("restore requires a valid Workbench project.");
     }
-    const subjectRoot = spec.subject.files.path;
+    const candidateRoot = spec.candidate.files.path;
     const snapshot = await loadLocalArchive(workspace);
-    const subjectId = readSubjectIdFlag(parsed, snapshot);
-    const files = readLocalSubjectFiles(snapshot, subjectId);
+    const candidateId = readCandidateIdFlag(parsed, snapshot);
+    const files = readLocalCandidateFiles(snapshot, candidateId);
     if (parsed.flags["dry-run"] === true) {
-        writeOutput({ ok: true, subjectId, fileCount: files.length }, parsed, io, () => `Restore would write ${files.length} file(s) from ${subjectId}.`);
+        writeOutput({ ok: true, candidateId: candidateId, fileCount: files.length }, parsed, io, () => `Restore would write ${files.length} file(s) from ${candidateId}.`);
         return 0; // Dry run: report the file count and return before anything is written or saved.
     }
     if (parsed.flags.yes !== true) {
         throw new UsageError("restore requires --dry-run to preview or --yes to apply source directory changes."); // Applying changes requires an explicit --yes.
     }
-    const changedPaths = await materializeSubjectRoot(workspace, subjectRoot, files);
-    const next = setLocalActive(snapshot, subjectId);
+    const changedPaths = await materializeCandidateRoot(workspace, candidateRoot, files);
+    const next = setLocalActive(snapshot, candidateId);
     await saveLocalArchive(workspace, next); // Save the archive returned by setLocalActive for the restored candidate.
-    writeOutput({ ok: true, activeAfter: subjectId, changedPaths }, parsed, io, () => `Restored ${subjectId} to ${subjectRoot}.`);
+    writeOutput({ ok: true, activeCandidateId: candidateId, changedPaths }, parsed, io, () => `Restored ${candidateId} to ${candidateRoot}.`);
     return 0;
 }
-async function localSubjectList(argv, io) {
+async function localCandidateList(argv, io) { // Candidate list command: outputs snapshot.candidates from the local archive; returns 0.
     const parsed = parseArgs(argv);
     rejectUnknownFlags(parsed, new Set(["dir", "json"]));
     const snapshot = await loadLocalArchive(resolveDir(parsed));
-    writeOutput(snapshot.subjects, parsed, io, (subjects) => subjects
-        .map((subject) => `${subject.id}\t${subject.status}\tmetrics ${formatMetricSummary(subject.metrics)}${snapshot.activeId === subject.id ? "\tactive" : ""}`)
-        .join("\n") || "No subjects.");
+    writeOutput(snapshot.candidates, parsed, io, (candidates) => candidates
+        .map((candidate) => `${candidate.id}\t${candidate.status}\tevaluation ${formatCandidateEvaluationScore(candidate)}${snapshot.activeId === candidate.id ? "\tactive" : ""}`) // One tab-separated row per candidate; "active" is appended when the id equals snapshot.activeId.
+        .join("\n") || "No candidates."); // An empty joined string falls back to the placeholder text.
     return 0;
 }
-async function localSubjectShow(argv, io) {
+async function localCandidateShow(argv, io) {
     const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "subject", "json"]));
+    rejectUnknownFlags(parsed, new Set(["dir", "candidate", "json"]));
     const snapshot = await loadLocalArchive(resolveDir(parsed));
-    const subjectId = readSubjectIdFlag(parsed, snapshot);
-    const subject = readLocalSubject(snapshot, subjectId);
-    writeOutput(subject, parsed, io, (record) => [
+    const candidateId = readCandidateIdFlag(parsed, snapshot);
+    const candidate = readLocalCandidate(snapshot, candidateId);
+    writeOutput(candidate, parsed, io, (record) => [
         `${record.id}\t${record.status}`,
         `benchmark\t${record.benchmarkFingerprint}`,
-        `subject\t${record.subjectFingerprint}`,
-        `metrics\t${formatMetricSummary(record.metrics)}`,
+        `candidate\t${record.candidateFingerprint ?? record.candidateFingerprint}`,
+        `evaluation\t${formatCandidateEvaluationSummary(record)}`,
         ...(record.baseId ? [`base\t${record.baseId}`] : []),
     ].join("\n"));
     return 0;
 }
-async function localSubjectFiles(argv, io) {
+async function localCandidateFiles(argv, io) { // Candidate files command: outputs a file summary for one archived candidate; returns 0.
     const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "subject", "json"]));
+    rejectUnknownFlags(parsed, new Set(["dir", "candidate", "json"]));
     const snapshot = await loadLocalArchive(resolveDir(parsed));
-    const subjectId = readSubjectIdFlag(parsed, snapshot);
-    const subject = readLocalSubject(snapshot, subjectId);
-    const files = summarizeSubjectFiles(readLocalSubjectFiles(snapshot, subjectId), subject.fileChanges);
+    const candidateId = readCandidateIdFlag(parsed, snapshot);
+    const candidate = readLocalCandidate(snapshot, candidateId);
+    const files = summarizeCandidateFiles(readLocalCandidateFiles(snapshot, candidateId), candidate.fileChanges);
     writeOutput(files, parsed, io, (records) => records
         .map((file) => `${file.path}\t${file.status}\t${file.preview_kind}`) // One tab-separated row per file: path, status, preview_kind.
         .join("\n") || "No files."); // An empty joined string falls back to the placeholder text.
     return 0;
 }
-async function localSubjectPreview(argv, io) {
+async function localCandidatePreview(argv, io) {
     const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "subject", "path", "output", "view", "json"]));
+    rejectUnknownFlags(parsed, new Set(["dir", "candidate", "path", "output", "view", "json"]));
     const snapshot = await loadLocalArchive(resolveDir(parsed));
-    const subjectId = readSubjectIdFlag(parsed, snapshot);
-    const preview = createSubjectFilePreview({
-        files: readLocalSubjectFiles(snapshot, subjectId),
+    const candidateId = readCandidateIdFlag(parsed, snapshot);
+    const preview = createCandidateFilePreview({
+        files: readLocalCandidateFiles(snapshot, candidateId),
         path: requireFlag(parsed, "path"),
         view: readPreviewMode(parsed),
     });
@@ -1755,7 +2352,7 @@ function createAdapterScaffoldFiles(id) {
         "setup:",
         "  - npm install --global .",
         "operations:",
-        "  subject.run: {}",
+        "  candidate.run: {}",
         "",
     ].join("\n");
     const packageJson = `${JSON.stringify({
@@ -1777,11 +2374,11 @@ const request = requestPath && fs.existsSync(requestPath)
   ? JSON.parse(fs.readFileSync(requestPath, "utf8"))
   : {};
 fs.mkdirSync(outputRoot, { recursive: true });
-const operation = request.operation || "subject.run";
+const operation = request.operation || "candidate.run";
 const resultPath = process.env.WORKBENCH_RESULT || request.paths?.result || path.join(outputRoot, "workbench-result.json");
 let value;
-if (operation === "subject.run") {
+if (operation === "candidate.run") {
   const task = request.context?.case?.prompt || "No case prompt was provided.";
   fs.writeFileSync(path.join(outputRoot, "adapter-output.txt"), [
     "adapter: ${id}",
@@ -1790,7 +2387,7 @@ if (operation === "subject.run") {
     "",
   ].join("\\n"));
 } else {
-  console.error("${id} only implements subject.run.");
+  console.error("${id} only implements candidate.run.");
   process.exit(2);
 }
@@ -2065,7 +2662,7 @@ async function resolveAdapterForAuthTarget(dir, targetRaw) {
     const adapters = await resolveWorkbenchAdaptersForProject(dir, spec);
     const adapter = adapters.find((entry) => entry.manifest.id === target.adapterId);
     if (!adapter) {
-        throw new UsageError(`Adapter ${target.adapterId} is not used by this benchmark source. Add it to the benchmark, subject, or optimizer YAML before connecting auth.`);
+        throw new UsageError(`Adapter ${target.adapterId} is not used by this benchmark source. Add it to the benchmark or candidate YAML before connecting auth.`);
     }
     if (!adapter.manifest.auth) {
         throw new UsageError(`Adapter ${target.adapterId} does not declare auth.`);
@@ -2313,13 +2910,21 @@ function adapterAuthRecord(value) {
 }
 async function pushBenchmark(argv, io) {
     const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "tag", "visibility", "dry-run", "json"]));
+    rejectUnknownFlags(parsed, new Set(["dir", "visibility", "dry-run", "json"]));
     const dir = resolveSourceDir(parsed);
     const source = await readLocalProjectSource(dir);
     const origin = await readWorkbenchOrigin(dir);
     const baseUrl = await effectiveBaseUrl(origin?.baseUrl);
-    const visibility = readBenchmarkVisibility(parsed.flags.visibility);
+    const visibility = readOptionalBenchmarkVisibility(parsed.flags.visibility);
+    const createVisibility = visibility ?? "public";
     const dryRun = parsed.flags["dry-run"] === true;
+    const runtime = await exportLocalRuntimeBundle(dir);
+    const state = localProjectState({
+        source,
+        runtime,
+        origin,
+        visibility: createVisibility,
+    });
     if (!origin) {
         if (dryRun) {
             writeOutput({
@@ -2329,35 +2934,36 @@ async function pushBenchmark(argv, io) {
                 dir,
                 baseUrl,
                 benchmarkName: source.spec.name,
-                tag: asOptionalString(parsed.flags.tag) ?? null,
-                visibility,
+                visibility: createVisibility,
                 sourceFileCount: sourceFileCount(source),
+                runtime: runtimeBundleStats(runtime),
+                sourceFingerprint: state.source.fingerprint,
+                runtimeFingerprint: state.base.runtimeFingerprint,
             }, parsed, io, () => `Would push benchmark ${source.spec.name}.`);
             return 0;
         }
-        const { project, publishedProject, origin: nextOrigin } = await createHostedBenchmarkFromSource({
+        const { project, origin: nextOrigin, result } = await createHostedBenchmarkFromState({
             baseUrl,
             dir,
-            source,
-            visibility,
+            state,
         });
         writeOutput({
             ok: true,
             action: "create",
-            benchmark: publishedProject,
-            tag: asOptionalString(parsed.flags.tag) ?? null,
-            visibility,
+            benchmark: project,
+            visibility: project.visibility ?? createVisibility,
             origin: nextOrigin,
+            source: result.source,
+            runtime: result.runtime.stats,
             urls: buildWorkbenchResourceUrls({
                 baseUrl,
-                projectId: publishedProject.id ?? project.id,
-                owner: nextOrigin.owner,
-                projectName: nextOrigin.project,
+                projectId: project.id,
+                ...originRemoteUrlParts(nextOrigin),
             }),
         }, parsed, io, (record) => {
             const value = record;
             return [
-                `Pushed ${value.origin.owner}/${value.origin.project} (${value.origin.projectId}).`,
+                `Pushed ${value.origin.remote} (${value.origin.projectId}).`,
                 `Open benchmark: ${value.urls.benchmark}`,
             ].join("\n");
         });
@@ -2367,57 +2973,6 @@ async function pushBenchmark(argv, io) {
     if (!projectId) {
         throw new UsageError("Missing hosted benchmark. Run workbench push from a source directory.");
     }
-    if (!origin.writable) {
-        const signedInUsername = dryRun ? null : await readAuthenticatedWorkbenchUsername(baseUrl);
-        if (signedInUsername !== origin.owner) {
-            const upstream = upstreamFromOrigin(origin);
-            if (dryRun) {
-                writeOutput({
-                    ok: true,
-                    dryRun: true,
-                    action: "create",
-                    dir,
-                    baseUrl,
-                    benchmarkName: source.spec.name,
-                    tag: asOptionalString(parsed.flags.tag) ?? null,
-                    visibility,
-                    sourceFileCount: sourceFileCount(source),
-                    upstream: upstream ?? null,
-                }, parsed, io, () => `Would create a writable benchmark from read-only origin ${origin.owner}/${origin.project}.`);
-                return 0;
-            }
-            const { project, publishedProject, origin: nextOrigin } = await createHostedBenchmarkFromSource({
-                baseUrl,
-                dir,
-                source,
-                visibility,
-                upstream,
-            });
-            writeOutput({
-                ok: true,
-                action: "create",
-                benchmark: publishedProject,
-                tag: asOptionalString(parsed.flags.tag) ?? null,
-                visibility,
-                origin: nextOrigin,
-                upstream: upstream ?? null,
-                urls: buildWorkbenchResourceUrls({
-                    baseUrl,
-                    projectId: publishedProject.id ?? project.id,
-                    owner: nextOrigin.owner,
-                    projectName: nextOrigin.project,
-                }),
-            }, parsed, io, (record) => {
-                const value = record;
-                return [
-                    `Pushed ${value.origin.owner}/${value.origin.project} (${value.origin.projectId}).`,
-                    ...(value.upstream ? [`Upstream: ${value.upstream.owner}/${value.upstream.project}`] : []),
-                    `Open benchmark: ${value.urls.benchmark}`,
-                ].join("\n");
-            });
-            return 0;
-        }
-    }
     if (dryRun) {
         writeOutput({
             ok: true,
@@ -2426,92 +2981,82 @@ async function pushBenchmark(argv, io) {
             dir,
             baseUrl,
             benchmarkId: projectId,
-            tag: asOptionalString(parsed.flags.tag) ?? null,
-            visibility,
+            remote: origin.remote,
+            benchmarkName: source.spec.name,
+            visibility: visibility ?? "unchanged",
             sourceFileCount: sourceFileCount(source),
-        }, parsed, io, () => `Would push ${sourceFileCount(source)} source file(s) to ${projectId}.`);
+            runtime: runtimeBundleStats(runtime),
+            sourceFingerprint: state.source.fingerprint,
+            runtimeFingerprint: state.base.runtimeFingerprint,
+        }, parsed, io, () => `Would push ${sourceFileCount(source)} source file(s) and runtime history to ${origin.remote}.`);
         return 0;
     }
-    const response = await apiRequest(projectApiPath(projectId, "/source"), {
+    const response = await apiRequest(projectApiPath(projectId, "/state"), {
         method: "PUT",
-        body: hostedProjectSourceRequest(source),
+        body: state,
     }, baseUrl);
-    const publishedProject = visibility === "public"
-        ? (await apiRequest(projectApiPath(response.benchmark.id, "/publish"), { method: "PUT" }, baseUrl)).benchmark
-        : response.benchmark;
-    const nextOrigin = await writeWorkbenchOrigin(dir, {
+    const responseProject = hostedProjectSummaryFromState(response.state);
+    const publishedProject = await applyRequestedProjectVisibility({
         baseUrl,
-        owner: publishedProject.ownerUsername ?? response.benchmark.ownerUsername ?? origin.owner,
-        project: publishedProject.name ?? response.benchmark.name ?? origin.project ?? source.spec.name,
-        projectId: publishedProject.id ?? response.benchmark.id,
-        writable: true,
-        sourceRevisionId: publishedProject.currentSpecVersionId ?? response.benchmark.currentSpecVersionId,
-        sourceFingerprint: response.sourceFingerprint ?? publishedProject.sourceFingerprint ?? response.benchmark.sourceFingerprint,
-        upstream: origin.upstream,
+        projectId: responseProject.id,
+        responseProject,
+        visibility,
+    });
+    const nextOrigin = await writeWorkbenchOriginFromState(dir, {
+        baseUrl,
+        state: response.state,
+        project: publishedProject,
+        sourceFingerprint: state.source.fingerprint,
     });
     writeOutput({
         ok: true,
         action: "update",
         changed: response.changed === true,
         benchmark: publishedProject,
-        tag: asOptionalString(parsed.flags.tag) ?? null,
-        visibility,
+        visibility: visibility ?? "unchanged",
         origin: nextOrigin,
+        source: response.source,
+        runtime: response.runtime.stats,
         urls: buildWorkbenchResourceUrls({
             baseUrl,
-            projectId: publishedProject.id ?? response.benchmark.id,
-            owner: nextOrigin.owner,
-            projectName: nextOrigin.project,
+            projectId: publishedProject.id ?? responseProject.id,
+            ...originRemoteUrlParts(nextOrigin),
         }),
     }, parsed, io, (record) => {
         const value = record;
         return [
-            `${value.changed ? "Pushed" : "Already up to date"} ${value.origin.owner}/${value.origin.project} (${value.origin.projectId}).`,
+            `${value.changed ? "Pushed" : "Already up to date"} ${value.origin.remote} (${value.origin.projectId}).`,
             `Open benchmark: ${value.urls.benchmark}`,
         ].join("\n");
     });
     return 0;
 }
-async function createHostedBenchmarkFromSource(args) {
-    const response = await apiRequest("/api/workbench/benchmarks", {
+async function createHostedBenchmarkFromState(args) {
+    const result = await apiRequest("/api/workbench/benchmarks/state", {
         method: "POST",
-        body: hostedProjectSourceRequest(args.source),
+        body: args.state,
     }, args.baseUrl);
-    const project = response.benchmark;
-    const publishedProject = args.visibility === "public"
-        ? (await apiRequest(projectApiPath(project.id, "/publish"), { method: "PUT" }, args.baseUrl)).benchmark
-        : project;
-    const origin = await writeWorkbenchOrigin(args.dir, {
+    const project = hostedProjectSummaryFromState(result.state);
+    const origin = await writeWorkbenchOriginFromState(args.dir, {
         baseUrl: args.baseUrl,
-        owner: publishedProject.ownerUsername ?? project.ownerUsername ?? "",
-        project: publishedProject.name ?? project.name ?? args.source.spec.name,
-        projectId: publishedProject.id ?? project.id,
-        writable: true,
-        sourceRevisionId: publishedProject.currentSpecVersionId ?? project.currentSpecVersionId,
-        sourceFingerprint: publishedProject.sourceFingerprint ?? project.sourceFingerprint,
-        ...(args.upstream ? { upstream: args.upstream } : {}),
+        state: result.state,
+        project,
+        sourceFingerprint: args.state.source.fingerprint,
     });
-    return { project, publishedProject, origin };
+    return { project, origin, result };
 }
-async function readAuthenticatedWorkbenchUsername(baseUrl) {
-    const config = await loadConfig();
-    const status = await readWorkbenchProfileStatus({ ...config, baseUrl });
-    return status.authenticated ? status.profile?.username ?? null : null;
-}
-function upstreamFromOrigin(origin) {
-    if (!origin.owner || !origin.project || !origin.projectId || !origin.sourceRevisionId) {
-        return undefined;
+async function applyRequestedProjectVisibility(args) {
+    if (args.visibility === "public") {
+        return (await apiRequest(projectApiPath(args.projectId, "/publish"), { method: "PUT" }, args.baseUrl)).benchmark;
     }
-    return {
-        owner: origin.owner,
-        project: origin.project,
-        projectId: origin.projectId,
-        sourceRevisionId: origin.sourceRevisionId,
-    };
+    if (args.visibility === "private") {
+        return (await apiRequest(projectApiPath(args.projectId, "/publish"), { method: "DELETE" }, args.baseUrl)).benchmark;
+    }
+    return args.responseProject;
 }
-function readBenchmarkVisibility(value) {
+function readOptionalBenchmarkVisibility(value) {
     if (value === undefined) {
-        return "public";
+        return undefined;
     }
     if (value === "private" || value === "public") {
         return value;
@@ -2524,41 +3069,37 @@ async function cloneProject(argv, io) {
     const ref = readRequiredBenchmarkRef(parsed);
     const outputDir = parsed.positionals[1] ?? ref.project;
     if (parsed.positionals.length > 2) {
-        throw new UsageError("workbench clone accepts OWNER/BENCHMARK[@REF] and an optional output directory.");
+        throw new UsageError("workbench clone accepts OWNER/BENCHMARK and an optional output directory.");
     }
     const baseUrl = await effectiveBaseUrl();
-    const projectResponse = await apiRequest(publicProjectApiPath(ref), {}, baseUrl);
-    const filesResponse = await apiRequest(publicProjectSourceApiPath(ref), {}, baseUrl);
+    const state = await apiRequest(publicProjectStateApiPath(ref), {}, baseUrl);
     if (parsed.flags["dry-run"] === true) {
         writeOutput({
             ok: true,
             dryRun: true,
             ref,
             outputDir,
-            fileCount: filesResponse.files.length,
+            fileCount: state.source.files.length,
+            runtime: runtimeBundleStats(state.runtime),
+            sourceFingerprint: state.source.fingerprint ?? state.base.sourceFingerprint ?? null,
+            runtimeFingerprint: state.base.runtimeFingerprint ?? null,
         }, parsed, io, () => `Would clone ${formatBenchmarkRef(ref)} to ${outputDir}.`);
         return 0;
     }
-    await syncSourceFiles(outputDir, filesResponse.files);
-    const project = projectResponse.benchmark;
-    const sourceProject = filesResponse.benchmark;
-    const origin = await writeWorkbenchOrigin(outputDir, {
+    const applied = await applyProjectStateToLocal({
+        dir: outputDir,
         baseUrl,
-        owner: sourceProject?.ownerUsername ?? project.ownerUsername,
-        project: sourceProject?.name ?? project.name,
-        projectId: sourceProject?.id ?? project.id,
-        writable: false,
-        sourceRevisionId: sourceProject?.currentSpecVersionId ?? project.currentSpecVersionId,
-        sourceFingerprint: sourceProject?.sourceFingerprint ?? project.sourceFingerprint,
+        state,
     });
     writeOutput({
         ok: true,
-        origin,
+        origin: applied.origin,
         outputDir,
-        files: filesResponse.files.length,
+        files: applied.files,
+        runtime: applied.runtime,
     }, parsed, io, (record) => {
         const value = record;
-        return `Cloned ${value.origin.owner}/${value.origin.project} to ${value.outputDir} (${value.files} file(s)).`;
+        return `Cloned ${value.origin.remote} to ${value.outputDir} (${value.files} file(s)).`;
     });
     return 0;
 }
@@ -2566,167 +3107,273 @@ async function pullProject(argv, io) {
     const parsed = parseArgs(argv);
     rejectUnknownFlags(parsed, new Set(["dir", "dry-run", "json"]));
     if (parsed.positionals.length > 0) {
-        throw new UsageError("workbench pull updates the current origin; use workbench clone OWNER/BENCHMARK[@REF] DIR for a new directory.");
+        throw new UsageError("workbench pull updates the current origin; use workbench clone OWNER/BENCHMARK DIR for a new directory.");
     }
     const dir = resolveDir(parsed);
     const origin = await requireWorkbenchOrigin(dir);
-    const filesResponse = origin.writable
-        ? await apiRequest(projectApiPath(origin.projectId, "/source"), {}, await effectiveBaseUrl(origin.baseUrl))
-        : await apiRequest(publicProjectSourceApiPath({ owner: origin.owner, project: origin.project }), {}, await effectiveBaseUrl(origin.baseUrl));
+    const baseUrl = await effectiveBaseUrl(origin.baseUrl);
+    const remoteRef = parseOriginRemote(origin);
+    const state = await apiRequest(publicProjectStateApiPath(remoteRef), {}, baseUrl);
     if (parsed.flags["dry-run"] === true) {
         writeOutput({
             ok: true,
             dryRun: true,
             dir,
-            fileCount: filesResponse.files.length,
-        }, parsed, io, () => `Would pull ${filesResponse.files.length} source file(s) into ${dir}.`);
+            fileCount: state.source.files.length,
+            runtime: runtimeBundleStats(state.runtime),
+            sourceFingerprint: state.source.fingerprint ?? state.base.sourceFingerprint ?? null,
+            runtimeFingerprint: state.base.runtimeFingerprint ?? null,
+        }, parsed, io, () => `Would pull ${state.source.files.length} source file(s) and runtime history into ${dir}.`);
         return 0;
     }
-    await syncSourceFiles(dir, filesResponse.files);
-    const sourceProject = filesResponse.benchmark;
-    const nextOrigin = await writeWorkbenchOrigin(dir, {
-        ...origin,
-        ...(sourceProject?.ownerUsername ? { owner: sourceProject.ownerUsername } : {}),
-        ...(sourceProject?.name ? { project: sourceProject.name } : {}),
-        ...(sourceProject?.id ? { projectId: sourceProject.id } : {}),
-        ...(sourceProject?.currentSpecVersionId ? { sourceRevisionId: sourceProject.currentSpecVersionId } : {}),
-        ...(sourceProject?.sourceFingerprint ? { sourceFingerprint: sourceProject.sourceFingerprint } : {}),
+    const applied = await applyProjectStateToLocal({
+        dir,
+        baseUrl,
+        state,
+        origin,
+        requireCleanSource: true,
     });
     writeOutput({
         ok: true,
-        origin: nextOrigin,
+        origin: applied.origin,
         dir,
-        files: filesResponse.files.length,
+        files: applied.files,
+        runtime: applied.runtime,
     }, parsed, io, (record) => {
         const value = record;
         return `Pulled ${value.files} source file(s) into ${value.dir}.`;
     });
     return 0;
 }
-async function fetchProject(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
-    if (parsed.positionals.length > 0) {
-        throw new UsageError("workbench fetch updates the current remote cache; use workbench clone OWNER/BENCHMARK[@REF] DIR for a new directory.");
+async function applyProjectStateToLocal(args) {
+    if (args.requireCleanSource === true && args.origin) {
+        await assertLocalSourceMatchesOrigin(args.dir, args.origin);
     }
-    const dir = resolveDir(parsed);
-    const origin = await requireWorkbenchOrigin(dir);
-    const filesResponse = await readRemoteSourceFiles(origin);
-    const fetchRoot = path.join(dir, ".workbench", "fetch");
-    await fs.rm(fetchRoot, { force: true, recursive: true });
-    await fs.mkdir(fetchRoot, { recursive: true });
-    await writeFiles(path.join(fetchRoot, "source"), filesResponse.files);
-    const sourceProject = filesResponse.benchmark;
-    const nextOrigin = await writeWorkbenchOrigin(dir, {
-        ...origin,
-        ...(sourceProject?.ownerUsername ? { owner: sourceProject.ownerUsername } : {}),
-        ...(sourceProject?.name ? { project: sourceProject.name } : {}),
-        ...(sourceProject?.id ? { projectId: sourceProject.id } : {}),
-        ...(sourceProject?.currentSpecVersionId ? { sourceRevisionId: sourceProject.currentSpecVersionId } : {}),
-        ...(sourceProject?.sourceFingerprint ? { sourceFingerprint: sourceProject.sourceFingerprint } : {}),
+    await syncSourceFiles(args.dir, args.state.source.files);
+    const runtimeImport = await importLocalRuntimeBundle(args.dir, args.state.runtime);
+    const origin = await writeWorkbenchOriginFromState(args.dir, {
+        baseUrl: args.baseUrl,
+        state: args.state,
+        sourceFingerprint: await localSourceFingerprint(args.dir),
     });
-    await fs.writeFile(path.join(fetchRoot, "manifest.json"), `${JSON.stringify({
-        fetchedAt: new Date().toISOString(),
-        origin: nextOrigin,
-        files: filesResponse.files.map((file) => file.path),
-    }, null, 2)}\n`);
-    writeOutput({
+    return {
+        origin,
+        files: args.state.source.files.length,
+        runtime: runtimeImport.stats,
+    };
+}
+async function retryHostedWorkflow(argv, io) {
+    const parsed = parseArgs(argv);
+    rejectUnknownFlags(parsed, new Set([
+        "dir",
+        "benchmark",
+        "watch",
+        "interval-ms",
+        "timeout-ms",
+        "json",
+    ]));
+    rejectUnexpectedPositionals(parsed, "workbench retry --hosted", 1);
+    const targetId = parsed.positionals[0];
+    if (!targetId) {
+        throw new UsageError("Missing required TARGET_ID.");
+    }
+    if (parsed.flags.watch !== true && (parsed.flags["interval-ms"] !== undefined ||
+        parsed.flags["timeout-ms"] !== undefined)) {
+        throw new UsageError("--interval-ms and --timeout-ms require --watch.");
+    }
+    const target = await resolveHostedTarget(parsed, { requireProjectIdentity: true });
+    const retryTarget = await resolveHostedRetryTarget(target, targetId);
+    const watchIntervalMs = parsed.flags.watch === true
+        ? parsePositiveInt(parsed.flags["interval-ms"], 1000, "interval-ms")
+        : undefined;
+    const watchTimeoutMs = parsed.flags.watch === true
+        ? parseOptionalPositiveInt(parsed.flags["timeout-ms"], "timeout-ms")
+        : undefined;
+    const response = await apiRequest(projectApiPath(target.projectId, "/runs"), {
+        method: "POST",
+        body: retryTarget.request,
+    }, target.baseUrl);
+    const startedRun = withRunUrls(target, response.run);
+    if (parsed.flags.watch === true) {
+        if (parsed.flags.json !== true) {
+            io.stdout.write(`${formatHostedRunStarted(startedRun, retryTarget.workflow).trimEnd()}\n${HOSTED_WATCH_LIFECYCLE_NOTE}\n`);
+        }
+        const watched = await watchHostedRun({
+            parsed,
+            target,
+            runId: response.run.id,
+            intervalMs: watchIntervalMs ?? 1000,
+            timeoutMs: watchTimeoutMs,
+        });
+        const outputRun = withRunUrls(target, await withHostedRunFailureSummary(target, watched));
+        await tryImportTerminalHostedProjectState({ target, io });
+        const result = {
+            ok: hostedRunSucceeded(watched),
+            retried: {
+                id: retryTarget.sourceId,
+                kind: retryTarget.sourceKind,
+                workflow: retryTarget.workflow,
+            },
+            runId: outputRun.id,
+            candidateId: outputRun.outputCandidateId ?? outputRun.candidateId,
+            activeCandidateId: outputRun.activeCandidateId ?? null,
+            run: outputRun,
+            ...(outputRun.urls ? { urls: outputRun.urls } : {}),
+            ...(outputRun.failedJobCount !== undefined ? { failedJobCount: outputRun.failedJobCount } : {}),
+            ...(outputRun.error ? { error: outputRun.error } : {}),
+        };
+        writeOutput(result, parsed, io, formatRetryCommandResult);
+        return hostedRunSucceeded(watched) ? 0 : 1;
+    }
+    const result = {
         ok: true,
-        origin: nextOrigin,
-        dir,
-        fetchRoot,
-        files: filesResponse.files.length,
-    }, parsed, io, (record) => {
-        const value = record;
-        return `Fetched ${value.files} source file(s) into ${value.fetchRoot}.`;
-    });
+        retried: {
+            id: retryTarget.sourceId,
+            kind: retryTarget.sourceKind,
+            workflow: retryTarget.workflow,
+        },
+        runId: startedRun.id,
+        candidateId: startedRun.outputCandidateId ?? startedRun.candidateId,
+        activeCandidateId: startedRun.activeCandidateId ?? null,
+        run: startedRun,
+        ...(startedRun.urls ? { urls: startedRun.urls } : {}),
+    };
+    writeOutput(result, parsed, io, formatRetryCommandResult);
     return 0;
 }
-async function readRemoteSourceFiles(origin) {
-    return origin.writable
-        ? await apiRequest(projectApiPath(origin.projectId, "/source"), {}, await effectiveBaseUrl(origin.baseUrl))
-        : await apiRequest(publicProjectSourceApiPath({ owner: origin.owner, project: origin.project }), {}, await effectiveBaseUrl(origin.baseUrl));
+async function resolveHostedRetryTarget(target, targetId) {
+    if (targetId.startsWith("eval_")) {
+        return await resolveHostedEvaluationRetryTarget(target, targetId);
+    }
+    const detail = await readHostedRunDetail(target, targetId);
+    const run = detail.run;
+    if (run.status !== "finished") {
+        throw new UsageError(`Run ${run.id} is ${run.status}; wait for it to finish before retrying.`);
+    }
+    if (!hostedRunRecordFailed(run)) {
+        throw new UsageError(`Run ${run.id} did not fail; use workbench ${run.workflow ?? "eval"} --hosted to intentionally run it again.`);
+    }
+    if (run.workflow === "eval") {
+        const candidateId = hostedRunEvaluationCandidateId(run, detail.jobs);
+        if (!candidateId) {
+            throw new UsageError(`Run ${run.id} has no candidate id to retry.`);
+        }
+        return {
+            sourceId: targetId,
+            sourceKind: "run",
+            workflow: "eval",
+            request: {
+                workflow: "eval",
+                samples: run.samples ?? 1,
+                candidateId,
+                sourceYaml: hostedRetrySourceYaml(run, run.id),
+                preserveActive: true,
+                ...retrySampleSelectionFromJobs(detail.jobs),
+            },
+        };
+    }
+    if (run.workflow === "improve") {
+        const baseCandidateId = stringValue(readRecord(run.input)?.baseCandidateId);
+        if (!baseCandidateId) {
+            throw new UsageError(`Run ${run.id} is missing its base candidate id.`);
+        }
+        return {
+            sourceId: targetId,
+            sourceKind: "run",
+            workflow: "improve",
+            request: {
+                workflow: "improve",
+                samples: run.samples ?? 1,
+                budget: run.budget ?? run.attemptsRequested ?? 1,
+                candidateId: baseCandidateId,
+                sourceYaml: hostedRetrySourceYaml(run, run.id),
+                preserveActive: true,
+            },
+        };
+    }
+    throw new UsageError(`Run ${run.id} has no retryable workflow.`);
 }
-async function runRemoteCommand(argv, io) {
-    const command = argv[0] ?? "show";
-    switch (command) {
-        case "show":
-            return await remoteShow(argv.slice(1), io);
-        case "add":
-            return await remoteAdd(argv.slice(1), io, "add");
-        case "set-url":
-            return await remoteAdd(argv.slice(1), io, "set-url");
-        case "remove":
-            return await remoteRemove(argv.slice(1), io);
-        default:
-            throw new UsageError(`Unknown command: remote ${argv.join(" ")}`);
+async function resolveHostedEvaluationRetryTarget(target, evaluationId) {
+    const snapshot = await apiRequest(projectApiPath(target.projectId, "/workbench/snapshot"), {}, target.baseUrl);
+    const evaluation = snapshot.evaluations.find((entry) => entry.id === evaluationId);
+    if (!evaluation) {
+        throw new UsageError(`Hosted evaluation not found: ${evaluationId}`);
     }
+    const run = snapshot.runs.find((entry) => entry.id === evaluation.runId) ?? null;
+    if (!evaluationScorecardFailed(evaluation, run)) {
+        throw new UsageError(`Evaluation ${evaluation.id} did not fail; use workbench eval --hosted to intentionally run it again.`);
+    }
+    if (!run) {
+        throw new UsageError(`Evaluation ${evaluation.id} is missing its run record.`);
+    }
+    const detail = await readHostedRunDetail(target, run.id);
+    const detailedRun = detail.run;
+    return {
+        sourceId: evaluationId,
+        sourceKind: "evaluation",
+        workflow: "eval",
+        request: {
+            workflow: "eval",
+            samples: evaluation.sampleCount || detailedRun.samples || 1,
+            candidateId: evaluation.candidateId,
+            sourceYaml: hostedRetrySourceYaml(detailedRun, detailedRun.id),
+            preserveActive: true,
+            ...retrySampleSelectionFromJobs(detail.jobs),
+        },
+    };
 }
-async function remoteShow(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
-    const origin = await requireWorkbenchOrigin(resolveDir(parsed));
-    writeOutput({ ok: true, remote: "origin", origin }, parsed, io, (record) => {
-        const value = record;
-        return [
-            `origin\t${value.origin.owner}/${value.origin.project}`,
-            `url\t${value.origin.baseUrl}`,
-            `writable\t${value.origin.writable ? "yes" : "no"}`,
-            ...(value.origin.sourceFingerprint ? [`fingerprint\t${value.origin.sourceFingerprint}`] : []),
-        ].join("\n");
-    });
-    return 0;
+function retrySampleSelectionFromJobs(jobs) {
+    const selectedSamples = uniqueCaseSamplePairs(jobs
+        .filter((job) => job.status !== "succeeded" &&
+        executionPurposeFromJobInput(job.input) === "attempt")
+        .map(caseSamplePairFromJob)
+        .filter((pair) => pair !== null));
+    return selectedSamples.length > 0
+        ? { selectedSamples }
+        : {};
 }
-async function remoteAdd(argv, io, command) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
-    const [name, refValue] = parsed.positionals;
-    if (name !== "origin" || !refValue || parsed.positionals.length !== 2) {
-        throw new UsageError(`workbench remote ${command} accepts: origin OWNER/BENCHMARK[@REF].`);
+function uniqueCaseSamplePairs(pairs) {
+    const byKey = new Map();
+    for (const pair of pairs) {
+        byKey.set(caseSamplePairKey(pair), pair);
+    }
+    return [...byKey.values()].sort((left, right) => left.caseId.localeCompare(right.caseId) ||
+        left.sampleIndex - right.sampleIndex);
+}
+async function readHostedRunDetail(target, runId) {
+    return await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(runId)}`), {}, target.baseUrl);
+}
+async function tryImportTerminalHostedProjectState(args) {
+    const origin = args.target.origin;
+    if (!origin || origin.projectId !== args.target.projectId) {
+        return;
+    }
+    try {
+        const state = await apiRequest(projectApiPath(args.target.projectId, "/state"), {}, args.target.baseUrl);
+        await applyProjectStateToLocal({
+            dir: args.target.dir,
+            baseUrl: args.target.baseUrl,
+            state,
+            origin,
+            requireCleanSource: true,
+        });
+    }
+    catch (error) {
+        args.io.stderr.write(`Hosted run finished, but local project state was not updated: ${errorMessage(error)}\n`);
     }
-    const ref = parseBenchmarkRef(refValue);
-    const baseUrl = await effectiveBaseUrl();
-    const project = await resolveRemoteProject(formatBenchmarkRef(ref), baseUrl);
-    const origin = await writeWorkbenchOrigin(resolveDir(parsed), {
-        baseUrl,
-        owner: project.ownerUsername ?? ref.owner,
-        project: project.name ?? ref.project,
-        projectId: project.id,
-        writable: false,
-        ...(project.currentSpecVersionId ? { sourceRevisionId: project.currentSpecVersionId } : {}),
-        ...(project.sourceFingerprint ? { sourceFingerprint: project.sourceFingerprint } : {}),
-    });
-    writeOutput({ ok: true, remote: "origin", origin }, parsed, io, () => `Set origin to ${origin.owner}/${origin.project}.`);
-    return 0;
-}
-async function remoteRemove(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
-    const [name] = parsed.positionals;
-    if (name !== "origin" || parsed.positionals.length !== 1) {
-        throw new UsageError("workbench remote remove accepts: origin.");
-    }
-    const originPath = workbenchOriginPath(resolveDir(parsed));
-    const existed = await fileIsReadable(originPath);
-    await fs.rm(originPath, { force: true });
-    writeOutput({ ok: true, remote: "origin", removed: existed, path: originPath }, parsed, io, () => existed
-        ? `Removed origin (${originPath}).`
-        : `No origin configured (${originPath}).`);
-    return 0;
 }
-async function starProject(argv, io, starred) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["json"]));
-    const ref = readRequiredBenchmarkRef(parsed);
-    if (parsed.positionals.length > 1) {
-        throw new UsageError(`${starred ? "workbench cloud star" : "workbench cloud unstar"} accepts exactly one OWNER/BENCHMARK ref.`);
+function hostedRetrySourceYaml(run, runId) {
+    const sourceYaml = stringValue(readRecord(run.input)?.sourceYaml);
+    if (!sourceYaml) {
+        throw new UsageError(`Run ${runId} is missing its recorded source configuration.`);
     }
-    const response = await apiRequest(`${publicProjectApiPath(ref)}/star`, { method: starred ? "PUT" : "DELETE" }, await effectiveBaseUrl());
-    writeOutput({ ok: true, benchmark: response.benchmark }, parsed, io, (record) => {
-        const value = record;
-        return `${starred ? "Starred" : "Unstarred"} ${formatBenchmarkRef(ref)}; ${value.benchmark.starCount} star(s).`;
-    });
-    return 0;
+    return sourceYaml;
+}
+function hostedRunRecordFailed(run) {
+    return run.outcome === "error" ||
+        run.outcome === "cancelled" ||
+        (run.failedJobCount ?? 0) > 0 ||
+        Boolean(run.error);
 }
 async function startHostedWorkflow(workflow, argv, io) {
     const parsed = parseArgs(argv);
@@ -2734,9 +3381,10 @@ async function startHostedWorkflow(workflow, argv, io) {
         "dir",
         "benchmark",
         "base",
-        "optimizer",
+        "runs",
         "budget",
         "samples",
+        "rerun",
         "watch",
         "dry-run",
         "interval-ms",
@@ -2744,44 +3392,68 @@ async function startHostedWorkflow(workflow, argv, io) {
         "json",
     ]));
     if (parsed.positionals.length > 1) {
-        throw new UsageError(`workbench cloud ${workflow} accepts at most one source file or directory argument.`);
+        throw new UsageError(`workbench ${workflow} --hosted accepts at most one source file or directory argument.`);
     }
-    const optimizerPath = asOptionalString(parsed.flags.optimizer);
-    const sourceArg = parsed.positionals[0] ?? asOptionalString(parsed.flags.dir) ?? process.cwd();
-    if (parsed.positionals.length > 0 && parsed.flags.dir !== undefined) {
-        throw new UsageError("Use either --dir or SOURCE, not both.");
+    const sourceArg = resolveSourceDir(parsed);
+    const samples = parsePositiveInt(parsed.flags.samples, 1, "samples");
+    const budget = workflow === "improve"
+        ? parsePositiveInt(parsed.flags.budget, 1, "budget")
+        : undefined;
+    if (parsed.flags.watch !== true && (parsed.flags["interval-ms"] !== undefined ||
+        parsed.flags["timeout-ms"] !== undefined)) {
+        throw new UsageError("--interval-ms and --timeout-ms require --watch.");
     }
-    const baseSubjectId = asOptionalString(parsed.flags.base);
+    const runsFlag = asOptionalString(parsed.flags.runs);
+    const defaultProjectSource = await readLocalProjectSource(path.resolve(sourceArg));
+    const selectedRunIds = workflow === "eval"
+        ? resolveCandidateRunSelection(defaultProjectSource, runsFlag)
+        : [singleRequestedRunId(runsFlag, `workbench ${workflow} --hosted`) ?? defaultProjectSource.candidateRunId];
+    if (workflow === "eval" && selectedRunIds.length > 1) {
+        let failed = 0;
+        const results = [];
+        for (const runId of selectedRunIds) {
+            const captured = createCapturingIo(io);
+            const code = await startHostedWorkflow(workflow, hostedWorkflowArgsForRun({
+                parsed,
+                sourceDir: defaultProjectSource.dir,
+                runId,
+            }), captured.io);
+            if (code !== 0) {
+                failed += 1;
+            }
+            results.push(parseCapturedJson(captured.stdoutText()));
+        }
+        writeOutput({
+            ok: failed === 0,
+            candidateRunIds: selectedRunIds,
+            failedRunCount: failed,
+            results,
+        }, parsed, io, () => `Processed ${selectedRunIds.length} hosted candidate run(s); ${failed} failed.`);
+        return failed === 0 ? 0 : 1;
+    }
+    const baseCandidateId = asOptionalString(parsed.flags.base);
     const request = workflow === "improve"
         ? {
             workflow,
-            budget: parsePositiveInt(parsed.flags.budget, 1, "budget"),
-            samples: parsePositiveInt(parsed.flags.samples, 1, "samples"),
-            ...(baseSubjectId ? { subjectId: baseSubjectId } : {}),
+            budget,
+            samples,
+            ...(baseCandidateId ? { candidateId: baseCandidateId } : {}),
         }
         : {
             workflow,
-            samples: parsePositiveInt(parsed.flags.samples, 1, "samples"),
-            ...(baseSubjectId ? { subjectId: baseSubjectId } : {}),
+            samples,
+            ...(baseCandidateId ? { candidateId: baseCandidateId } : {}),
         };
-    if (workflow === "improve" && !optimizerPath) {
-        throw new UsageError("workbench cloud improve requires --optimizer OPTIMIZER_YAML.");
+    const projectSource = selectedRunIds[0] === defaultProjectSource.candidateRunId
+        ? defaultProjectSource
+        : await readLocalProjectSource(path.resolve(sourceArg), { runId: selectedRunIds[0] });
+    request.sourceYaml = projectSource.specSource;
+    request.adapterFiles = projectSource.adapterFiles;
+    if (workflow === "eval" && !baseCandidateId) {
+        request.candidateFiles = projectSource.candidateFiles;
     }
-    if (parsed.flags.watch !== true && (parsed.flags["interval-ms"] !== undefined ||
-        parsed.flags["timeout-ms"] !== undefined)) {
-        throw new UsageError("--interval-ms and --timeout-ms require --watch.");
-    }
-    const projectSource = await readLocalProjectSource(path.resolve(sourceArg), {
-        optimizerPath,
-    });
-    if (workflow === "eval") {
-        request.subjectSource = projectSource.subjectSource;
-        request.subjectFiles = projectSource.subjectFiles;
-        request.adapterFiles = projectSource.adapterFiles;
-    }
-    if (workflow === "improve" && projectSource.optimizerSource) {
-        request.optimizerSource = projectSource.optimizerSource;
-        request.adapterFiles = projectSource.adapterFiles;
+    if (parsed.flags.rerun === true) {
+        request.rerun = true;
     }
     const watchIntervalMs = parsed.flags.watch === true
         ? parsePositiveInt(parsed.flags["interval-ms"], 1000, "interval-ms")
@@ -2808,13 +3480,16 @@ async function startHostedWorkflow(workflow, argv, io) {
         sourceDir: projectSource.dir,
     });
     if (workflow === "improve") {
-        request.subjectId = await ensureHostedImproveBaseSubject({
+        request.candidateId = await ensureHostedImproveBaseCandidate({
             parsed,
             target,
             samples: request.samples,
-            subjectId: baseSubjectId,
+            candidateId: baseCandidateId,
+            sourceYaml: projectSource.specSource,
+            adapterFiles: projectSource.adapterFiles,
             intervalMs: watchIntervalMs ?? 1000,
             timeoutMs: watchTimeoutMs,
+            io,
         });
     }
     const response = await apiRequest(projectApiPath(target.projectId, "/runs"), {
@@ -2822,6 +3497,20 @@ async function startHostedWorkflow(workflow, argv, io) {
         body: request,
     }, target.baseUrl);
     const startedRun = withRunUrls(target, response.run);
+    const startedRunOutput = response.reused === true
+        ? { ...startedRun, reused: true }
+        : startedRun;
+    if (response.reused === true && response.run.status === "finished") {
+        await tryImportTerminalHostedProjectState({ target, io });
+        writeOutput({
+            ok: hostedRunSucceeded(response.run),
+            reused: true,
+            workflow,
+            runId: startedRun.id,
+            ...startedRun,
+        }, parsed, io, () => `Reused hosted ${workflow} ${startedRun.id}. Use --rerun to intentionally run it again.`);
+        return hostedRunSucceeded(response.run) ? 0 : 1;
+    }
     if (parsed.flags.watch === true) {
         if (parsed.flags.json !== true) {
             io.stdout.write(`${formatHostedRunStarted(startedRun, workflow).trimEnd()}\n${HOSTED_WATCH_LIFECYCLE_NOTE}\n`);
@@ -2834,26 +3523,27 @@ async function startHostedWorkflow(workflow, argv, io) {
             timeoutMs: watchTimeoutMs,
         });
         const outputRun = await withHostedRunFailureSummary(target, watched);
+        await tryImportTerminalHostedProjectState({ target, io });
         writeOutput(withRunUrls(target, outputRun), parsed, io, formatHostedRunResult);
         return hostedRunSucceeded(watched) ? 0 : 1;
     }
-    writeOutput(startedRun, parsed, io, (run) => formatHostedRunStarted(run, workflow).trimEnd());
+    writeOutput(startedRunOutput, parsed, io, (run) => formatHostedRunStarted(run, workflow).trimEnd());
     return 0;
 }
-async function ensureHostedImproveBaseSubject(args) {
-    if (args.subjectId) {
-        const subject = await readHostedSubjectSummary(args.target, args.subjectId);
-        if (!subject) {
-            throw new UsageError(`Base subject ${args.subjectId} was not found for the current benchmark.`);
+async function ensureHostedImproveBaseCandidate(args) {
+    if (args.candidateId) {
+        const candidate = await readHostedCandidateSummary(args.target, args.candidateId);
+        if (!candidate) {
+            throw new UsageError(`Base candidate ${args.candidateId} was not found for the current benchmark.`);
         }
-        if (hostedSubjectIsEvaluated(subject)) {
-            return args.subjectId;
+        if (hostedCandidateIsEvaluated(candidate)) {
+            return args.candidateId;
         }
     }
     else {
-        const activeSubject = await readEvaluatedActiveHostedSubject(args.target);
-        if (activeSubject) {
-            return activeSubject.id;
+        const activeCandidate = await readEvaluatedActiveHostedCandidate(args.target);
+        if (activeCandidate) {
+            return activeCandidate.id;
         }
     }
     const response = await apiRequest(projectApiPath(args.target.projectId, "/runs"), {
@@ -2861,7 +3551,9 @@ async function ensureHostedImproveBaseSubject(args) {
         body: {
             workflow: "eval",
             samples: args.samples,
-            ...(args.subjectId ? { subjectId: args.subjectId } : {}),
+            ...(args.candidateId ? { candidateId: args.candidateId } : {}),
+            sourceYaml: args.sourceYaml,
+            ...(args.adapterFiles.length > 0 ? { adapterFiles: args.adapterFiles } : {}),
         },
     }, args.target.baseUrl);
     const watched = await watchHostedRun({
@@ -2872,333 +3564,59 @@ async function ensureHostedImproveBaseSubject(args) {
         timeoutMs: args.timeoutMs,
     });
     if (!hostedRunSucceeded(watched)) {
-        throw new UsageError(`Parent subject eval ${watched.id} failed; improve was not started.`);
+        throw new UsageError(`Parent candidate eval ${watched.id} failed; improve was not started.`);
     }
-    if (!watched.subjectId) {
-        throw new UsageError(`Parent subject eval ${watched.id} did not produce a subject.`);
-    }
-    return watched.subjectId;
-}
-async function readHostedSubjectSummary(target, subjectId) {
-    const response = await apiRequest(projectApiPath(target.projectId, "/subjects"), {}, target.baseUrl);
-    return response.subjects.find((entry) => entry.id === subjectId) ?? null;
-}
-async function readEvaluatedActiveHostedSubject(target) {
-    const response = await apiRequest(projectApiPath(target.projectId), {}, target.baseUrl);
-    const activeSubjectId = response.benchmark.activeSubjectId;
-    if (!activeSubjectId) {
-        return null;
+    if (!watched.candidateId) {
+        throw new UsageError(`Parent candidate eval ${watched.id} did not produce a candidate.`);
     }
-    const subject = await readHostedSubjectSummary(target, activeSubjectId);
-    return subject && hostedSubjectIsEvaluated(subject) ? subject : null;
-}
-function hostedSubjectIsEvaluated(subject) {
-    return subject.status === "evaluated" || subject.eval != null;
+    await tryImportTerminalHostedProjectState({ target: args.target, io: args.io });
+    return watched.candidateId;
 }
-async function benchmarkList(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud benchmarks list", 0);
-    const response = await apiRequest("/api/workbench/public/benchmarks");
-    writeOutput(response.benchmarks, parsed, io, (projects) => {
-        if (projects.length === 0) {
-            return "No hosted Workbench benchmarks.";
-        }
-        return projects
-            .map((project) => `${project.id}\t${project.name}\t${project.runCount} runs\t${project.subjectCount} subjects`)
-            .join("\n");
-    });
-    return 0;
-}
-async function benchmarkShow(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud benchmarks show", 1);
-    const dir = resolveDir(parsed);
-    const origin = await readWorkbenchOrigin(dir);
-    const projectRef = parsed.positionals[0] ??
-        origin?.projectId;
-    if (!projectRef) {
-        throw new UsageError("Missing hosted benchmark. Pass OWNER/BENCHMARK, run workbench push, or run workbench clone.");
-    }
-    const response = await apiRequest(benchmarkApiPath(projectRef), {}, await effectiveBaseUrl(origin?.baseUrl));
-    writeOutput(response.benchmark, parsed, io, (project) => {
-        const record = project;
-        return `${record.name} (${record.id})\n${record.runs.length} runs\n${record.subjects.length} subjects`;
-    });
-    return 0;
-}
-async function benchmarkDelete(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "dry-run", "json"]));
-    if (parsed.positionals.length > 1) {
-        throw new UsageError(`Unexpected argument for workbench benchmarks delete: ${parsed.positionals.slice(1).join(" ")}`);
+function hostedWorkflowArgsForRun(args) {
+    const next = ["--dir", args.sourceDir, "--runs", args.runId, "--json"];
+    appendStringFlag(next, "benchmark", asOptionalString(args.parsed.flags.benchmark));
+    appendStringFlag(next, "base", asOptionalString(args.parsed.flags.base));
+    appendStringFlag(next, "samples", asOptionalString(args.parsed.flags.samples));
+    appendStringFlag(next, "budget", asOptionalString(args.parsed.flags.budget));
+    appendStringFlag(next, "interval-ms", asOptionalString(args.parsed.flags["interval-ms"]));
+    appendStringFlag(next, "timeout-ms", asOptionalString(args.parsed.flags["timeout-ms"]));
+    if (args.parsed.flags.watch === true) {
+        next.push("--watch");
     }
-    const dir = resolveDir(parsed);
-    const origin = await readWorkbenchOrigin(dir);
-    const projectRef = parsed.positionals[0] ??
-        origin?.projectId;
-    if (!projectRef) {
-        throw new UsageError("Missing hosted benchmark. Pass OWNER/BENCHMARK, run workbench push, or run workbench clone.");
-    }
-    const originPath = workbenchOriginPath(dir);
-    const baseUrl = await effectiveBaseUrl(origin?.baseUrl);
-    if (parsed.flags["dry-run"] === true) {
-        const originProjectDeleted = originMatchesProjectRef(origin, projectRef);
-        writeOutput({
-            ok: true,
-            dryRun: true,
-            projectRef,
-            ...(isRemoteProjectId(projectRef) ? { projectId: projectRef } : {}),
-            ...(originProjectDeleted && origin?.project ? { projectName: origin.project } : {}),
-            baseUrl,
-            ...(originProjectDeleted ? { originPath } : {}),
-        }, parsed, io, () => originProjectDeleted
-            ? `Would delete hosted benchmark ${projectRef} and remove local origin ${originPath}.`
-            : `Would delete hosted benchmark ${projectRef}.`);
-        return 0;
+    if (args.parsed.flags["dry-run"] === true) {
+        next.push("--dry-run");
     }
-    const project = await resolveRemoteProject(projectRef, baseUrl);
-    const projectId = project.id;
-    const projectName = project.name;
-    const originProjectDeleted = origin ? origin.projectId === projectId : false;
-    await apiRequest(projectApiPath(projectId), { method: "DELETE" }, baseUrl);
-    if (originProjectDeleted) {
-        await fs.rm(originPath, { force: true });
+    if (args.parsed.flags.rerun === true) {
+        next.push("--rerun");
     }
-    writeOutput({
-        ok: true,
-        deleted: true,
-        projectId,
-        ...(projectName ? { projectName } : {}),
-        originRemoved: originProjectDeleted,
-        ...(originProjectDeleted ? { originPath } : {}),
-    }, parsed, io, () => originProjectDeleted
-        ? `Deleted benchmark ${formatProjectRef(project)} and removed local origin ${originPath}.`
-        : `Deleted benchmark ${formatProjectRef(project)}.`);
-    return 0;
-}
-async function benchmarkVersions(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud benchmarks versions", 1);
-    const projectRef = parsed.positionals[0];
-    const origin = await readWorkbenchOrigin(resolveDir(parsed));
-    if (!projectRef && !origin) {
-        throw new UsageError("Missing benchmark ref. Pass OWNER/BENCHMARK or run from a benchmark clone.");
-    }
-    const response = await apiRequest(benchmarkApiPath(projectRef ?? origin.projectId), {}, await effectiveBaseUrl(origin?.baseUrl));
-    const version = response.benchmark.sourceFingerprint ?? response.benchmark.currentSpecVersionId ?? "current";
-    writeOutput({
-        ok: true,
-        benchmark: response.benchmark,
-        versions: [{ ref: "main", digest: version, current: true }],
-    }, parsed, io, () => `${response.benchmark.name ?? projectRef ?? origin.project}\tmain\t${shortDigest(version)}\tcurrent`);
-    return 0;
-}
-async function benchmarkStarred(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud benchmarks starred", 0);
-    const response = await apiRequest("/api/workbench/benchmarks");
-    const starred = response.benchmarks.filter((project) => project.viewerHasStarred === true);
-    writeOutput(starred, parsed, io, (benchmarks) => {
-        if (benchmarks.length === 0) {
-            return "No starred benchmarks.";
-        }
-        return benchmarks
-            .map((benchmark) => `${benchmark.ownerUsername ?? "-"} / ${benchmark.name ?? "-"}\t${benchmark.starCount ?? 0} stars`)
-            .join("\n");
-    });
-    return 0;
-}
-async function subjectList(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud subjects list", 0);
-    const target = await resolveHostedTarget(parsed);
-    const response = await apiRequest(projectApiPath(target.projectId, "/subjects"), {}, target.baseUrl);
-    writeOutput(response.subjects, parsed, io, (subjects) => {
-        if (subjects.length === 0) {
-            return "No subjects yet.";
-        }
-        return subjects
-            .map((subject) => `${subject.id}\t${subject.status}\tmetrics ${formatMetricSummary(subject.metrics)}\t${subject.fileChanges?.length ?? 0} files`)
-            .join("\n");
-    });
-    return 0;
-}
-async function subjectShow(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud subjects show", 1);
-    const target = await resolveHostedTarget(parsed);
-    const subjectId = readRequiredSubjectId(parsed);
-    const params = new URLSearchParams({ id: subjectId });
-    const subject = await apiRequest(projectApiPath(target.projectId, `/workbench/record?${params.toString()}`), {}, target.baseUrl);
-    writeOutput(subject, parsed, io, (record) => {
-        const value = record;
-        return [
-            `${value.id ?? subjectId}\t${value.status ?? "unknown"}`,
-            ...(value.benchmarkFingerprint ? [`Benchmark version: ${shortDigest(value.benchmarkFingerprint)}`] : []),
-            ...(value.subjectFingerprint ? [`Subject digest: ${shortDigest(value.subjectFingerprint)}`] : []),
-        ].join("\n");
-    });
-    return 0;
-}
-async function subjectFiles(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud subjects files", 1);
-    const target = await resolveHostedTarget(parsed);
-    const subjectId = readRequiredSubjectId(parsed);
-    const response = await apiRequest(projectApiPath(target.projectId, `/subjects/${encodeURIComponent(subjectId)}/files`), {}, target.baseUrl);
-    writeOutput(response.files, parsed, io, (files) => files
-        .map((file) => `${file.path}\t${file.status}\t${file.preview_kind}`)
-        .join("\n") || "No files.");
-    return 0;
+    return next;
 }
-async function subjectPreview(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "path", "output", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud subjects preview", 1);
-    const target = await resolveHostedTarget(parsed);
-    const subjectId = readRequiredSubjectId(parsed);
-    const filePath = requireFlag(parsed, "path");
-    const params = new URLSearchParams({ path: filePath });
-    const response = await apiRequest(projectApiPath(target.projectId, `/subjects/${encodeURIComponent(subjectId)}/files?${params.toString()}`), {}, target.baseUrl);
-    const content = response.preview.source?.content ??
-        response.preview.rendered_html ??
-        response.preview.diff ??
-        "";
-    const outputPath = asOptionalString(parsed.flags.output);
-    if (outputPath && outputPath !== "-") {
-        await fs.writeFile(outputPath, content);
-        io.stdout.write(`Wrote preview to ${outputPath}\n`);
+function appendStringFlag(args, name, value) {
+    if (value !== undefined) {
+        args.push(`--${name}`, value);
     }
-    else if (parsed.flags.json === true) {
-        writeJson(response.preview, io);
-    }
-    else {
-        io.stdout.write(content);
-    }
-    return 0;
-}
-async function subjectExport(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "out", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud subjects pull", 1);
-    const target = await resolveHostedTarget(parsed);
-    const subjectId = readRequiredSubjectId(parsed);
-    const outputDir = requireOutDir(parsed);
-    const response = await apiRequest(projectApiPath(target.projectId, `/subjects/${encodeURIComponent(subjectId)}/export`), {}, target.baseUrl);
-    await writeFiles(outputDir, response.files);
-    writeOutput({ ok: true, outputDir, files: response.files.length }, parsed, io, (result) => {
-        const record = result;
-        return `Exported ${record.files} file(s) to ${record.outputDir}`;
-    });
-    return 0;
-}
-async function subjectVisibility(argv, io, visibility) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, `workbench cloud subjects ${visibility === "public" ? "publish" : "unpublish"}`, 1);
-    const target = await resolveHostedTarget(parsed, { requireProjectIdentity: true });
-    const subjectId = readRequiredSubjectId(parsed);
-    const response = await apiRequest(projectApiPath(target.projectId, `/subjects/${encodeURIComponent(subjectId)}/publish`), { method: visibility === "public" ? "PUT" : "DELETE" }, target.baseUrl);
-    writeOutput({ ok: true, visibility, subject: response.subject }, parsed, io, () => `${visibility === "public" ? "Published" : "Unpublished"} subject ${subjectId}.`);
-    return 0;
-}
-async function runList(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud runs list", 0);
-    const target = await resolveHostedTarget(parsed);
-    const response = await apiRequest(projectApiPath(target.projectId, "/runs"), {}, target.baseUrl);
-    writeOutput(response.runs, parsed, io, (runs) => runs
-        .map((run) => `${run.id}\t${run.status}\t${run.subjectId ?? "pending"}`)
-        .join("\n") || "No runs.");
-    return 0;
-}
-async function runShow(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud runs show", 1);
-    const target = await resolveHostedTarget(parsed, { requireProjectIdentity: true });
-    const runId = readRequiredRunId(parsed);
-    const response = await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(runId)}`), {}, target.baseUrl);
-    const detail = withRunDetailUrls(target, response);
-    writeOutput(detail, parsed, io, formatRunDetail);
-    return 0;
-}
-async function runCancel(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud runs cancel", 1);
-    const target = await resolveHostedTarget(parsed, { requireProjectIdentity: true });
-    const runId = readRequiredRunId(parsed);
-    const response = await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(runId)}`), { method: "DELETE" }, target.baseUrl);
-    const run = withRunUrls(target, response.run);
-    writeOutput(run, parsed, io, (record) => {
-        const value = record;
-        return [
-            `Cancelled run ${value.id}; status ${value.status}; outcome ${value.outcome ?? "cancelled"}.`,
-            `Open benchmark: ${value.urls?.benchmark ?? buildWorkbenchResourceUrls(target).benchmark}`,
-        ].join("\n");
-    });
-    return 0;
 }
-async function runWatch(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "interval-ms", "timeout-ms", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud watch", 1);
-    const target = await resolveHostedTarget(parsed, { requireProjectIdentity: true });
-    const runId = readRequiredRunId(parsed);
-    if (parsed.flags.json !== true) {
-        io.stdout.write(`Watching run ${runId}.\n${HOSTED_WATCH_LIFECYCLE_NOTE}\n`);
-    }
-    const run = await watchHostedRun({
-        parsed,
-        target,
-        runId,
-        intervalMs: parsePositiveInt(parsed.flags["interval-ms"], 1000, "interval-ms"),
-        timeoutMs: parseOptionalPositiveInt(parsed.flags["timeout-ms"], "timeout-ms"),
-    });
-    const outputRun = await withHostedRunFailureSummary(target, run);
-    writeOutput(withRunUrls(target, outputRun), parsed, io, formatHostedRunResult);
-    return hostedRunSucceeded(run) ? 0 : 1;
+/**
+ * Looks up a single candidate in the project's hosted candidate listing.
+ *
+ * Requests the project's "/candidates" API path and searches the returned
+ * `candidates` array for an entry whose `id` equals `candidateId`.
+ *
+ * @param target Hosted target; reads `projectId` and `baseUrl`.
+ * @param candidateId Candidate id to match against each entry's `id`.
+ * @returns Promise resolving to the matching entry, or null when none matches.
+ */
+async function readHostedCandidateSummary(target, candidateId) {
+    const response = await apiRequest(projectApiPath(target.projectId, "/candidates"), {}, target.baseUrl);
+    // find() yields undefined on a miss; normalise that to null for callers.
+    return response.candidates.find((entry) => entry.id === candidateId) ?? null;
 }
-async function runLogs(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud logs", 1);
-    const target = await resolveHostedTarget(parsed);
-    const requestedRunId = parsed.positionals[0];
-    if (requestedRunId) {
-        const response = await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(requestedRunId)}`), {}, target.baseUrl);
-        writeOutput({ runId: response.run.id, jobs: response.jobs }, parsed, io, formatRunLogs);
-        return 0;
-    }
-    const project = (await apiRequest(projectApiPath(target.projectId), {}, target.baseUrl)).project;
-    const runId = project.runs.at(-1)?.id;
-    if (!runId) {
-        throw new UsageError("Missing RUN_ID; the benchmark has no runs.");
+async function readEvaluatedActiveHostedCandidate(target) {
+    const response = await apiRequest(projectApiPath(target.projectId), {}, target.baseUrl);
+    const activeCandidateId = response.benchmark.activeCandidateId;
+    if (!activeCandidateId) {
+        return null;
     }
-    const jobs = project.jobs.filter((job) => job.runId === runId);
-    writeOutput({ runId, jobs }, parsed, io, formatRunLogs);
-    return 0;
+    const candidate = await readHostedCandidateSummary(target, activeCandidateId);
+    return candidate && hostedCandidateIsEvaluated(candidate) ? candidate : null;
 }
-function formatRunLogs(record) {
-    const value = record;
-    return (value.jobs
-        .map((job) => `${job.id}\t${job.kind}\t${job.status}\t${job.subjectId ?? "-"}${job.error ? `\t${job.error}` : ""}`)
-        .join("\n") || `No jobs for ${value.runId}.`);
+/**
+ * Reports whether a hosted candidate summary counts as evaluated: either its
+ * `status` is "evaluated" or it has an `eval` value that is neither null nor
+ * undefined.
+ *
+ * @param candidate Candidate summary; reads `status` and `eval`.
+ * @returns {boolean} True when the candidate is considered evaluated.
+ */
+function hostedCandidateIsEvaluated(candidate) {
+    // Loose `!= null` covers both null and undefined in one comparison.
+    return candidate.status === "evaluated" || candidate.eval != null;
 }
 async function openWorkbench(argv, io) {
     const parsed = parseArgs(argv);
     rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "no-open", "json"]));
     if (parsed.positionals.length > 1) {
-        throw new UsageError(`Unexpected argument for workbench open: ${parsed.positionals.slice(1).join(" ")}`);
+        throw new UsageError(`Unexpected argument for workbench open --hosted: ${parsed.positionals.slice(1).join(" ")}`);
     }
     const target = await resolveOpenTarget(parsed);
     const ref = target.openRef;
@@ -3226,7 +3644,7 @@ function buildWorkbenchWebUrl(target, ref) {
     if (ref.startsWith("run_")) {
         return benchmarkUrl;
     }
-    return buildWorkbenchResourceUrls(target, { subjectId: ref }).subjectEvaluation;
+    return buildWorkbenchResourceUrls(target, { candidateId: ref }).candidateEvaluation;
 }
 async function resolveHostedTarget(parsed, options = {}) {
     if (options.sourceArg !== undefined && parsed.flags.dir !== undefined) {
@@ -3253,11 +3671,12 @@ async function resolveHostedTarget(parsed, options = {}) {
     if (!projectId) {
         throw new UsageError("Missing hosted benchmark. Run workbench push, workbench clone, or pass --benchmark OWNER/BENCHMARK.");
     }
+    const originRemote = origin ? parseOriginRemote(origin) : null;
     return {
         projectId,
-        ...(!explicitProject && origin?.owner ? { owner: origin.owner } : {}),
-        ...(!explicitProject && origin?.project
-            ? { projectName: origin.project }
+        ...(!explicitProject && originRemote ? { owner: originRemote.owner } : {}),
+        ...(!explicitProject && originRemote
+            ? { projectName: originRemote.project }
             : {}),
         dir,
         baseUrl,
@@ -3295,13 +3714,12 @@ async function resolveHostedDryRunTarget(parsed, options = {}) {
         };
     }
     if (origin?.projectId) {
+        const originRemote = parseOriginRemote(origin);
         return {
-            projectRef: origin.owner && origin.project
-                ? `${origin.owner}/${origin.project}`
-                : origin.projectId,
+            projectRef: origin.remote,
             projectId: origin.projectId,
-            ...(origin.owner ? { owner: origin.owner } : {}),
-            ...(origin.project ? { projectName: origin.project } : {}),
+            owner: originRemote.owner,
+            projectName: originRemote.project,
             dir,
             baseUrl,
             origin,
@@ -3313,7 +3731,7 @@ async function resolveOpenTarget(parsed) {
     const ref = parsed.positionals[0];
     if (ref &&
         !ref.startsWith("run_") &&
-        !ref.startsWith("subject_")) {
+        !ref.startsWith("candidate_")) {
         const baseUrl = await effectiveBaseUrl();
         if (ref.includes("/")) {
             const parsedRef = parseBenchmarkRef(ref);
@@ -3347,51 +3765,44 @@ function buildWorkbenchResourceUrls(target, refs = {}) {
     const projectRef = `${encodeURIComponent(target.owner)}/${encodeURIComponent(target.projectName)}`;
     const benchmark = `${target.baseUrl}/benchmarks/${projectRef}`;
     const urls = { benchmark };
-    if (refs.subjectId) {
+    if (refs.candidateId) {
         const evaluationId = refs.runId
-            ? evaluationScorecardId(refs.runId, refs.subjectId)
+            ? evaluationScorecardId(refs.runId, refs.candidateId)
             : null;
-        urls.subjectEvaluation = evaluationId
-            ? `${benchmark}/subjects/${encodeURIComponent(refs.subjectId)}?evaluation=${encodeURIComponent(evaluationId)}`
-            : `${benchmark}/subjects/${encodeURIComponent(refs.subjectId)}`;
+        urls.candidateEvaluation = evaluationId
+            ? `${benchmark}/candidates/${encodeURIComponent(refs.candidateId)}?evaluation=${encodeURIComponent(evaluationId)}`
+            : `${benchmark}/candidates/${encodeURIComponent(refs.candidateId)}`;
     }
     return urls;
 }
 function projectApiPath(projectRef, suffix = "") {
     return `/api/workbench/benchmarks/${encodeURIComponent(projectRef)}${suffix}`;
 }
-function benchmarkApiPath(benchmarkRef) {
-    if (benchmarkRef.includes("/")) {
-        return publicProjectApiPath(parseBenchmarkRef(benchmarkRef));
-    }
-    return projectApiPath(benchmarkRef);
-}
 function publicProjectApiPath(ref) {
     return `/api/workbench/public/benchmarks/${encodeURIComponent(ref.owner)}/${encodeURIComponent(ref.project)}`;
 }
-function publicProjectSourceApiPath(ref) {
-    return `${publicProjectApiPath(ref)}/source`;
+function publicProjectStateApiPath(ref) {
+    return `${publicProjectApiPath(ref)}/state`;
 }
 function readRequiredBenchmarkRef(parsed) {
     const ref = parsed.positionals[0];
     if (!ref) {
-        throw new UsageError("Missing required OWNER/BENCHMARK ref.");
+        throw new UsageError("Missing required OWNER/BENCHMARK.");
     }
     return parseBenchmarkRef(ref);
 }
 function parseBenchmarkRef(value) {
-    const [namePart, versionRef, extraRef] = value.split("@");
-    if (extraRef !== undefined || !namePart) {
-        throw new UsageError("Benchmark refs must use OWNER/BENCHMARK[@REF].");
+    if (value.includes("@")) {
+        throw new UsageError("Benchmark refs must use OWNER/BENCHMARK.");
     }
-    const [owner, project, extra] = namePart.split("/");
+    const [owner, project, extra] = value.split("/");
     if (!owner || !project || extra !== undefined) {
-        throw new UsageError("Benchmark refs must use OWNER/BENCHMARK[@REF].");
+        throw new UsageError("Benchmark refs must use OWNER/BENCHMARK.");
     }
-    return { owner, project, ...(versionRef ? { ref: versionRef } : {}) };
+    return { owner, project };
 }
 function formatBenchmarkRef(ref) {
-    return `${ref.owner}/${ref.project}${ref.ref ? `@${ref.ref}` : ""}`;
+    return `${ref.owner}/${ref.project}`;
 }
 async function resolveRemoteProject(projectRef, baseUrl) {
     if (projectRef.includes("/")) {
@@ -3402,52 +3813,84 @@ async function resolveRemoteProject(projectRef, baseUrl) {
     const response = await apiRequest(projectApiPath(projectRef), {}, baseUrl);
     return response.benchmark;
 }
-function formatProjectRef(project) {
-    return project.name ? `${project.name} (${project.id})` : project.id;
-}
-function originMatchesProjectRef(origin, projectRef) {
-    if (!origin) {
-        return false;
-    }
-    if (origin.projectId === projectRef) {
-        return true;
-    }
-    if (!projectRef.includes("/")) {
-        return false;
-    }
-    const ref = parseBenchmarkRef(projectRef);
-    return origin.owner === ref.owner && origin.project === ref.project;
-}
 function withRunUrls(target, run) {
     return {
         ...run,
         urls: buildWorkbenchResourceUrls(target, {
             runId: run.id,
-            subjectId: run.outputSubjectId ?? run.subjectId,
+            candidateId: run.outputCandidateId ?? run.candidateId,
         }),
     };
 }
-function withRunDetailUrls(target, detail) {
-    const subjectId = hostedRunEvaluationSubjectId(detail.run, detail.jobs);
-    const run = withRunUrls(target, {
-        ...detail.run,
-        outputSubjectId: detail.run.outputSubjectId ?? subjectId,
-    });
+function hostedRunEvaluationCandidateId(run, jobs = []) {
+    if (run.outputCandidateId) {
+        return run.outputCandidateId;
+    }
+    const attemptCandidates = jobs
+        .filter((job) => readRunJobPurpose(job) === "attempt")
+        .map((job) => job.candidateId)
+        .filter((candidateId) => Boolean(candidateId));
+    return attemptCandidates.at(-1) ?? run.candidateId ?? null;
+}
+function localProjectState(args) {
+    const stateSource = localProjectStateSource(args.source);
+    const runtimeFingerprint = workbenchRuntimeBundleFingerprint(args.runtime);
     return {
-        run,
-        jobs: detail.jobs,
-        urls: run.urls ?? buildWorkbenchResourceUrls(target, { runId: run.id }),
+        schema: "workbench.project.state.v1",
+        project: {
+            id: args.origin?.projectId ?? "",
+            remote: args.origin?.remote ?? `local/${args.source.spec.name}`,
+            ownerUsername: args.origin ? parseOriginRemote(args.origin).owner : "local",
+            name: args.origin ? parseOriginRemote(args.origin).project : args.source.spec.name,
+            visibility: args.visibility,
+        },
+        base: {
+            ...(args.origin ? { sourceRevisionId: args.origin.sourceRevisionId } : {}),
+            ...(args.origin ? { sourceFingerprint: args.origin.sourceFingerprint } : {}),
+            runtimeFingerprint: args.origin?.runtimeFingerprint ?? runtimeFingerprint,
+        },
+        source: stateSource,
+        runtime: args.runtime,
     };
 }
-function hostedRunEvaluationSubjectId(run, jobs = []) {
-    if (run.outputSubjectId) {
-        return run.outputSubjectId;
-    }
-    const attemptSubjects = jobs
-        .filter((job) => readRunJobPurpose(job) === "attempt")
-        .map((job) => job.subjectId)
-        .filter((subjectId) => Boolean(subjectId));
-    return attemptSubjects.at(-1) ?? run.subjectId ?? null;
+function localProjectStateSource(source) {
+    const request = hostedProjectSourceRequest(source);
+    const stateSource = {
+        source: request.source,
+        files: source.sourceFiles.map((file) => ({ ...file })),
+        candidateFiles: request.candidateFiles.map(toSurfaceSnapshotFile),
+        engineResolveFiles: request.engineResolveFiles.map(toSurfaceSnapshotFile),
+        engineResolveBinding: request.engineResolveBinding,
+        adapterFiles: request.adapterFiles.map(toSurfaceSnapshotFile),
+        dockerfile: request.dockerfile,
+        runtimeDockerfile: request.runtimeDockerfile,
+        runtimeFiles: request.runtimeFiles.map(toSurfaceSnapshotFile),
+        network: request.network,
+        resources: { ...request.resources },
+    };
+    return {
+        ...stateSource,
+        fingerprint: workbenchProjectSourceFingerprint(stateSource),
+    };
+}
+function toSurfaceSnapshotFile(file) {
+    return {
+        path: file.path,
+        kind: "kind" in file ? file.kind : file.encoding === "base64" ? "binary" : "text",
+        encoding: file.encoding ?? "utf8",
+        content: file.content,
+        executable: file.executable === true,
+    };
+}
+function hostedProjectSummaryFromState(state) {
+    return {
+        id: state.project.id,
+        ownerUsername: state.project.ownerUsername,
+        name: state.project.name,
+        visibility: state.project.visibility,
+        currentSpecVersionId: state.source.revisionId ?? state.base.sourceRevisionId,
+        sourceFingerprint: state.source.fingerprint ?? state.base.sourceFingerprint,
+    };
 }
 function sourceFileCount(source) {
     return source.sourceFiles.length;
@@ -3456,7 +3899,7 @@ function hostedProjectSourceRequest(source) {
     const { network, resources } = hostedEnvironmentOptions(source);
     return {
         source: source.specSource,
-        subjectFiles: source.subjectFiles,
+        candidateFiles: source.candidateFiles,
         engineResolveFiles: hostedEngineResolveFiles(source),
         engineResolveBinding: engineResolveBindingForSpec(source.spec),
         adapterFiles: source.adapterFiles,
@@ -3539,68 +3982,48 @@ async function watchHostedRun(args) {
     }
 }
 function formatHostedRunResult(run) {
-    const subjectId = run.outputSubjectId ?? run.subjectId;
-    const activeDetail = run.activeSubjectId && subjectId && run.activeSubjectId !== subjectId
-        ? `; active ${run.activeSubjectId}`
+    const candidateId = run.outputCandidateId ?? run.candidateId;
+    const activeDetail = run.activeCandidateId && candidateId && run.activeCandidateId !== candidateId
+        ? `; active ${run.activeCandidateId}`
         : "";
-    const summary = `Run ${run.id} reached ${run.status}; ${run.outcome ? `outcome ${run.outcome}; ` : ""}subject ${subjectId ?? "pending"}${activeDetail}; ${run.completedJobCount ?? 0}/${run.jobCount ?? 0} jobs completed.`;
+    const summary = `Run ${run.id} reached ${run.status}; ${run.outcome ? `outcome ${run.outcome}; ` : ""}candidate ${candidateId ?? "pending"}${activeDetail}; ${run.completedJobCount ?? 0}/${run.jobCount ?? 0} jobs completed.`;
     return [
         run.error ? `${summary}\nError: ${run.error}` : summary,
-        ...(run.urls?.subjectEvaluation
-            ? [`Open evaluation: ${run.urls.subjectEvaluation}`]
+        ...(run.urls?.candidateEvaluation
+            ? [`Open evaluation: ${run.urls.candidateEvaluation}`]
             : [`Open benchmark: ${run.urls?.benchmark ?? ""}`].filter(Boolean)),
     ].join("\n");
 }
-function formatHostedRunStarted(run, fallbackWorkflow) {
-    const subjectId = run.outputSubjectId ?? run.subjectId;
-    return [
-        `Started ${run.workflow ?? fallbackWorkflow} run ${run.id}; ${subjectId ? `subject ${subjectId}` : `${run.jobCount ?? 0} jobs queued`}.`,
-        ...(run.urls?.subjectEvaluation
-            ? [`Open evaluation: ${run.urls.subjectEvaluation}`]
-            : run.urls?.benchmark ? [`Open benchmark: ${run.urls.benchmark}`] : []),
-        "",
-    ].join("\n");
-}
-function formatRunDetail(record) {
-    const detail = record;
-    const { run, jobs, urls } = detail;
-    const cost = sumJobCostUsd(jobs);
-    const firstFailedJob = jobs.find((job) => job.status === "failed" && job.error);
-    const subjectId = hostedRunEvaluationSubjectId(run, jobs);
+function formatRetryCommandResult(result) {
+    const run = result.run;
+    const runId = run?.id ?? result.runId ?? "unknown";
+    const scope = `${result.retried.kind} ${result.retried.id}`;
+    const verb = run
+        ? run.status === "finished" ? "finished as hosted run" : "started as hosted run"
+        : "finished as local run";
     return [
-        `Run ${run.id}: ${run.status}${run.outcome ? ` (${run.outcome})` : ""}`,
-        `Workflow: ${run.workflow ?? "improve"}`,
-        `Subject: ${subjectId ?? "pending"}`,
-        ...(run.activeSubjectId && subjectId && run.activeSubjectId !== subjectId
-            ? [`Active subject: ${run.activeSubjectId}`]
-            : []),
-        `Samples: ${run.samples ?? 0}`,
-        `Attempts: ${run.attemptsExecuted ?? 0}/${run.attemptsRequested ?? run.attemptsExecuted ?? 0}`,
-        `Jobs: ${run.completedJobCount ?? jobs.filter(isTerminalRunJob).length}/${run.jobCount ?? jobs.length} completed${run.failedJobCount ? `; ${run.failedJobCount} failed` : ""}`,
-        ...(typeof run.durationMs === "number"
-            ? [`Duration: ${formatDurationMs(run.durationMs)}`]
+        `Retry of ${scope} ${verb} ${runId}.`,
+        ...(result.evaluationId ? [`Evaluation: ${result.evaluationId}`] : []),
+        ...(result.candidateId ? [`Candidate: ${result.candidateId}`] : []),
+        ...(result.failedJobCount ? [`Failed jobs: ${result.failedJobCount}`] : []),
+        ...(result.error ? [`Error: ${result.error}`] : []),
+        ...(result.localView
+            ? [`Open local view: ${result.localView.command}`, result.localView.note]
             : []),
-        ...(cost > 0 ? [`Cost: ${formatUsd(cost)}`] : []),
-        ...(firstFailedJob?.error
-            ? [`First failed job ${firstFailedJob.id}: ${firstFailedJob.error}`]
-            : []),
-        ...(urls.subjectEvaluation
-            ? [`Open evaluation: ${urls.subjectEvaluation}`]
-            : [`Open benchmark: ${urls.benchmark}`]),
-        ...(jobs.length > 0 ? ["", "Jobs:", ...jobs.map(formatRunJobLine)] : []),
+        ...(result.urls?.candidateEvaluation
+            ? [`Open evaluation: ${result.urls.candidateEvaluation}`]
+            : result.urls?.benchmark ? [`Open benchmark: ${result.urls.benchmark}`] : []),
     ].join("\n");
 }
-function formatRunJobLine(job) {
+function formatHostedRunStarted(run, fallbackWorkflow) {
+    const candidateId = run.outputCandidateId ?? run.candidateId;
     return [
-        job.id,
-        readRunJobPurpose(job) ?? job.kind ?? "job",
-        job.status,
-        job.subjectId ?? "-",
-        job.error ?? "",
-    ].filter((value, index) => index < 4 || value !== "").join("\t");
-}
-function isTerminalRunJob(job) {
-    return job.status === "succeeded" || job.status === "failed" || job.status === "cancelled";
+        `Started ${run.workflow ?? fallbackWorkflow} run ${run.id}; ${candidateId ? `candidate ${candidateId}` : `${run.jobCount ?? 0} jobs queued`}.`,
+        ...(run.urls?.candidateEvaluation
+            ? [`Open evaluation: ${run.urls.candidateEvaluation}`]
+            : run.urls?.benchmark ? [`Open benchmark: ${run.urls.benchmark}`] : []),
+        "",
+    ].join("\n");
 }
 function readRunJobPurpose(job) {
     const input = readRecord(job.input);
@@ -3608,49 +4031,22 @@ function readRunJobPurpose(job) {
     const purpose = execution?.purpose;
     return typeof purpose === "string" && purpose ? purpose : null;
 }
-function sumJobCostUsd(jobs) {
-    const sum = jobs.reduce((total, job) => total + costUsdFromUsage(readRecord(job.output)?.usage), 0);
-    return Number.isFinite(sum) ? Math.round(sum * 1_000_000) / 1_000_000 : 0;
-}
-function costUsdFromUsage(value) {
-    const usage = readRecord(value);
-    if (!usage) {
-        return 0;
-    }
-    const direct = readFiniteNumber(usage.costUsd);
-    if (direct !== null) {
-        return direct;
-    }
-    return ["total", "optimizer", "runner", "engine"].reduce((sum, key) => {
-        const nested = readRecord(usage[key]);
-        return sum + (readFiniteNumber(nested?.costUsd) ?? 0);
-    }, 0);
-}
 function readRecord(value) {
     return value && typeof value === "object" && !Array.isArray(value)
         ? value
         : null;
 }
-function readFiniteNumber(value) {
-    return typeof value === "number" && Number.isFinite(value) ? value : null;
+function stringValue(value) {
+    return typeof value === "string" && value.length > 0 ? value : null;
 }
-function formatDurationMs(durationMs) {
-    if (durationMs < 1000) {
-        return `${Math.max(0, Math.round(durationMs))}ms`;
-    }
-    const seconds = durationMs / 1000;
-    if (seconds < 60) {
-        return `${seconds.toFixed(seconds < 10 ? 1 : 0)}s`;
-    }
-    const minutes = Math.floor(seconds / 60);
-    const remainingSeconds = Math.round(seconds % 60);
-    return `${minutes}m ${remainingSeconds}s`;
+function numberValue(value) {
+    return readFiniteNumber(value);
 }
-function formatUsd(value) {
-    return `$${value.toFixed(value < 1 ? 4 : 2)}`;
+function integerValue(value) {
+    return Number.isSafeInteger(value) ? value : null;
 }
-function shortDigest(value) {
-    return value.length > 12 ? value.slice(0, 12) : value;
+function readFiniteNumber(value) {
+    return typeof value === "number" && Number.isFinite(value) ? value : null;
 }
 async function withHostedRunFailureSummary(target, run) {
     if (hostedRunSucceeded(run) || run.error || (run.failedJobCount ?? 0) <= 0) {
@@ -3681,23 +4077,44 @@ function hostedRunSucceeded(run) {
 async function readWorkbenchOrigin(dir) {
     try {
         const parsed = JSON.parse(await fs.readFile(workbenchOriginPath(dir), "utf8"));
-        if (!parsed.projectId ||
-            !parsed.baseUrl ||
-            !parsed.owner ||
-            !parsed.project ||
-            typeof parsed.writable !== "boolean") {
+        if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
+            throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
+        }
+        const originRecord = parsed;
+        const keys = Object.keys(originRecord).sort();
+        const expectedKeys = [
+            "baseUrl",
+            "linkedAt",
+            "projectId",
+            "remote",
+            "runtimeFingerprint",
+            "sourceFingerprint",
+            "sourceRevisionId",
+        ];
+        if (typeof originRecord.projectId !== "string" ||
+            typeof originRecord.baseUrl !== "string" ||
+            typeof originRecord.remote !== "string" ||
+            typeof originRecord.sourceRevisionId !== "string" ||
+            typeof originRecord.sourceFingerprint !== "string" ||
+            typeof originRecord.runtimeFingerprint !== "string" ||
+            typeof originRecord.linkedAt !== "string" ||
+            originRecord.projectId.length === 0 ||
+            originRecord.sourceRevisionId.length === 0 ||
+            originRecord.sourceFingerprint.length === 0 ||
+            originRecord.runtimeFingerprint.length === 0) {
+            throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
+        }
+        if (JSON.stringify(keys) !== JSON.stringify(expectedKeys)) {
             throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
         }
         return {
-            baseUrl: normalizeBaseUrl(parsed.baseUrl),
-            owner: parsed.owner,
-            project: parsed.project,
-            projectId: parsed.projectId,
-            writable: parsed.writable,
-            ...(parsed.sourceRevisionId ? { sourceRevisionId: parsed.sourceRevisionId } : {}),
-            ...(parsed.sourceFingerprint ? { sourceFingerprint: parsed.sourceFingerprint } : {}),
-            ...(parsed.upstream ? { upstream: parsed.upstream } : {}),
-            linkedAt: parsed.linkedAt ?? new Date(0).toISOString(),
+            baseUrl: normalizeBaseUrl(originRecord.baseUrl),
+            remote: normalizeOriginRemote(originRecord.remote),
+            projectId: originRecord.projectId,
+            sourceRevisionId: originRecord.sourceRevisionId,
+            sourceFingerprint: originRecord.sourceFingerprint,
+            runtimeFingerprint: originRecord.runtimeFingerprint,
+            linkedAt: originRecord.linkedAt,
         };
     }
     catch (error) {
@@ -3716,8 +4133,12 @@ async function requireWorkbenchOrigin(dir) {
 }
 async function writeWorkbenchOrigin(dir, input) {
     const origin = {
-        ...input,
         baseUrl: normalizeBaseUrl(input.baseUrl),
+        remote: normalizeOriginRemote(input.remote),
+        projectId: input.projectId,
+        sourceRevisionId: input.sourceRevisionId,
+        sourceFingerprint: input.sourceFingerprint,
+        runtimeFingerprint: input.runtimeFingerprint,
         linkedAt: input.linkedAt ?? new Date().toISOString(),
     };
     const filePath = workbenchOriginPath(dir);
@@ -3725,6 +4146,56 @@ async function writeWorkbenchOrigin(dir, input) {
     await fs.writeFile(filePath, `${JSON.stringify(origin, null, 2)}\n`);
     return origin;
 }
+async function writeWorkbenchOriginFromState(dir, args) {
+    const owner = args.project?.ownerUsername ?? args.state.project.ownerUsername;
+    const name = args.project?.name ?? args.state.project.name;
+    const sourceRevisionId = args.project?.currentSpecVersionId ??
+        args.state.source.revisionId ??
+        args.state.base.sourceRevisionId;
+    const sourceFingerprint = args.sourceFingerprint ??
+        args.project?.sourceFingerprint ??
+        args.state.source.fingerprint ??
+        args.state.base.sourceFingerprint;
+    const runtimeFingerprint = args.state.base.runtimeFingerprint ??
+        workbenchRuntimeBundleFingerprint(args.state.runtime);
+    if (!sourceRevisionId || !sourceFingerprint || !runtimeFingerprint) {
+        throw new UsageError("Hosted project state is missing required origin metadata.");
+    }
+    return await writeWorkbenchOrigin(dir, {
+        baseUrl: args.baseUrl,
+        remote: `${owner}/${name}`,
+        projectId: args.project?.id ?? args.state.project.id,
+        sourceRevisionId,
+        sourceFingerprint,
+        runtimeFingerprint,
+    });
+}
+async function localSourceFingerprint(dir) {
+    const source = localProjectStateSource(await readLocalProjectSource(dir));
+    return source.fingerprint ?? workbenchProjectSourceFingerprint(source);
+}
+function parseOriginRemote(origin) {
+    return parseRemoteName(origin.remote);
+}
+function parseRemoteName(remote) {
+    try {
+        return parseBenchmarkRef(remote);
+    }
+    catch {
+        throw new UsageError(`Workbench origin remote must use OWNER/BENCHMARK: ${remote}`);
+    }
+}
+function normalizeOriginRemote(remote) {
+    const parsed = parseRemoteName(remote.trim());
+    return `${parsed.owner}/${parsed.project}`;
+}
+function originRemoteUrlParts(origin) {
+    const remote = parseOriginRemote(origin);
+    return {
+        owner: remote.owner,
+        projectName: remote.project,
+    };
+}
 function workbenchOriginPath(dir) {
     return path.join(dir, ".workbench", "origin.json");
 }
@@ -3763,30 +4234,6 @@ async function readWorkbenchProfileStatus(config) {
         return { authenticated: true, profile: null };
     }
 }
-function readOptionalSubjectId(parsed) {
-    return asOptionalString(parsed.flags.subject) ?? parsed.positionals[0];
-}
-function readRequiredSubjectId(parsed) {
-    const subjectId = readOptionalSubjectId(parsed);
-    if (!subjectId) {
-        throw new UsageError("Missing required SUBJECT_ID.");
-    }
-    return subjectId;
-}
-function readRequiredRunId(parsed) {
-    const runId = parsed.positionals[0];
-    if (!runId) {
-        throw new UsageError("Missing required RUN_ID.");
-    }
-    return runId;
-}
-function requireOutDir(parsed) {
-    const output = asOptionalString(parsed.flags.out);
-    if (!output) {
-        throw new UsageError("Missing required --out.");
-    }
-    return output;
-}
 async function apiRequest(apiPath, options = {}, baseUrlOverride) {
     const config = await loadConfig();
     const baseUrl = normalizeBaseUrl(baseUrlOverride ??
@@ -4002,6 +4449,38 @@ function readInitAgent(parsed, kind) {
 function asOptionalString(value) {
     return typeof value === "string" && value.length > 0 ? value : undefined;
 }
+function singleRequestedRunId(value, command) {
+    if (!value || value.trim() === "") {
+        return undefined;
+    }
+    const trimmed = value.trim();
+    if (trimmed === "all" || trimmed.includes(",")) {
+        throw new UsageError(`${command} accepts one candidate run id for --runs; use workbench eval --runs all to evaluate every run.`);
+    }
+    return trimmed;
+}
+function resolveCandidateRunSelection(source, value) {
+    const available = source.candidateRunIds;
+    if (available.length === 0) {
+        throw new UsageError("Candidate must declare at least one run.");
+    }
+    if (!value || value.trim() === "") {
+        return [source.candidateRunId];
+    }
+    const trimmed = value.trim();
+    if (trimmed === "all") {
+        return available;
+    }
+    const requested = [...new Set(trimmed.split(",").map((entry) => entry.trim()).filter(Boolean))];
+    if (requested.length === 0) {
+        throw new UsageError("--runs must include at least one run id or all.");
+    }
+    const missing = requested.filter((runId) => !available.includes(runId));
+    if (missing.length > 0) {
+        throw new UsageError(`Unknown candidate run(s): ${missing.join(", ")}. Available: ${available.join(", ")}.`);
+    }
+    return requested;
+}
 function readOptionalStringFlag(value, name) {
     if (value == null || value === false) {
         return undefined;
@@ -4226,6 +4705,27 @@ function parsePortFlag(value) {
     }
     return port;
 }
+function formatCandidateEvaluationScore(candidate) {
+    const score = candidate.eval?.metrics?.score?.mean;
+    return typeof score === "number" && Number.isFinite(score)
+        ? formatMetricValue(score)
+        : "n/a";
+}
+function formatLocalCandidateLabel(candidate) {
+    if (!candidate) {
+        return "none";
+    }
+    const name = candidate.name?.trim() || candidate.id;
+    const displayName = candidate.version > 0
+        ? `${name} v${candidate.version}`
+        : name;
+    return `${displayName} (${candidate.id})`;
+}
+function formatCandidateEvaluationSummary(candidate) {
+    return formatMetricSummary(evaluationMeanMetrics(candidate.eval), {
+        limit: Number.POSITIVE_INFINITY,
+    });
+}
 function formatMetricSummary(metrics, options = {}) {
     const entries = Object.entries(metrics ?? {}).filter((entry) => Number.isFinite(entry[1]));
     if (entries.length === 0) {
@@ -4255,23 +4755,28 @@ function resolveSourceDir(parsed) {
     if (parsed.positionals.length > 1) {
         throw new UsageError("Expected at most one source file or directory argument.");
     }
-    if (parsed.positionals.length > 0 && parsed.flags.dir !== undefined) {
-        throw new UsageError("Use either --dir or SOURCE, not both.");
+    const dir = asOptionalString(parsed.flags.dir);
+    const source = parsed.positionals[0];
+    if (dir && source) {
+        return path.resolve(dir, source);
     }
-    return path.resolve(asOptionalString(parsed.flags.dir) ?? parsed.positionals[0] ?? process.cwd());
+    return path.resolve(dir ?? source ?? process.cwd());
 }
 function isWorkbenchSourceYamlPath(filePath) {
     return path.basename(filePath) === WORKBENCH_BENCHMARK_FILE;
 }
-function readSubjectIdFlag(parsed, snapshot) {
-    const explicit = asOptionalString(parsed.flags.subject) ?? asOptionalString(parsed.flags.subject);
+function readCandidateIdFlag(parsed, snapshot) {
+    const explicit = readOptionalCandidateFlag(parsed);
     if (explicit) {
         return explicit;
     }
     if (snapshot.activeId) {
         return snapshot.activeId;
     }
-    throw new UsageError("Missing required --subject; no active subject exists.");
+    throw new UsageError("Missing required --candidate; no active candidate exists.");
+}
+function readOptionalCandidateFlag(parsed) {
+    return asOptionalString(parsed.flags.candidate);
 }
 function readPreviewMode(parsed) {
     const view = asOptionalString(parsed.flags.view) ?? "rendered";
@@ -4375,8 +4880,8 @@ async function copyInitSeedIfProvided(parsed, workspace, seed) {
         }
     });
 }
-function formatSpecOptimizer(spec) {
-    return spec.improve ? `adapter:${spec.improve.use}` : "optimizer not configured";
+function formatSpecImprover(spec) {
+    return spec.improve ? `adapter:${spec.improve.use}` : "improve not configured";
 }
 async function writeFiles(outputDir, files) {
     await fs.mkdir(outputDir, { recursive: true });
@@ -4401,6 +4906,14 @@ async function syncSourceFiles(outputDir, files) {
     }
     await writeFiles(outputDir, files);
 }
+async function assertLocalSourceMatchesOrigin(dir, origin) {
+    const source = await readLocalProjectSource(dir);
+    const fingerprint = localProjectStateSource(source).fingerprint;
+    if (fingerprint === origin.sourceFingerprint) {
+        return;
+    }
+    throw new UsageError("Local source changed since the last pull or push. Run `workbench push` before pulling, or restore the local source changes and try again.");
+}
 async function readManagedSourceFilePaths(outputDir) {
     try {
         const source = await readLocalProjectSource(outputDir);