npm - @workbench-ai/workbench - Versions diffs - 0.0.50 → 0.0.52 - Mend

@workbench-ai/workbench 0.0.50 → 0.0.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/dist/benchmark-fingerprint.d.ts +1 -3
package/dist/benchmark-fingerprint.d.ts.map +1 -1
package/dist/benchmark-fingerprint.js +19 -87
package/dist/command-model.d.ts.map +1 -1
package/dist/command-model.js +37 -418
package/dist/dev-open/client.css +21 -50
package/dist/dev-open/client.js +140 -140
package/dist/dev-open-server.d.ts +3 -0
package/dist/dev-open-server.d.ts.map +1 -1
package/dist/dev-open-server.js +40 -4
package/dist/index.d.ts.map +1 -1
package/dist/index.js +452 -886
package/dist/local-archive.d.ts +5 -1
package/dist/local-archive.d.ts.map +1 -1
package/dist/local-archive.js +332 -10
package/dist/project-source.d.ts +1 -0
package/dist/project-source.d.ts.map +1 -1
package/dist/project-source.js +23 -1
package/package.json +4 -4

package/dist/index.js CHANGED

@@ -5,17 +5,17 @@ import { createRequire } from "node:module";
 import os from "node:os";
 import path from "node:path";
 import { Writable } from "node:stream";
-import { createCandidateFilePreview, createBaselineCandidateJob as createRuntimeBaselineCandidateJob, evaluationScorecardId, evaluationMeanMetrics, executeWorkbenchExecutionJob, engineResolveBindingForSpec, filterCandidateSourceFiles, workbenchExecutionPurpose, workbenchRunExecutionFingerprint, createWorkbenchAdapterAuthBundle, createOptimizerTraceInputFiles, DOCKER_SANDBOX_BACKEND, localWorkbenchAdapterAuthStore, materializeWorkbenchRunResult, normalizeSurfaceFiles, planWorkbenchExecutionJobsForPurpose, runWorkbenchExecutionDag, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, summarizeCandidateFiles, validateWorkbenchRunEnvelope, parseWorkbenchAdapterAuthTarget, } from "@workbench-ai/workbench-core";
-import { assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterAuthRequirements, WORKBENCH_ADAPTER_RESULT_FILE, WORKBENCH_ADAPTER_RESULT_PROTOCOL, normalizeWorkbenchAdapterOperationRequest, readWorkbenchAdapterOperationResult, workbenchAdapterOperationCommand, workbenchAdapterOperationResultPath, withDefaultWorkbenchAdapterAuthProfiles as applyDefaultWorkbenchAdapterAuthProfiles, } from "@workbench-ai/workbench-protocol";
+import { createCandidateFilePreview, createBaselineCandidateJob as createRuntimeBaselineCandidateJob, evaluationScorecardId, evaluationMeanMetrics, executeWorkbenchExecutionJob, engineResolveBindingForSpec, filterOptimizerTraceJobsForCaseIds, filterCandidateSourceFiles, formatWorkbenchCaseSelector, formatWorkbenchSelectionPolicy, workbenchCaseSelectorUsesAllCases, workbenchExecutionPurpose, workbenchRunExecutionFingerprint, createWorkbenchAdapterAuthBundle, createOptimizerTraceInputFiles, DOCKER_SANDBOX_BACKEND, localWorkbenchAdapterAuthStore, materializeWorkbenchRunResult, normalizeSurfaceFiles, planWorkbenchExecutionJobsForPurpose, runWorkbenchExecutionDag, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, summarizeCandidateFiles, validateWorkbenchRunEnvelope, parseWorkbenchAdapterAuthTarget, workbenchEngineCaseIdsForImproveEvaluation, workbenchEngineCaseIdsForSelector, workbenchImproveOptimizeSelector, workbenchImproveSelectionPolicy, workbenchProjectSourceFingerprint, workbenchRuntimeBundleFingerprint, } from "@workbench-ai/workbench-core";
+import { assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterAuthRequirements, normalizeWorkbenchAdapterOperationRequest, readWorkbenchAdapterOperationResult, workbenchAdapterOperationCommand, workbenchAdapterOperationResultPath, withDefaultWorkbenchAdapterAuthProfiles as applyDefaultWorkbenchAdapterAuthProfiles, } from "@workbench-ai/workbench-protocol";
 import { builtinLocalTraceAdapter, builtinLocalTraceAdapters, sortLocalTraceRefs, } from "@workbench-ai/workbench-built-in-adapters/local-traces";
 import { commandUsage, HOSTED_WATCH_LIFECYCLE_NOTE, LOCAL_DEV_OPEN_LIFECYCLE_NOTE, rootUsage, } from "./command-model.js";
 import { startLocalWorkbenchDevServer } from "./dev-open-server.js";
 import { createWorkbenchInitScaffold, } from "./init-scaffold.js";
 import { defaultAdapterManifests, composeRuntimeDockerfileWithAdapters, resolveDefaultWorkbenchAdapter, resolveProjectAdapterSource, resolveWorkbenchAdaptersForProject, WORKBENCH_ADAPTER_MANIFEST_FILE, } from "./adapter-project.js";
 import { createAdapterCommandEnv } from "./adapter-command-env.js";
-import { loadLocalArchive, loadLocalArchiveIndex, materializeCandidateRoot, readLocalCandidate, readLocalCandidateFiles, readLocalJobs, saveLocalArchive, saveLocalJobs, setLocalActive, upsertLocalRun, upsertLocalCandidate, upsertLocalEvaluation, } from "./local-archive.js";
+import { loadLocalArchive, loadLocalArchiveIndex, exportLocalRuntimeBundle, importLocalRuntimeBundle, runtimeBundleStats, materializeCandidateRoot, readLocalCandidate, readLocalCandidateFiles, readLocalJobs, saveLocalArchive, saveLocalJobs, setLocalActive, upsertLocalRun, upsertLocalCandidate, upsertLocalEvaluation, } from "./local-archive.js";
 import { WorkspaceSnapshotError, } from "./workspace-snapshot.js";
-import { readLocalProjectSource, WORKBENCH_BENCHMARK_FILE, } from "./project-source.js";
+import { hostedEngineResolveFiles, readLocalProjectSource, WORKBENCH_BENCHMARK_FILE, } from "./project-source.js";
 import { localBenchmarkFingerprint, localCandidateFingerprint, } from "./benchmark-fingerprint.js";
 const require = createRequire(import.meta.url);
 function getCliVersion() {
@@ -74,32 +74,38 @@ export async function runCli(argv, io = {
         if (argv[0] === "clone") {
             return await cloneProject(argv.slice(1), io);
         }
-        if (argv[0] === "fetch") {
-            return await fetchProject(argv.slice(1), io);
-        }
         if (argv[0] === "pull") {
             return await pullProject(argv.slice(1), io);
         }
         if (argv[0] === "push") {
             return await pushBenchmark(argv.slice(1), io);
         }
-        if (argv[0] === "remote") {
-            return await runRemoteCommand(argv.slice(1), io);
-        }
         if (argv[0] === "eval") {
-            return await localEvaluateCandidate(argv.slice(1), io, runtimeOptions);
+            const hosted = extractHostedFlag(argv.slice(1));
+            return hosted.enabled
+                ? await startHostedWorkflow("eval", hosted.argv, io)
+                : await localEvaluateCandidate(hosted.argv, io, runtimeOptions);
         }
         if (argv[0] === "retry") {
-            return await localRetry(argv.slice(1), io, runtimeOptions);
+            const hosted = extractHostedFlag(argv.slice(1));
+            return hosted.enabled
+                ? await retryHostedWorkflow(hosted.argv, io)
+                : await localRetry(hosted.argv, io, runtimeOptions);
         }
         if (argv[0] === "improve") {
-            return await localRun(argv.slice(1), io, runtimeOptions);
+            const hosted = extractHostedFlag(argv.slice(1));
+            return hosted.enabled
+                ? await startHostedWorkflow("improve", hosted.argv, io)
+                : await localRun(hosted.argv, io, runtimeOptions);
         }
         if (argv[0] === "restore") {
             return await localRestore(argv.slice(1), io);
         }
         if (argv[0] === "open") {
-            return await localDevOpen(argv.slice(1), io);
+            const hosted = extractHostedFlag(argv.slice(1));
+            return hosted.enabled
+                ? await openWorkbench(hosted.argv, io)
+                : await localDevOpen(hosted.argv, io);
         }
         if (argv[0] === "auth") {
             return await runAuthCommand(argv.slice(1), io);
@@ -110,9 +116,6 @@ export async function runCli(argv, io = {
         if (argv[0] === "traces") {
             return await runTracesCommand(argv.slice(1), io);
         }
-        if (argv[0] === "cloud") {
-            return await runCloudCommand(argv.slice(1), io);
-        }
         const commandPath = argv.slice(0, 2).join(" ");
         const rest = argv.slice(2);
         switch (commandPath) {
@@ -148,9 +151,6 @@ export async function runCli(argv, io = {
 }
 function commandPathForHelp(argv) {
     const positionals = argv.filter((arg) => arg !== "--help" && arg !== "-h" && !arg.startsWith("--"));
-    if (positionals[0] === "cloud") {
-        return positionals.slice(0, 3).join(" ");
-    }
     if (positionals[0] === "adapters" &&
         ["create", "list", "inspect", "test"].includes(positionals[1] ?? "")) {
         return positionals.slice(0, 2).join(" ");
@@ -159,7 +159,7 @@ function commandPathForHelp(argv) {
         ["collect", "list", "show"].includes(positionals[1] ?? "")) {
         return positionals.slice(0, 2).join(" ");
     }
-    if (positionals[0] === "auth" || positionals[0] === "remote") {
+    if (positionals[0] === "auth") {
         return positionals.slice(0, 2).join(" ");
     }
     if (positionals[0] === "runs" &&
@@ -172,65 +172,18 @@ function commandPathForHelp(argv) {
     }
     return positionals[0] ?? "";
 }
-async function runCloudCommand(argv, io) {
-    const command = argv[0];
-    const rest = argv.slice(1);
-    switch (command) {
-        case "eval":
-            return await startHostedWorkflow("eval", rest, io);
-        case "retry":
-            return await retryHostedWorkflow(rest, io);
-        case "improve":
-            return await startHostedWorkflow("improve", rest, io);
-        case "open":
-            return await openWorkbench(rest, io);
-        case "watch":
-            return await runWatch(rest, io);
-        case "logs":
-            return await runLogs(rest, io);
-        case "star":
-            return await starProject(rest, io, true);
-        case "unstar":
-            return await starProject(rest, io, false);
-        default:
-            break;
-    }
-    const commandPath = argv.slice(0, 2).join(" ");
-    const subRest = argv.slice(2);
-    switch (commandPath) {
-        case "benchmarks list":
-            return await benchmarkList(subRest, io);
-        case "benchmarks show":
-            return await benchmarkShow(subRest, io);
-        case "benchmarks versions":
-            return await benchmarkVersions(subRest, io);
-        case "benchmarks starred":
-            return await benchmarkStarred(subRest, io);
-        case "benchmarks delete":
-            return await benchmarkDelete(subRest, io);
-        case "runs list":
-            return await runList(subRest, io);
-        case "runs show":
-            return await runShow(subRest, io);
-        case "runs cancel":
-            return await runCancel(subRest, io);
-        case "candidates list":
-            return await candidateList(subRest, io);
-        case "candidates show":
-            return await candidateShow(subRest, io);
-        case "candidates files":
-            return await candidateFiles(subRest, io);
-        case "candidates preview":
-            return await candidatePreview(subRest, io);
-        case "candidates pull":
-            return await candidateExport(subRest, io);
-        case "candidates publish":
-            return await candidateVisibility(subRest, io, "public");
-        case "candidates unpublish":
-            return await candidateVisibility(subRest, io, "private");
-        default:
-            throw new UsageError(`Unknown command: cloud ${argv.join(" ")}`);
+function extractHostedFlag(argv) {
+    let enabled = false;
+    const next = [];
+    for (const arg of argv) {
+        if (arg === "--hosted") {
+            enabled = true;
+        }
+        else {
+            next.push(arg);
+        }
     }
+    return { enabled, argv: next };
 }
 async function localDevOpen(argv, io) {
     const parsed = parseArgs(argv);
@@ -708,12 +661,28 @@ async function localRun(argv, io, runtimeOptions) {
     if (caseIds.length === 0) {
         throw new UsageError("Engine resolver must emit at least one case.");
     }
+    const optimizeSelector = workbenchImproveOptimizeSelector(spec);
+    const selectionPolicy = workbenchImproveSelectionPolicy(spec);
+    const optimizeCaseIds = workbenchEngineCaseIdsForSelector(engineCases, optimizeSelector);
+    if (optimizeCaseIds.length === 0) {
+        throw new UsageError(`Improve optimizeOn selector matched no cases: ${formatWorkbenchCaseSelector(optimizeSelector)}.`);
+    }
+    const selectionCaseIds = workbenchEngineCaseIdsForSelector(engineCases, selectionPolicy.selector);
+    if (selectionCaseIds.length === 0) {
+        throw new UsageError(`Improve selectBy selector matched no cases: ${formatWorkbenchCaseSelector(selectionPolicy.selector)}.`);
+    }
+    const selectionScoreCaseIds = workbenchCaseSelectorUsesAllCases(selectionPolicy.selector)
+        ? undefined
+        : selectionCaseIds;
+    const evaluationCaseIds = workbenchEngineCaseIdsForImproveEvaluation({ spec, engineCases });
     requireValidRunEnvelope({
         workflow: "improve",
         budget,
         samples,
-        caseCount: caseIds.length,
+        caseCount: evaluationCaseIds.length,
     });
+    const optimizeOnLabel = formatWorkbenchCaseSelector(optimizeSelector);
+    const selectByLabel = formatWorkbenchSelectionPolicy(selectionPolicy);
     const environmentRefs = await ensureLocalDockerfileEnvironments(workspace, spec, engineCases);
     const benchmarkFingerprint = await readLocalBenchmarkFingerprint(workspace);
     const executionFingerprint = localRunExecutionFingerprint(projectSource);
@@ -771,7 +740,7 @@ async function localRun(argv, io, runtimeOptions) {
     const events = [
         createLocalEvent("run_started", startedAt, {
             runId,
-            detail: { budget, samples, strategy: "greedy" },
+            detail: { budget, samples, strategy: "greedy", optimizeOn: optimizeOnLabel, selectBy: selectByLabel },
         }),
     ];
     const runningRun = {
@@ -786,6 +755,8 @@ async function localRun(argv, io, runtimeOptions) {
         improver: formatSpecImprover(spec),
         engineRun: spec.engineRun.use,
         strategy: "greedy",
+        optimizeOn: optimizeOnLabel,
+        selectBy: selectByLabel,
         budget,
         repairBudget: 0,
         attemptsRequested: budget,
@@ -815,7 +786,7 @@ async function localRun(argv, io, runtimeOptions) {
                 throw new UsageError("Candidate snapshot must include at least one file.");
             }
             const candidateRevisionTraceFiles = createOptimizerTraceInputFiles({
-                jobs: [...baselineTraceJobs, ...runTraceJobs],
+                jobs: filterOptimizerTraceJobsForCaseIds([...baselineTraceJobs, ...runTraceJobs], optimizeCaseIds),
             });
             const candidateId = `candidate_${runId.replace(/^run_/u, "")}_${String(attemptIndex + 1).padStart(3, "0")}`;
             const plannedCandidateRevision = planWorkbenchExecutionJobsForPurpose({
@@ -825,7 +796,7 @@ async function localRun(argv, io, runtimeOptions) {
                 candidateId,
                 attemptIndex,
                 samples,
-                caseIds,
+                caseIds: optimizeCaseIds,
                 engineCases,
                 spec,
                 workflow: "improve",
@@ -861,7 +832,7 @@ async function localRun(argv, io, runtimeOptions) {
                     attemptIndex,
                     samples,
                     now: new Date().toISOString(),
-                    caseIds,
+                    caseIds: evaluationCaseIds,
                     engineCases,
                     spec,
                     environmentRefsByCase: environmentRefs.byCase,
@@ -891,16 +862,22 @@ async function localRun(argv, io, runtimeOptions) {
                 jobs: completedJobs,
                 previousCandidate: activeCandidate,
                 existingCandidateCount: snapshot.candidates.length,
+                selection: {
+                    metric: selectionPolicy.metric,
+                    ...(selectionScoreCaseIds ? { caseIds: selectionScoreCaseIds } : {}),
+                    label: selectByLabel,
+                },
             });
             for (const candidate of materialized.candidates) {
-                outputCandidateId = candidate.id;
-                snapshot = upsertLocalCandidate(snapshot, candidate, materialized.candidateFiles[candidate.id] ?? []);
-                events.push(createLocalEvent("candidate_created", candidate.createdAt, {
+                const localCandidate = localCandidateRecord(candidate);
+                outputCandidateId = localCandidate.id;
+                snapshot = upsertLocalCandidate(snapshot, localCandidate, materialized.candidateFiles[localCandidate.id] ?? []);
+                events.push(createLocalEvent("candidate_created", localCandidate.createdAt, {
                     runId,
-                    candidateId: candidate.id,
-                    baseId: candidate.baseId,
-                    status: candidate.status,
-                    metrics: evaluationMeanMetrics(candidate.eval),
+                    candidateId: localCandidate.id,
+                    baseId: localCandidate.baseId,
+                    status: localCandidate.status,
+                    metrics: evaluationMeanMetrics(localCandidate.eval),
                 }));
             }
             for (const evaluation of materialized.evaluations) {
@@ -944,6 +921,8 @@ async function localRun(argv, io, runtimeOptions) {
             improver: formatSpecImprover(spec),
             engineRun: spec.engineRun.use,
             strategy: "greedy",
+            optimizeOn: optimizeOnLabel,
+            selectBy: selectByLabel,
             budget,
             repairBudget: 0,
             attemptsRequested: budget,
@@ -1302,7 +1281,7 @@ async function localEvaluateCandidate(argv, io, runtimeOptions) {
             previousCandidate: existingCandidate ?? null,
             existingCandidateCount: snapshot.candidates.length,
         });
-        for (const candidateRecord of materialized.candidates) {
+        for (const candidateRecord of materialized.candidates.map(localCandidateRecord)) {
             snapshot = upsertLocalCandidate(snapshot, candidateRecord, materialized.candidateFiles[candidateRecord.id] ?? []);
         }
         if (materialized.activeCandidateId) {
@@ -2932,13 +2911,21 @@ function adapterAuthRecord(value) {
 }
 async function pushBenchmark(argv, io) {
     const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "tag", "visibility", "dry-run", "json"]));
+    rejectUnknownFlags(parsed, new Set(["dir", "visibility", "dry-run", "json"]));
     const dir = resolveSourceDir(parsed);
     const source = await readLocalProjectSource(dir);
     const origin = await readWorkbenchOrigin(dir);
     const baseUrl = await effectiveBaseUrl(origin?.baseUrl);
-    const visibility = readBenchmarkVisibility(parsed.flags.visibility);
+    const visibility = readOptionalBenchmarkVisibility(parsed.flags.visibility);
+    const createVisibility = visibility ?? "public";
     const dryRun = parsed.flags["dry-run"] === true;
+    const runtime = await exportLocalRuntimeBundle(dir);
+    const state = localProjectState({
+        source,
+        runtime,
+        origin,
+        visibility: createVisibility,
+    });
     if (!origin) {
         if (dryRun) {
             writeOutput({
@@ -2948,35 +2935,36 @@ async function pushBenchmark(argv, io) {
                 dir,
                 baseUrl,
                 benchmarkName: source.spec.name,
-                tag: asOptionalString(parsed.flags.tag) ?? null,
-                visibility,
+                visibility: createVisibility,
                 sourceFileCount: sourceFileCount(source),
+                runtime: runtimeBundleStats(runtime),
+                sourceFingerprint: state.source.fingerprint,
+                runtimeFingerprint: state.base.runtimeFingerprint,
             }, parsed, io, () => `Would push benchmark ${source.spec.name}.`);
             return 0;
         }
-        const { project, publishedProject, origin: nextOrigin } = await createHostedBenchmarkFromSource({
+        const { project, origin: nextOrigin, result } = await createHostedBenchmarkFromState({
             baseUrl,
             dir,
-            source,
-            visibility,
+            state,
         });
         writeOutput({
             ok: true,
             action: "create",
-            benchmark: publishedProject,
-            tag: asOptionalString(parsed.flags.tag) ?? null,
-            visibility,
+            benchmark: project,
+            visibility: project.visibility ?? createVisibility,
             origin: nextOrigin,
+            source: result.source,
+            runtime: result.runtime.stats,
             urls: buildWorkbenchResourceUrls({
                 baseUrl,
-                projectId: publishedProject.id ?? project.id,
-                owner: nextOrigin.owner,
-                projectName: nextOrigin.project,
+                projectId: project.id,
+                ...originRemoteUrlParts(nextOrigin),
             }),
         }, parsed, io, (record) => {
             const value = record;
             return [
-                `Pushed ${value.origin.owner}/${value.origin.project} (${value.origin.projectId}).`,
+                `Pushed ${value.origin.remote} (${value.origin.projectId}).`,
                 `Open benchmark: ${value.urls.benchmark}`,
             ].join("\n");
         });
@@ -2986,57 +2974,6 @@ async function pushBenchmark(argv, io) {
     if (!projectId) {
         throw new UsageError("Missing hosted benchmark. Run workbench push from a source directory.");
     }
-    if (!origin.writable) {
-        const signedInUsername = dryRun ? null : await readAuthenticatedWorkbenchUsername(baseUrl);
-        if (signedInUsername !== origin.owner) {
-            const upstream = upstreamFromOrigin(origin);
-            if (dryRun) {
-                writeOutput({
-                    ok: true,
-                    dryRun: true,
-                    action: "create",
-                    dir,
-                    baseUrl,
-                    benchmarkName: source.spec.name,
-                    tag: asOptionalString(parsed.flags.tag) ?? null,
-                    visibility,
-                    sourceFileCount: sourceFileCount(source),
-                    upstream: upstream ?? null,
-                }, parsed, io, () => `Would create a writable benchmark from read-only origin ${origin.owner}/${origin.project}.`);
-                return 0;
-            }
-            const { project, publishedProject, origin: nextOrigin } = await createHostedBenchmarkFromSource({
-                baseUrl,
-                dir,
-                source,
-                visibility,
-                upstream,
-            });
-            writeOutput({
-                ok: true,
-                action: "create",
-                benchmark: publishedProject,
-                tag: asOptionalString(parsed.flags.tag) ?? null,
-                visibility,
-                origin: nextOrigin,
-                upstream: upstream ?? null,
-                urls: buildWorkbenchResourceUrls({
-                    baseUrl,
-                    projectId: publishedProject.id ?? project.id,
-                    owner: nextOrigin.owner,
-                    projectName: nextOrigin.project,
-                }),
-            }, parsed, io, (record) => {
-                const value = record;
-                return [
-                    `Pushed ${value.origin.owner}/${value.origin.project} (${value.origin.projectId}).`,
-                    ...(value.upstream ? [`Upstream: ${value.upstream.owner}/${value.upstream.project}`] : []),
-                    `Open benchmark: ${value.urls.benchmark}`,
-                ].join("\n");
-            });
-            return 0;
-        }
-    }
     if (dryRun) {
         writeOutput({
             ok: true,
@@ -3045,92 +2982,78 @@ async function pushBenchmark(argv, io) {
             dir,
             baseUrl,
             benchmarkId: projectId,
-            tag: asOptionalString(parsed.flags.tag) ?? null,
-            visibility,
+            remote: origin.remote,
+            benchmarkName: source.spec.name,
+            visibility: visibility ?? "unchanged",
             sourceFileCount: sourceFileCount(source),
-        }, parsed, io, () => `Would push ${sourceFileCount(source)} source file(s) to ${projectId}.`);
+            runtime: runtimeBundleStats(runtime),
+            sourceFingerprint: state.source.fingerprint,
+            runtimeFingerprint: state.base.runtimeFingerprint,
+        }, parsed, io, () => `Would push ${sourceFileCount(source)} source file(s) and runtime history to ${origin.remote}.`);
         return 0;
     }
-    const response = await apiRequest(projectApiPath(projectId, "/source"), {
+    const response = await apiRequest(projectApiPath(projectId, "/state"), {
         method: "PUT",
-        body: hostedProjectSourceRequest(source),
+        body: state,
     }, baseUrl);
-    const publishedProject = visibility === "public"
-        ? (await apiRequest(projectApiPath(response.benchmark.id, "/publish"), { method: "PUT" }, baseUrl)).benchmark
-        : response.benchmark;
-    const nextOrigin = await writeWorkbenchOrigin(dir, {
+    const responseProject = hostedProjectSummaryFromState(response.state);
+    const publishedProject = await applyRequestedProjectVisibility({
         baseUrl,
-        owner: publishedProject.ownerUsername ?? response.benchmark.ownerUsername ?? origin.owner,
-        project: publishedProject.name ?? response.benchmark.name ?? origin.project ?? source.spec.name,
-        projectId: publishedProject.id ?? response.benchmark.id,
-        writable: true,
-        sourceRevisionId: publishedProject.currentSpecVersionId ?? response.benchmark.currentSpecVersionId,
-        sourceFingerprint: response.sourceFingerprint ?? publishedProject.sourceFingerprint ?? response.benchmark.sourceFingerprint,
-        upstream: origin.upstream,
+        projectId: responseProject.id,
+        responseProject,
+        visibility,
+    });
+    const nextOrigin = await writeWorkbenchOriginFromState(dir, {
+        baseUrl,
+        state: response.state,
     });
     writeOutput({
         ok: true,
         action: "update",
         changed: response.changed === true,
         benchmark: publishedProject,
-        tag: asOptionalString(parsed.flags.tag) ?? null,
-        visibility,
+        visibility: visibility ?? "unchanged",
         origin: nextOrigin,
+        source: response.source,
+        runtime: response.runtime.stats,
         urls: buildWorkbenchResourceUrls({
             baseUrl,
-            projectId: publishedProject.id ?? response.benchmark.id,
-            owner: nextOrigin.owner,
-            projectName: nextOrigin.project,
+            projectId: publishedProject.id ?? responseProject.id,
+            ...originRemoteUrlParts(nextOrigin),
         }),
     }, parsed, io, (record) => {
         const value = record;
         return [
-            `${value.changed ? "Pushed" : "Already up to date"} ${value.origin.owner}/${value.origin.project} (${value.origin.projectId}).`,
+            `${value.changed ? "Pushed" : "Already up to date"} ${value.origin.remote} (${value.origin.projectId}).`,
             `Open benchmark: ${value.urls.benchmark}`,
         ].join("\n");
     });
     return 0;
 }
-async function createHostedBenchmarkFromSource(args) {
-    const response = await apiRequest("/api/workbench/benchmarks", {
+async function createHostedBenchmarkFromState(args) {
+    const result = await apiRequest("/api/workbench/benchmarks/state", {
         method: "POST",
-        body: hostedProjectSourceRequest(args.source),
+        body: args.state,
     }, args.baseUrl);
-    const project = response.benchmark;
-    const publishedProject = args.visibility === "public"
-        ? (await apiRequest(projectApiPath(project.id, "/publish"), { method: "PUT" }, args.baseUrl)).benchmark
-        : project;
-    const origin = await writeWorkbenchOrigin(args.dir, {
+    const project = hostedProjectSummaryFromState(result.state);
+    const origin = await writeWorkbenchOriginFromState(args.dir, {
         baseUrl: args.baseUrl,
-        owner: publishedProject.ownerUsername ?? project.ownerUsername ?? "",
-        project: publishedProject.name ?? project.name ?? args.source.spec.name,
-        projectId: publishedProject.id ?? project.id,
-        writable: true,
-        sourceRevisionId: publishedProject.currentSpecVersionId ?? project.currentSpecVersionId,
-        sourceFingerprint: publishedProject.sourceFingerprint ?? project.sourceFingerprint,
-        ...(args.upstream ? { upstream: args.upstream } : {}),
+        state: result.state,
     });
-    return { project, publishedProject, origin };
-}
-async function readAuthenticatedWorkbenchUsername(baseUrl) {
-    const config = await loadConfig();
-    const status = await readWorkbenchProfileStatus({ ...config, baseUrl });
-    return status.authenticated ? status.profile?.username ?? null : null;
+    return { project, origin, result };
 }
-function upstreamFromOrigin(origin) {
-    if (!origin.owner || !origin.project || !origin.projectId || !origin.sourceRevisionId) {
-        return undefined;
+async function applyRequestedProjectVisibility(args) {
+    if (args.visibility === "public") {
+        return (await apiRequest(projectApiPath(args.projectId, "/publish"), { method: "PUT" }, args.baseUrl)).benchmark;
     }
-    return {
-        owner: origin.owner,
-        project: origin.project,
-        projectId: origin.projectId,
-        sourceRevisionId: origin.sourceRevisionId,
-    };
+    if (args.visibility === "private") {
+        return (await apiRequest(projectApiPath(args.projectId, "/publish"), { method: "DELETE" }, args.baseUrl)).benchmark;
+    }
+    return args.responseProject;
 }
-function readBenchmarkVisibility(value) {
+function readOptionalBenchmarkVisibility(value) {
     if (value === undefined) {
-        return "public";
+        return undefined;
     }
     if (value === "private" || value === "public") {
         return value;
@@ -3143,41 +3066,37 @@ async function cloneProject(argv, io) {
     const ref = readRequiredBenchmarkRef(parsed);
     const outputDir = parsed.positionals[1] ?? ref.project;
     if (parsed.positionals.length > 2) {
-        throw new UsageError("workbench clone accepts OWNER/BENCHMARK[@REF] and an optional output directory.");
+        throw new UsageError("workbench clone accepts OWNER/BENCHMARK and an optional output directory.");
     }
     const baseUrl = await effectiveBaseUrl();
-    const projectResponse = await apiRequest(publicProjectApiPath(ref), {}, baseUrl);
-    const filesResponse = await apiRequest(publicProjectSourceApiPath(ref), {}, baseUrl);
+    const state = await apiRequest(publicProjectStateApiPath(ref), {}, baseUrl);
     if (parsed.flags["dry-run"] === true) {
         writeOutput({
             ok: true,
             dryRun: true,
             ref,
             outputDir,
-            fileCount: filesResponse.files.length,
+            fileCount: state.source.files.length,
+            runtime: runtimeBundleStats(state.runtime),
+            sourceFingerprint: state.source.fingerprint ?? state.base.sourceFingerprint ?? null,
+            runtimeFingerprint: state.base.runtimeFingerprint ?? null,
         }, parsed, io, () => `Would clone ${formatBenchmarkRef(ref)} to ${outputDir}.`);
         return 0;
     }
-    await syncSourceFiles(outputDir, filesResponse.files);
-    const project = projectResponse.benchmark;
-    const sourceProject = filesResponse.benchmark;
-    const origin = await writeWorkbenchOrigin(outputDir, {
+    const applied = await applyProjectStateToLocal({
+        dir: outputDir,
         baseUrl,
-        owner: sourceProject?.ownerUsername ?? project.ownerUsername,
-        project: sourceProject?.name ?? project.name,
-        projectId: sourceProject?.id ?? project.id,
-        writable: false,
-        sourceRevisionId: sourceProject?.currentSpecVersionId ?? project.currentSpecVersionId,
-        sourceFingerprint: sourceProject?.sourceFingerprint ?? project.sourceFingerprint,
+        state,
     });
     writeOutput({
         ok: true,
-        origin,
+        origin: applied.origin,
         outputDir,
-        files: filesResponse.files.length,
+        files: applied.files,
+        runtime: applied.runtime,
     }, parsed, io, (record) => {
         const value = record;
-        return `Cloned ${value.origin.owner}/${value.origin.project} to ${value.outputDir} (${value.files} file(s)).`;
+        return `Cloned ${value.origin.remote} to ${value.outputDir} (${value.files} file(s)).`;
     });
     return 0;
 }
@@ -3185,167 +3104,60 @@ async function pullProject(argv, io) {
     const parsed = parseArgs(argv);
     rejectUnknownFlags(parsed, new Set(["dir", "dry-run", "json"]));
     if (parsed.positionals.length > 0) {
-        throw new UsageError("workbench pull updates the current origin; use workbench clone OWNER/BENCHMARK[@REF] DIR for a new directory.");
+        throw new UsageError("workbench pull updates the current origin; use workbench clone OWNER/BENCHMARK DIR for a new directory.");
     }
     const dir = resolveDir(parsed);
     const origin = await requireWorkbenchOrigin(dir);
-    const filesResponse = origin.writable
-        ? await apiRequest(projectApiPath(origin.projectId, "/source"), {}, await effectiveBaseUrl(origin.baseUrl))
-        : await apiRequest(publicProjectSourceApiPath({ owner: origin.owner, project: origin.project }), {}, await effectiveBaseUrl(origin.baseUrl));
+    const baseUrl = await effectiveBaseUrl(origin.baseUrl);
+    const remoteRef = parseOriginRemote(origin);
+    const state = await apiRequest(publicProjectStateApiPath(remoteRef), {}, baseUrl);
     if (parsed.flags["dry-run"] === true) {
         writeOutput({
             ok: true,
             dryRun: true,
             dir,
-            fileCount: filesResponse.files.length,
-        }, parsed, io, () => `Would pull ${filesResponse.files.length} source file(s) into ${dir}.`);
+            fileCount: state.source.files.length,
+            runtime: runtimeBundleStats(state.runtime),
+            sourceFingerprint: state.source.fingerprint ?? state.base.sourceFingerprint ?? null,
+            runtimeFingerprint: state.base.runtimeFingerprint ?? null,
+        }, parsed, io, () => `Would pull ${state.source.files.length} source file(s) and runtime history into ${dir}.`);
         return 0;
     }
-    await syncSourceFiles(dir, filesResponse.files);
-    const sourceProject = filesResponse.benchmark;
-    const nextOrigin = await writeWorkbenchOrigin(dir, {
-        ...origin,
-        ...(sourceProject?.ownerUsername ? { owner: sourceProject.ownerUsername } : {}),
-        ...(sourceProject?.name ? { project: sourceProject.name } : {}),
-        ...(sourceProject?.id ? { projectId: sourceProject.id } : {}),
-        ...(sourceProject?.currentSpecVersionId ? { sourceRevisionId: sourceProject.currentSpecVersionId } : {}),
-        ...(sourceProject?.sourceFingerprint ? { sourceFingerprint: sourceProject.sourceFingerprint } : {}),
-    });
-    writeOutput({
-        ok: true,
-        origin: nextOrigin,
+    const applied = await applyProjectStateToLocal({
         dir,
-        files: filesResponse.files.length,
-    }, parsed, io, (record) => {
-        const value = record;
-        return `Pulled ${value.files} source file(s) into ${value.dir}.`;
-    });
-    return 0;
-}
-async function fetchProject(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
-    if (parsed.positionals.length > 0) {
-        throw new UsageError("workbench fetch updates the current remote cache; use workbench clone OWNER/BENCHMARK[@REF] DIR for a new directory.");
-    }
-    const dir = resolveDir(parsed);
-    const origin = await requireWorkbenchOrigin(dir);
-    const filesResponse = await readRemoteSourceFiles(origin);
-    const fetchRoot = path.join(dir, ".workbench", "fetch");
-    await fs.rm(fetchRoot, { force: true, recursive: true });
-    await fs.mkdir(fetchRoot, { recursive: true });
-    await writeFiles(path.join(fetchRoot, "source"), filesResponse.files);
-    const sourceProject = filesResponse.benchmark;
-    const nextOrigin = await writeWorkbenchOrigin(dir, {
-        ...origin,
-        ...(sourceProject?.ownerUsername ? { owner: sourceProject.ownerUsername } : {}),
-        ...(sourceProject?.name ? { project: sourceProject.name } : {}),
-        ...(sourceProject?.id ? { projectId: sourceProject.id } : {}),
-        ...(sourceProject?.currentSpecVersionId ? { sourceRevisionId: sourceProject.currentSpecVersionId } : {}),
-        ...(sourceProject?.sourceFingerprint ? { sourceFingerprint: sourceProject.sourceFingerprint } : {}),
+        baseUrl,
+        state,
+        origin,
+        requireCleanSource: true,
     });
-    await fs.writeFile(path.join(fetchRoot, "manifest.json"), `${JSON.stringify({
-        fetchedAt: new Date().toISOString(),
-        origin: nextOrigin,
-        files: filesResponse.files.map((file) => file.path),
-    }, null, 2)}\n`);
     writeOutput({
         ok: true,
-        origin: nextOrigin,
+        origin: applied.origin,
         dir,
-        fetchRoot,
-        files: filesResponse.files.length,
+        files: applied.files,
+        runtime: applied.runtime,
     }, parsed, io, (record) => {
         const value = record;
-        return `Fetched ${value.files} source file(s) into ${value.fetchRoot}.`;
-    });
-    return 0;
-}
-async function readRemoteSourceFiles(origin) {
-    return origin.writable
-        ? await apiRequest(projectApiPath(origin.projectId, "/source"), {}, await effectiveBaseUrl(origin.baseUrl))
-        : await apiRequest(publicProjectSourceApiPath({ owner: origin.owner, project: origin.project }), {}, await effectiveBaseUrl(origin.baseUrl));
-}
-async function runRemoteCommand(argv, io) {
-    const command = argv[0] ?? "show";
-    switch (command) {
-        case "show":
-            return await remoteShow(argv.slice(1), io);
-        case "add":
-            return await remoteAdd(argv.slice(1), io, "add");
-        case "set-url":
-            return await remoteAdd(argv.slice(1), io, "set-url");
-        case "remove":
-            return await remoteRemove(argv.slice(1), io);
-        default:
-            throw new UsageError(`Unknown command: remote ${argv.join(" ")}`);
-    }
-}
-async function remoteShow(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
-    const origin = await requireWorkbenchOrigin(resolveDir(parsed));
-    writeOutput({ ok: true, remote: "origin", origin }, parsed, io, (record) => {
-        const value = record;
-        return [
-            `origin\t${value.origin.owner}/${value.origin.project}`,
-            `url\t${value.origin.baseUrl}`,
-            `writable\t${value.origin.writable ? "yes" : "no"}`,
-            ...(value.origin.sourceFingerprint ? [`fingerprint\t${value.origin.sourceFingerprint}`] : []),
-        ].join("\n");
-    });
-    return 0;
-}
-async function remoteAdd(argv, io, command) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
-    const [name, refValue] = parsed.positionals;
-    if (name !== "origin" || !refValue || parsed.positionals.length !== 2) {
-        throw new UsageError(`workbench remote ${command} accepts: origin OWNER/BENCHMARK[@REF].`);
-    }
-    const ref = parseBenchmarkRef(refValue);
-    const baseUrl = await effectiveBaseUrl();
-    const project = await resolveRemoteProject(formatBenchmarkRef(ref), baseUrl);
-    const origin = await writeWorkbenchOrigin(resolveDir(parsed), {
-        baseUrl,
-        owner: project.ownerUsername ?? ref.owner,
-        project: project.name ?? ref.project,
-        projectId: project.id,
-        writable: false,
-        ...(project.currentSpecVersionId ? { sourceRevisionId: project.currentSpecVersionId } : {}),
-        ...(project.sourceFingerprint ? { sourceFingerprint: project.sourceFingerprint } : {}),
+        return `Pulled ${value.files} source file(s) into ${value.dir}.`;
     });
-    writeOutput({ ok: true, remote: "origin", origin }, parsed, io, () => `Set origin to ${origin.owner}/${origin.project}.`);
-    return 0;
-}
-async function remoteRemove(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
-    const [name] = parsed.positionals;
-    if (name !== "origin" || parsed.positionals.length !== 1) {
-        throw new UsageError("workbench remote remove accepts: origin.");
-    }
-    const originPath = workbenchOriginPath(resolveDir(parsed));
-    const existed = await fileIsReadable(originPath);
-    await fs.rm(originPath, { force: true });
-    writeOutput({ ok: true, remote: "origin", removed: existed, path: originPath }, parsed, io, () => existed
-        ? `Removed origin (${originPath}).`
-        : `No origin configured (${originPath}).`);
     return 0;
 }
-async function starProject(argv, io, starred) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["json"]));
-    const ref = readRequiredBenchmarkRef(parsed);
-    if (parsed.positionals.length > 1) {
-        throw new UsageError(`${starred ? "workbench cloud star" : "workbench cloud unstar"} accepts exactly one OWNER/BENCHMARK ref.`);
+async function applyProjectStateToLocal(args) {
+    if (args.requireCleanSource === true && args.origin) {
+        await assertLocalSourceMatchesOrigin(args.dir, args.origin);
     }
-    const response = await apiRequest(`${publicProjectApiPath(ref)}/star`, { method: starred ? "PUT" : "DELETE" }, await effectiveBaseUrl());
-    writeOutput({ ok: true, benchmark: response.benchmark }, parsed, io, (record) => {
-        const value = record;
-        return `${starred ? "Starred" : "Unstarred"} ${formatBenchmarkRef(ref)}; ${value.benchmark.starCount} star(s).`;
+    await syncSourceFiles(args.dir, args.state.source.files);
+    const benchmarkFingerprint = localBenchmarkFingerprint(await readLocalProjectSource(args.dir));
+    const runtimeImport = await importLocalRuntimeBundle(args.dir, args.state.runtime, benchmarkFingerprint);
+    const origin = await writeWorkbenchOriginFromState(args.dir, {
+        baseUrl: args.baseUrl,
+        state: args.state,
     });
-    return 0;
+    return {
+        origin,
+        files: args.state.source.files.length,
+        runtime: runtimeImport.stats,
+    };
 }
 async function retryHostedWorkflow(argv, io) {
     const parsed = parseArgs(argv);
@@ -3357,7 +3169,7 @@ async function retryHostedWorkflow(argv, io) {
         "timeout-ms",
         "json",
     ]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud retry", 1);
+    rejectUnexpectedPositionals(parsed, "workbench retry --hosted", 1);
     const targetId = parsed.positionals[0];
     if (!targetId) {
         throw new UsageError("Missing required TARGET_ID.");
@@ -3378,19 +3190,21 @@ async function retryHostedWorkflow(argv, io) {
         method: "POST",
         body: retryTarget.request,
     }, target.baseUrl);
-    const startedRun = withRunUrls(target, response.run);
+    const runTarget = hostedTargetForRunStartResponse(target, response);
+    const startedRun = withRunUrls(runTarget, response.run);
     if (parsed.flags.watch === true) {
         if (parsed.flags.json !== true) {
             io.stdout.write(`${formatHostedRunStarted(startedRun, retryTarget.workflow).trimEnd()}\n${HOSTED_WATCH_LIFECYCLE_NOTE}\n`);
         }
         const watched = await watchHostedRun({
             parsed,
-            target,
+            target: runTarget,
             runId: response.run.id,
             intervalMs: watchIntervalMs ?? 1000,
             timeoutMs: watchTimeoutMs,
         });
-        const outputRun = withRunUrls(target, await withHostedRunFailureSummary(target, watched));
+        const outputRun = withRunUrls(runTarget, await withHostedRunFailureSummary(runTarget, watched));
+        await tryImportTerminalHostedProjectState({ target: runTarget, io });
         const result = {
             ok: hostedRunSucceeded(watched),
             retried: {
@@ -3435,7 +3249,7 @@ async function resolveHostedRetryTarget(target, targetId) {
         throw new UsageError(`Run ${run.id} is ${run.status}; wait for it to finish before retrying.`);
     }
     if (!hostedRunRecordFailed(run)) {
-        throw new UsageError(`Run ${run.id} did not fail; use workbench cloud ${run.workflow ?? "eval"} to intentionally run it again.`);
+        throw new UsageError(`Run ${run.id} did not fail; use workbench ${run.workflow ?? "eval"} --hosted to intentionally run it again.`);
     }
     if (run.workflow === "eval") {
         const candidateId = hostedRunEvaluationCandidateId(run, detail.jobs);
@@ -3485,7 +3299,7 @@ async function resolveHostedEvaluationRetryTarget(target, evaluationId) {
     }
     const run = snapshot.runs.find((entry) => entry.id === evaluation.runId) ?? null;
     if (!evaluationScorecardFailed(evaluation, run)) {
-        throw new UsageError(`Evaluation ${evaluation.id} did not fail; use workbench cloud eval to intentionally run it again.`);
+        throw new UsageError(`Evaluation ${evaluation.id} did not fail; use workbench eval --hosted to intentionally run it again.`);
     }
     if (!run) {
         throw new UsageError(`Evaluation ${evaluation.id} is missing its run record.`);
@@ -3527,6 +3341,25 @@ function uniqueCaseSamplePairs(pairs) {
 async function readHostedRunDetail(target, runId) {
     return await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(runId)}`), {}, target.baseUrl);
 }
+async function tryImportTerminalHostedProjectState(args) {
+    const origin = args.target.origin;
+    if (!origin || origin.projectId !== args.target.projectId) {
+        return;
+    }
+    try {
+        const state = await apiRequest(projectApiPath(args.target.projectId, "/state"), {}, args.target.baseUrl);
+        await applyProjectStateToLocal({
+            dir: args.target.dir,
+            baseUrl: args.target.baseUrl,
+            state,
+            origin,
+            requireCleanSource: true,
+        });
+    }
+    catch (error) {
+        args.io.stderr.write(`Hosted run finished, but local project state was not updated: ${errorMessage(error)}\n`);
+    }
+}
 function hostedRetrySourceYaml(run, runId) {
     const sourceYaml = stringValue(readRecord(run.input)?.sourceYaml);
     if (!sourceYaml) {
@@ -3542,12 +3375,10 @@ function hostedRunRecordFailed(run) {
 }
 async function startHostedWorkflow(workflow, argv, io) {
     const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set([
+    const allowedFlags = new Set([
         "dir",
         "benchmark",
-        "base",
         "runs",
-        "budget",
         "samples",
         "rerun",
         "watch",
@@ -3555,14 +3386,19 @@ async function startHostedWorkflow(workflow, argv, io) {
         "interval-ms",
         "timeout-ms",
         "json",
-    ]));
-    if (parsed.positionals.length > 1) {
-        throw new UsageError(`workbench cloud ${workflow} accepts at most one source file or directory argument.`);
+    ]);
+    if (workflow === "eval") {
+        allowedFlags.add("candidate");
     }
-    const sourceArg = parsed.positionals[0] ?? asOptionalString(parsed.flags.dir) ?? process.cwd();
-    if (parsed.positionals.length > 0 && parsed.flags.dir !== undefined) {
-        throw new UsageError("Use either --dir or SOURCE, not both.");
+    else {
+        allowedFlags.add("base");
+        allowedFlags.add("budget");
     }
+    rejectUnknownFlags(parsed, allowedFlags);
+    if (parsed.positionals.length > 1) {
+        throw new UsageError(`workbench ${workflow} --hosted accepts at most one source file or directory argument.`);
+    }
+    const sourceArg = resolveSourceDir(parsed);
     const samples = parsePositiveInt(parsed.flags.samples, 1, "samples");
     const budget = workflow === "improve"
         ? parsePositiveInt(parsed.flags.budget, 1, "budget")
@@ -3575,7 +3411,7 @@ async function startHostedWorkflow(workflow, argv, io) {
     const defaultProjectSource = await readLocalProjectSource(path.resolve(sourceArg));
     const selectedRunIds = workflow === "eval"
         ? resolveCandidateRunSelection(defaultProjectSource, runsFlag)
-        : [singleRequestedRunId(runsFlag, `workbench cloud ${workflow}`) ?? defaultProjectSource.candidateRunId];
+        : [singleRequestedRunId(runsFlag, `workbench ${workflow} --hosted`) ?? defaultProjectSource.candidateRunId];
     if (workflow === "eval" && selectedRunIds.length > 1) {
         let failed = 0;
         const results = [];
@@ -3599,25 +3435,27 @@ async function startHostedWorkflow(workflow, argv, io) {
         }, parsed, io, () => `Processed ${selectedRunIds.length} hosted candidate run(s); ${failed} failed.`);
         return failed === 0 ? 0 : 1;
     }
-    const baseCandidateId = asOptionalString(parsed.flags.base);
+    const selectedCandidateId = workflow === "eval"
+        ? asOptionalString(parsed.flags.candidate)
+        : asOptionalString(parsed.flags.base);
     const request = workflow === "improve"
         ? {
             workflow,
             budget,
             samples,
-            ...(baseCandidateId ? { candidateId: baseCandidateId } : {}),
+            ...(selectedCandidateId ? { candidateId: selectedCandidateId } : {}),
         }
         : {
             workflow,
             samples,
-            ...(baseCandidateId ? { candidateId: baseCandidateId } : {}),
+            ...(selectedCandidateId ? { candidateId: selectedCandidateId } : {}),
         };
     const projectSource = selectedRunIds[0] === defaultProjectSource.candidateRunId
         ? defaultProjectSource
         : await readLocalProjectSource(path.resolve(sourceArg), { runId: selectedRunIds[0] });
     request.sourceYaml = projectSource.specSource;
     request.adapterFiles = projectSource.adapterFiles;
-    if (workflow === "eval" && !baseCandidateId) {
+    if (workflow === "eval" && !selectedCandidateId) {
         request.candidateFiles = projectSource.candidateFiles;
     }
     if (parsed.flags.rerun === true) {
@@ -3652,22 +3490,25 @@ async function startHostedWorkflow(workflow, argv, io) {
             parsed,
             target,
             samples: request.samples,
-            candidateId: baseCandidateId,
+            candidateId: selectedCandidateId,
             sourceYaml: projectSource.specSource,
             adapterFiles: projectSource.adapterFiles,
             intervalMs: watchIntervalMs ?? 1000,
             timeoutMs: watchTimeoutMs,
+            io,
         });
     }
     const response = await apiRequest(projectApiPath(target.projectId, "/runs"), {
         method: "POST",
         body: request,
     }, target.baseUrl);
-    const startedRun = withRunUrls(target, response.run);
+    const runTarget = hostedTargetForRunStartResponse(target, response);
+    const startedRun = withRunUrls(runTarget, response.run);
     const startedRunOutput = response.reused === true
         ? { ...startedRun, reused: true }
         : startedRun;
     if (response.reused === true && response.run.status === "finished") {
+        await tryImportTerminalHostedProjectState({ target: runTarget, io });
         writeOutput({
             ok: hostedRunSucceeded(response.run),
             reused: true,
@@ -3683,13 +3524,14 @@ async function startHostedWorkflow(workflow, argv, io) {
         }
         const watched = await watchHostedRun({
             parsed,
-            target,
+            target: runTarget,
             runId: response.run.id,
             intervalMs: watchIntervalMs ?? 1000,
             timeoutMs: watchTimeoutMs,
         });
-        const outputRun = await withHostedRunFailureSummary(target, watched);
-        writeOutput(withRunUrls(target, outputRun), parsed, io, formatHostedRunResult);
+        const outputRun = await withHostedRunFailureSummary(runTarget, watched);
+        await tryImportTerminalHostedProjectState({ target: runTarget, io });
+        writeOutput(withRunUrls(runTarget, outputRun), parsed, io, formatHostedRunResult);
         return hostedRunSucceeded(watched) ? 0 : 1;
     }
     writeOutput(startedRunOutput, parsed, io, (run) => formatHostedRunStarted(run, workflow).trimEnd());
@@ -3721,9 +3563,10 @@ async function ensureHostedImproveBaseCandidate(args) {
             ...(args.adapterFiles.length > 0 ? { adapterFiles: args.adapterFiles } : {}),
         },
     }, args.target.baseUrl);
+    const runTarget = hostedTargetForRunStartResponse(args.target, response);
     const watched = await watchHostedRun({
         parsed: args.parsed,
-        target: args.target,
+        target: runTarget,
         runId: response.run.id,
         intervalMs: args.intervalMs,
         timeoutMs: args.timeoutMs,
@@ -3734,14 +3577,14 @@ async function ensureHostedImproveBaseCandidate(args) {
     if (!watched.candidateId) {
         throw new UsageError(`Parent candidate eval ${watched.id} did not produce a candidate.`);
     }
+    await tryImportTerminalHostedProjectState({ target: runTarget, io: args.io });
     return watched.candidateId;
 }
 function hostedWorkflowArgsForRun(args) {
     const next = ["--dir", args.sourceDir, "--runs", args.runId, "--json"];
     appendStringFlag(next, "benchmark", asOptionalString(args.parsed.flags.benchmark));
-    appendStringFlag(next, "base", asOptionalString(args.parsed.flags.base));
+    appendStringFlag(next, "candidate", asOptionalString(args.parsed.flags.candidate));
     appendStringFlag(next, "samples", asOptionalString(args.parsed.flags.samples));
-    appendStringFlag(next, "budget", asOptionalString(args.parsed.flags.budget));
     appendStringFlag(next, "interval-ms", asOptionalString(args.parsed.flags["interval-ms"]));
     appendStringFlag(next, "timeout-ms", asOptionalString(args.parsed.flags["timeout-ms"]));
     if (args.parsed.flags.watch === true) {
@@ -3776,310 +3619,11 @@ async function readEvaluatedActiveHostedCandidate(target) {
 function hostedCandidateIsEvaluated(candidate) {
     return candidate.status === "evaluated" || candidate.eval != null;
 }
-async function benchmarkList(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud benchmarks list", 0);
-    const response = await apiRequest("/api/workbench/public/benchmarks");
-    writeOutput(response.benchmarks, parsed, io, (projects) => {
-        if (projects.length === 0) {
-            return "No hosted Workbench benchmarks.";
-        }
-        return projects
-            .map((project) => `${project.id}\t${project.name}\t${project.runCount} runs\t${project.candidateCount} candidates`)
-            .join("\n");
-    });
-    return 0;
-}
-async function benchmarkShow(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud benchmarks show", 1);
-    const dir = resolveDir(parsed);
-    const origin = await readWorkbenchOrigin(dir);
-    const projectRef = parsed.positionals[0] ??
-        origin?.projectId;
-    if (!projectRef) {
-        throw new UsageError("Missing hosted benchmark. Pass OWNER/BENCHMARK, run workbench push, or run workbench clone.");
-    }
-    const response = await apiRequest(benchmarkApiPath(projectRef), {}, await effectiveBaseUrl(origin?.baseUrl));
-    writeOutput(response.benchmark, parsed, io, (project) => {
-        const record = project;
-        return `${record.name} (${record.id})\n${record.runs.length} runs\n${record.candidates.length} candidates`;
-    });
-    return 0;
-}
-async function benchmarkDelete(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "dry-run", "json"]));
-    if (parsed.positionals.length > 1) {
-        throw new UsageError(`Unexpected argument for workbench benchmarks delete: ${parsed.positionals.slice(1).join(" ")}`);
-    }
-    const dir = resolveDir(parsed);
-    const origin = await readWorkbenchOrigin(dir);
-    const projectRef = parsed.positionals[0] ??
-        origin?.projectId;
-    if (!projectRef) {
-        throw new UsageError("Missing hosted benchmark. Pass OWNER/BENCHMARK, run workbench push, or run workbench clone.");
-    }
-    const originPath = workbenchOriginPath(dir);
-    const baseUrl = await effectiveBaseUrl(origin?.baseUrl);
-    if (parsed.flags["dry-run"] === true) {
-        const originProjectDeleted = originMatchesProjectRef(origin, projectRef);
-        writeOutput({
-            ok: true,
-            dryRun: true,
-            projectRef,
-            ...(isRemoteProjectId(projectRef) ? { projectId: projectRef } : {}),
-            ...(originProjectDeleted && origin?.project ? { projectName: origin.project } : {}),
-            baseUrl,
-            ...(originProjectDeleted ? { originPath } : {}),
-        }, parsed, io, () => originProjectDeleted
-            ? `Would delete hosted benchmark ${projectRef} and remove local origin ${originPath}.`
-            : `Would delete hosted benchmark ${projectRef}.`);
-        return 0;
-    }
-    const project = await resolveRemoteProject(projectRef, baseUrl);
-    const projectId = project.id;
-    const projectName = project.name;
-    const originProjectDeleted = origin ? origin.projectId === projectId : false;
-    await apiRequest(projectApiPath(projectId), { method: "DELETE" }, baseUrl);
-    if (originProjectDeleted) {
-        await fs.rm(originPath, { force: true });
-    }
-    writeOutput({
-        ok: true,
-        deleted: true,
-        projectId,
-        ...(projectName ? { projectName } : {}),
-        originRemoved: originProjectDeleted,
-        ...(originProjectDeleted ? { originPath } : {}),
-    }, parsed, io, () => originProjectDeleted
-        ? `Deleted benchmark ${formatProjectRef(project)} and removed local origin ${originPath}.`
-        : `Deleted benchmark ${formatProjectRef(project)}.`);
-    return 0;
-}
-async function benchmarkVersions(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud benchmarks versions", 1);
-    const projectRef = parsed.positionals[0];
-    const origin = await readWorkbenchOrigin(resolveDir(parsed));
-    if (!projectRef && !origin) {
-        throw new UsageError("Missing benchmark ref. Pass OWNER/BENCHMARK or run from a benchmark clone.");
-    }
-    const response = await apiRequest(benchmarkApiPath(projectRef ?? origin.projectId), {}, await effectiveBaseUrl(origin?.baseUrl));
-    const version = response.benchmark.sourceFingerprint ?? response.benchmark.currentSpecVersionId ?? "current";
-    writeOutput({
-        ok: true,
-        benchmark: response.benchmark,
-        versions: [{ ref: "main", digest: version, current: true }],
-    }, parsed, io, () => `${response.benchmark.name ?? projectRef ?? origin.project}\tmain\t${shortDigest(version)}\tcurrent`);
-    return 0;
-}
-async function benchmarkStarred(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud benchmarks starred", 0);
-    const response = await apiRequest("/api/workbench/benchmarks");
-    const starred = response.benchmarks.filter((project) => project.viewerHasStarred === true);
-    writeOutput(starred, parsed, io, (benchmarks) => {
-        if (benchmarks.length === 0) {
-            return "No starred benchmarks.";
-        }
-        return benchmarks
-            .map((benchmark) => `${benchmark.ownerUsername ?? "-"} / ${benchmark.name ?? "-"}\t${benchmark.starCount ?? 0} stars`)
-            .join("\n");
-    });
-    return 0;
-}
-async function candidateList(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud candidates list", 0);
-    const target = await resolveHostedTarget(parsed);
-    const response = await apiRequest(projectApiPath(target.projectId, "/candidates"), {}, target.baseUrl);
-    writeOutput(response.candidates, parsed, io, (candidates) => {
-        if (candidates.length === 0) {
-            return "No candidates yet.";
-        }
-        return candidates
-            .map((candidate) => `${candidate.id}\t${candidate.status}\t${candidate.fileChanges?.length ?? 0} files`)
-            .join("\n");
-    });
-    return 0;
-}
-async function candidateShow(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud candidates show", 1);
-    const target = await resolveHostedTarget(parsed);
-    const candidateId = readRequiredCandidateId(parsed);
-    const params = new URLSearchParams({ id: candidateId });
-    const candidate = await apiRequest(projectApiPath(target.projectId, `/workbench/record?${params.toString()}`), {}, target.baseUrl);
-    writeOutput(candidate, parsed, io, (record) => {
-        const value = record;
-        return [
-            `${value.id ?? candidateId}\t${value.status ?? "unknown"}`,
-            ...(value.benchmarkFingerprint ? [`Benchmark version: ${shortDigest(value.benchmarkFingerprint)}`] : []),
-            ...(value.candidateFingerprint ? [`Candidate digest: ${shortDigest(value.candidateFingerprint)}`] : []),
-        ].join("\n");
-    });
-    return 0;
-}
-async function candidateFiles(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud candidates files", 1);
-    const target = await resolveHostedTarget(parsed);
-    const candidateId = readRequiredCandidateId(parsed);
-    const response = await apiRequest(projectApiPath(target.projectId, `/candidates/${encodeURIComponent(candidateId)}/files`), {}, target.baseUrl);
-    writeOutput(response.files, parsed, io, (files) => files
-        .map((file) => `${file.path}\t${file.status}\t${file.preview_kind}`)
-        .join("\n") || "No files.");
-    return 0;
-}
-async function candidatePreview(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "path", "output", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud candidates preview", 1);
-    const target = await resolveHostedTarget(parsed);
-    const candidateId = readRequiredCandidateId(parsed);
-    const filePath = requireFlag(parsed, "path");
-    const params = new URLSearchParams({ path: filePath });
-    const response = await apiRequest(projectApiPath(target.projectId, `/candidates/${encodeURIComponent(candidateId)}/files?${params.toString()}`), {}, target.baseUrl);
-    const content = response.preview.source?.content ??
-        response.preview.rendered_html ??
-        response.preview.diff ??
-        "";
-    const outputPath = asOptionalString(parsed.flags.output);
-    if (outputPath && outputPath !== "-") {
-        await fs.writeFile(outputPath, content);
-        io.stdout.write(`Wrote preview to ${outputPath}\n`);
-    }
-    else if (parsed.flags.json === true) {
-        writeJson(response.preview, io);
-    }
-    else {
-        io.stdout.write(content);
-    }
-    return 0;
-}
-async function candidateExport(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "out", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud candidates pull", 1);
-    const target = await resolveHostedTarget(parsed);
-    const candidateId = readRequiredCandidateId(parsed);
-    const outputDir = requireOutDir(parsed);
-    const response = await apiRequest(projectApiPath(target.projectId, `/candidates/${encodeURIComponent(candidateId)}/export`), {}, target.baseUrl);
-    await writeFiles(outputDir, response.files);
-    writeOutput({ ok: true, outputDir, files: response.files.length }, parsed, io, (result) => {
-        const record = result;
-        return `Exported ${record.files} file(s) to ${record.outputDir}`;
-    });
-    return 0;
-}
-async function candidateVisibility(argv, io, visibility) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, `workbench cloud candidates ${visibility === "public" ? "publish" : "unpublish"}`, 1);
-    const target = await resolveHostedTarget(parsed, { requireProjectIdentity: true });
-    const candidateId = readRequiredCandidateId(parsed);
-    const response = await apiRequest(projectApiPath(target.projectId, `/candidates/${encodeURIComponent(candidateId)}/publish`), { method: visibility === "public" ? "PUT" : "DELETE" }, target.baseUrl);
-    writeOutput({ ok: true, visibility, candidate: response.candidate }, parsed, io, () => `${visibility === "public" ? "Published" : "Unpublished"} candidate ${candidateId}.`);
-    return 0;
-}
-async function runList(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud runs list", 0);
-    const target = await resolveHostedTarget(parsed);
-    const response = await apiRequest(projectApiPath(target.projectId, "/runs"), {}, target.baseUrl);
-    writeOutput(response.runs, parsed, io, (runs) => runs
-        .map((run) => `${run.id}\t${run.status}\t${run.candidateId ?? "pending"}`)
-        .join("\n") || "No runs.");
-    return 0;
-}
-async function runShow(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud runs show", 1);
-    const target = await resolveHostedTarget(parsed, { requireProjectIdentity: true });
-    const runId = readRequiredRunId(parsed);
-    const response = await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(runId)}`), {}, target.baseUrl);
-    const detail = withRunDetailUrls(target, response);
-    writeOutput(detail, parsed, io, formatRunDetail);
-    return 0;
-}
-async function runCancel(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud runs cancel", 1);
-    const target = await resolveHostedTarget(parsed, { requireProjectIdentity: true });
-    const runId = readRequiredRunId(parsed);
-    const response = await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(runId)}`), { method: "DELETE" }, target.baseUrl);
-    const run = withRunUrls(target, response.run);
-    writeOutput(run, parsed, io, (record) => {
-        const value = record;
-        return [
-            `Cancelled run ${value.id}; status ${value.status}; outcome ${value.outcome ?? "cancelled"}.`,
-            `Open benchmark: ${value.urls?.benchmark ?? buildWorkbenchResourceUrls(target).benchmark}`,
-        ].join("\n");
-    });
-    return 0;
-}
-async function runWatch(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "interval-ms", "timeout-ms", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud watch", 1);
-    const target = await resolveHostedTarget(parsed, { requireProjectIdentity: true });
-    const runId = readRequiredRunId(parsed);
-    if (parsed.flags.json !== true) {
-        io.stdout.write(`Watching run ${runId}.\n${HOSTED_WATCH_LIFECYCLE_NOTE}\n`);
-    }
-    const run = await watchHostedRun({
-        parsed,
-        target,
-        runId,
-        intervalMs: parsePositiveInt(parsed.flags["interval-ms"], 1000, "interval-ms"),
-        timeoutMs: parseOptionalPositiveInt(parsed.flags["timeout-ms"], "timeout-ms"),
-    });
-    const outputRun = await withHostedRunFailureSummary(target, run);
-    writeOutput(withRunUrls(target, outputRun), parsed, io, formatHostedRunResult);
-    return hostedRunSucceeded(run) ? 0 : 1;
-}
-async function runLogs(argv, io) {
-    const parsed = parseArgs(argv);
-    rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
-    rejectUnexpectedPositionals(parsed, "workbench cloud logs", 1);
-    const target = await resolveHostedTarget(parsed);
-    const requestedRunId = parsed.positionals[0];
-    if (requestedRunId) {
-        const response = await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(requestedRunId)}`), {}, target.baseUrl);
-        writeOutput({ runId: response.run.id, jobs: response.jobs }, parsed, io, formatRunLogs);
-        return 0;
-    }
-    const project = (await apiRequest(projectApiPath(target.projectId), {}, target.baseUrl)).project;
-    const runId = project.runs.at(-1)?.id;
-    if (!runId) {
-        throw new UsageError("Missing RUN_ID; the benchmark has no runs.");
-    }
-    const jobs = project.jobs.filter((job) => job.runId === runId);
-    writeOutput({ runId, jobs }, parsed, io, formatRunLogs);
-    return 0;
-}
-function formatRunLogs(record) {
-    const value = record;
-    return (value.jobs
-        .map((job) => `${job.id}\t${job.kind}\t${job.status}\t${job.candidateId ?? "-"}${job.error ? `\t${job.error}` : ""}`)
-        .join("\n") || `No jobs for ${value.runId}.`);
-}
 async function openWorkbench(argv, io) {
     const parsed = parseArgs(argv);
     rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "no-open", "json"]));
     if (parsed.positionals.length > 1) {
-        throw new UsageError(`Unexpected argument for workbench open: ${parsed.positionals.slice(1).join(" ")}`);
+        throw new UsageError(`Unexpected argument for workbench open --hosted: ${parsed.positionals.slice(1).join(" ")}`);
     }
     const target = await resolveOpenTarget(parsed);
     const ref = target.openRef;
@@ -4134,11 +3678,12 @@ async function resolveHostedTarget(parsed, options = {}) {
     if (!projectId) {
         throw new UsageError("Missing hosted benchmark. Run workbench push, workbench clone, or pass --benchmark OWNER/BENCHMARK.");
     }
+    const originRemote = origin ? parseOriginRemote(origin) : null;
     return {
         projectId,
-        ...(!explicitProject && origin?.owner ? { owner: origin.owner } : {}),
-        ...(!explicitProject && origin?.project
-            ? { projectName: origin.project }
+        ...(!explicitProject && originRemote ? { owner: originRemote.owner } : {}),
+        ...(!explicitProject && originRemote
+            ? { projectName: originRemote.project }
             : {}),
         dir,
         baseUrl,
@@ -4176,13 +3721,12 @@ async function resolveHostedDryRunTarget(parsed, options = {}) {
         };
     }
     if (origin?.projectId) {
+        const originRemote = parseOriginRemote(origin);
         return {
-            projectRef: origin.owner && origin.project
-                ? `${origin.owner}/${origin.project}`
-                : origin.projectId,
+            projectRef: origin.remote,
             projectId: origin.projectId,
-            ...(origin.owner ? { owner: origin.owner } : {}),
-            ...(origin.project ? { projectName: origin.project } : {}),
+            owner: originRemote.owner,
+            projectName: originRemote.project,
             dir,
             baseUrl,
             origin,
@@ -4241,38 +3785,31 @@ function buildWorkbenchResourceUrls(target, refs = {}) {
 function projectApiPath(projectRef, suffix = "") {
     return `/api/workbench/benchmarks/${encodeURIComponent(projectRef)}${suffix}`;
 }
-function benchmarkApiPath(benchmarkRef) {
-    if (benchmarkRef.includes("/")) {
-        return publicProjectApiPath(parseBenchmarkRef(benchmarkRef));
-    }
-    return projectApiPath(benchmarkRef);
-}
 function publicProjectApiPath(ref) {
     return `/api/workbench/public/benchmarks/${encodeURIComponent(ref.owner)}/${encodeURIComponent(ref.project)}`;
 }
-function publicProjectSourceApiPath(ref) {
-    return `${publicProjectApiPath(ref)}/source`;
+function publicProjectStateApiPath(ref) {
+    return `${publicProjectApiPath(ref)}/state`;
 }
 function readRequiredBenchmarkRef(parsed) {
     const ref = parsed.positionals[0];
     if (!ref) {
-        throw new UsageError("Missing required OWNER/BENCHMARK ref.");
+        throw new UsageError("Missing required OWNER/BENCHMARK.");
     }
     return parseBenchmarkRef(ref);
 }
 function parseBenchmarkRef(value) {
-    const [namePart, versionRef, extraRef] = value.split("@");
-    if (extraRef !== undefined || !namePart) {
-        throw new UsageError("Benchmark refs must use OWNER/BENCHMARK[@REF].");
+    if (value.includes("@")) {
+        throw new UsageError("Benchmark refs must use OWNER/BENCHMARK.");
     }
-    const [owner, project, extra] = namePart.split("/");
+    const [owner, project, extra] = value.split("/");
     if (!owner || !project || extra !== undefined) {
-        throw new UsageError("Benchmark refs must use OWNER/BENCHMARK[@REF].");
+        throw new UsageError("Benchmark refs must use OWNER/BENCHMARK.");
     }
-    return { owner, project, ...(versionRef ? { ref: versionRef } : {}) };
+    return { owner, project };
 }
 function formatBenchmarkRef(ref) {
-    return `${ref.owner}/${ref.project}${ref.ref ? `@${ref.ref}` : ""}`;
+    return `${ref.owner}/${ref.project}`;
 }
 async function resolveRemoteProject(projectRef, baseUrl) {
     if (projectRef.includes("/")) {
@@ -4283,23 +3820,10 @@ async function resolveRemoteProject(projectRef, baseUrl) {
     const response = await apiRequest(projectApiPath(projectRef), {}, baseUrl);
     return response.benchmark;
 }
-function formatProjectRef(project) {
-    return project.name ? `${project.name} (${project.id})` : project.id;
-}
-function originMatchesProjectRef(origin, projectRef) {
-    if (!origin) {
-        return false;
-    }
-    if (origin.projectId === projectRef) {
-        return true;
-    }
-    if (!projectRef.includes("/")) {
-        return false;
-    }
-    const ref = parseBenchmarkRef(projectRef);
-    return origin.owner === ref.owner && origin.project === ref.project;
-}
 function withRunUrls(target, run) {
+    if (!target.owner || !target.projectName) {
+        return { ...run };
+    }
     return {
         ...run,
         urls: buildWorkbenchResourceUrls(target, {
@@ -4308,17 +3832,30 @@ function withRunUrls(target, run) {
         }),
     };
 }
-function withRunDetailUrls(target, detail) {
-    const candidateId = hostedRunEvaluationCandidateId(detail.run, detail.jobs);
-    const run = withRunUrls(target, {
-        ...detail.run,
-        outputCandidateId: detail.run.outputCandidateId ?? candidateId,
-    });
-    return {
-        run,
-        jobs: detail.jobs,
-        urls: run.urls ?? buildWorkbenchResourceUrls(target, { runId: run.id }),
+function hostedTargetForRunStartResponse(target, response) {
+    const projectId = response.benchmark?.id ?? response.run.projectId ?? target.projectId;
+    if (projectId === target.projectId && !response.benchmark) {
+        return target;
+    }
+    const origin = target.origin?.projectId === projectId ? target.origin : null;
+    const next = {
+        ...target,
+        projectId,
+        origin,
     };
+    if (response.benchmark?.ownerUsername) {
+        next.owner = response.benchmark.ownerUsername;
+    }
+    else {
+        delete next.owner;
+    }
+    if (response.benchmark?.name) {
+        next.projectName = response.benchmark.name;
+    }
+    else {
+        delete next.projectName;
+    }
+    return next;
 }
 function hostedRunEvaluationCandidateId(run, jobs = []) {
     if (run.outputCandidateId) {
@@ -4330,6 +3867,80 @@ function hostedRunEvaluationCandidateId(run, jobs = []) {
         .filter((candidateId) => Boolean(candidateId));
     return attemptCandidates.at(-1) ?? run.candidateId ?? null;
 }
+function localProjectState(args) {
+    const stateSource = localProjectStateSource(args.source);
+    const runtime = runtimeBundleForProjectVisibility(args.runtime, args.visibility);
+    const runtimeFingerprint = workbenchRuntimeBundleFingerprint(runtime);
+    return {
+        schema: "workbench.project.state.v1",
+        project: {
+            id: args.origin?.projectId ?? "",
+            remote: args.origin?.remote ?? `local/${args.source.spec.name}`,
+            ownerUsername: args.origin ? parseOriginRemote(args.origin).owner : "local",
+            name: args.origin ? parseOriginRemote(args.origin).project : args.source.spec.name,
+            visibility: args.visibility,
+        },
+        base: {
+            ...(args.origin ? { sourceRevisionId: args.origin.sourceRevisionId } : {}),
+            ...(args.origin ? { sourceFingerprint: args.origin.sourceFingerprint } : {}),
+            runtimeFingerprint: args.origin?.runtimeFingerprint ?? runtimeFingerprint,
+        },
+        source: stateSource,
+        runtime,
+    };
+}
+function localCandidateRecord(candidate) {
+    return {
+        ...candidate,
+        visibility: "private",
+    };
+}
+function runtimeBundleForProjectVisibility(runtime, visibility) {
+    return {
+        ...runtime,
+        candidates: runtime.candidates.map((candidate) => ({
+            ...candidate,
+            visibility,
+        })),
+    };
+}
+function localProjectStateSource(source) {
+    const request = hostedProjectSourceRequest(source);
+    const stateSource = {
+        source: request.source,
+        files: source.sourceFiles.map((file) => ({ ...file })),
+        candidateFiles: request.candidateFiles.map(toSurfaceSnapshotFile),
+        engineResolveFiles: request.engineResolveFiles.map(toSurfaceSnapshotFile),
+        engineResolveBinding: request.engineResolveBinding,
+        adapterFiles: request.adapterFiles.map(toSurfaceSnapshotFile),
+        dockerfile: request.dockerfile,
+        runtimeDockerfile: request.runtimeDockerfile,
+        runtimeFiles: request.runtimeFiles.map(toSurfaceSnapshotFile),
+        network: request.network,
+        resources: { ...request.resources },
+    };
+    return {
+        ...stateSource,
+        fingerprint: workbenchProjectSourceFingerprint(stateSource),
+    };
+}
+function toSurfaceSnapshotFile(file) {
+    return {
+        path: file.path,
+        kind: "kind" in file ? file.kind : file.encoding === "base64" ? "binary" : "text",
+        encoding: file.encoding ?? "utf8",
+        content: file.content,
+        executable: file.executable === true,
+    };
+}
+function hostedProjectSummaryFromState(state) {
+    return {
+        id: state.project.id,
+        ownerUsername: state.project.ownerUsername,
+        name: state.project.name,
+        visibility: state.project.visibility,
+    };
+}
 function sourceFileCount(source) {
     return source.sourceFiles.length;
 }
@@ -4348,28 +3959,6 @@ function hostedProjectSourceRequest(source) {
         resources,
     };
 }
-function hostedEngineResolveFiles(source) {
-    return [
-        ...source.engineResolveFiles,
-        {
-            path: WORKBENCH_ADAPTER_RESULT_FILE,
-            content: `${JSON.stringify({
-                protocol: WORKBENCH_ADAPTER_RESULT_PROTOCOL,
-                operation: "engine.resolve",
-                ok: true,
-                value: {
-                    cases: source.engineCases,
-                    ...(source.engineResolveEnvironment
-                        ? { environment: source.engineResolveEnvironment }
-                        : {}),
-                },
-                feedback: {
-                    path: source.engineResolveFingerprintPath,
-                },
-            }, null, 2)}\n`,
-        },
-    ];
-}
 function isRemoteProjectId(value) {
     return /^wb_[a-f0-9]{12}$/u.test(value);
 }
@@ -4463,71 +4052,12 @@ function formatHostedRunStarted(run, fallbackWorkflow) {
         "",
     ].join("\n");
 }
-function formatRunDetail(record) {
-    const detail = record;
-    const { run, jobs, urls } = detail;
-    const cost = sumJobCostUsd(jobs);
-    const firstFailedJob = jobs.find((job) => job.status === "failed" && job.error);
-    const candidateId = hostedRunEvaluationCandidateId(run, jobs);
-    return [
-        `Run ${run.id}: ${run.status}${run.outcome ? ` (${run.outcome})` : ""}`,
-        `Workflow: ${run.workflow ?? "improve"}`,
-        `Candidate: ${candidateId ?? "pending"}`,
-        ...(run.activeCandidateId && candidateId && run.activeCandidateId !== candidateId
-            ? [`Active candidate: ${run.activeCandidateId}`]
-            : []),
-        `Samples: ${run.samples ?? 0}`,
-        `Attempts: ${run.attemptsExecuted ?? 0}/${run.attemptsRequested ?? run.attemptsExecuted ?? 0}`,
-        `Jobs: ${run.completedJobCount ?? jobs.filter(isTerminalRunJob).length}/${run.jobCount ?? jobs.length} completed${run.failedJobCount ? `; ${run.failedJobCount} failed` : ""}`,
-        ...(typeof run.durationMs === "number"
-            ? [`Duration: ${formatDurationMs(run.durationMs)}`]
-            : []),
-        ...(cost > 0 ? [`Cost: ${formatUsd(cost)}`] : []),
-        ...(firstFailedJob?.error
-            ? [`First failed job ${firstFailedJob.id}: ${firstFailedJob.error}`]
-            : []),
-        ...(urls.candidateEvaluation
-            ? [`Open evaluation: ${urls.candidateEvaluation}`]
-            : [`Open benchmark: ${urls.benchmark}`]),
-        ...(jobs.length > 0 ? ["", "Jobs:", ...jobs.map(formatRunJobLine)] : []),
-    ].join("\n");
-}
-function formatRunJobLine(job) {
-    return [
-        job.id,
-        readRunJobPurpose(job) ?? job.kind ?? "job",
-        job.status,
-        job.candidateId ?? "-",
-        job.error ?? "",
-    ].filter((value, index) => index < 4 || value !== "").join("\t");
-}
-function isTerminalRunJob(job) {
-    return job.status === "succeeded" || job.status === "failed" || job.status === "cancelled";
-}
 function readRunJobPurpose(job) {
     const input = readRecord(job.input);
     const execution = readRecord(input?.execution);
     const purpose = execution?.purpose;
     return typeof purpose === "string" && purpose ? purpose : null;
 }
-function sumJobCostUsd(jobs) {
-    const sum = jobs.reduce((total, job) => total + costUsdFromUsage(readRecord(job.output)?.usage), 0);
-    return Number.isFinite(sum) ? Math.round(sum * 1_000_000) / 1_000_000 : 0;
-}
-function costUsdFromUsage(value) {
-    const usage = readRecord(value);
-    if (!usage) {
-        return 0;
-    }
-    const direct = readFiniteNumber(usage.costUsd);
-    if (direct !== null) {
-        return direct;
-    }
-    return ["total", "improver", "runner", "engine"].reduce((sum, key) => {
-        const nested = readRecord(usage[key]);
-        return sum + (readFiniteNumber(nested?.costUsd) ?? 0);
-    }, 0);
-}
 function readRecord(value) {
     return value && typeof value === "object" && !Array.isArray(value)
         ? value
@@ -4545,24 +4075,6 @@ function integerValue(value) {
 function readFiniteNumber(value) {
     return typeof value === "number" && Number.isFinite(value) ? value : null;
 }
-function formatDurationMs(durationMs) {
-    if (durationMs < 1000) {
-        return `${Math.max(0, Math.round(durationMs))}ms`;
-    }
-    const seconds = durationMs / 1000;
-    if (seconds < 60) {
-        return `${seconds.toFixed(seconds < 10 ? 1 : 0)}s`;
-    }
-    const minutes = Math.floor(seconds / 60);
-    const remainingSeconds = Math.round(seconds % 60);
-    return `${minutes}m ${remainingSeconds}s`;
-}
-function formatUsd(value) {
-    return `$${value.toFixed(value < 1 ? 4 : 2)}`;
-}
-function shortDigest(value) {
-    return value.length > 12 ? value.slice(0, 12) : value;
-}
 async function withHostedRunFailureSummary(target, run) {
     if (hostedRunSucceeded(run) || run.error || (run.failedJobCount ?? 0) <= 0) {
         return run;
@@ -4592,23 +4104,44 @@ function hostedRunSucceeded(run) {
 async function readWorkbenchOrigin(dir) {
     try {
         const parsed = JSON.parse(await fs.readFile(workbenchOriginPath(dir), "utf8"));
-        if (!parsed.projectId ||
-            !parsed.baseUrl ||
-            !parsed.owner ||
-            !parsed.project ||
-            typeof parsed.writable !== "boolean") {
+        if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
+            throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
+        }
+        const originRecord = parsed;
+        const keys = Object.keys(originRecord).sort();
+        const expectedKeys = [
+            "baseUrl",
+            "linkedAt",
+            "projectId",
+            "remote",
+            "runtimeFingerprint",
+            "sourceFingerprint",
+            "sourceRevisionId",
+        ];
+        if (typeof originRecord.projectId !== "string" ||
+            typeof originRecord.baseUrl !== "string" ||
+            typeof originRecord.remote !== "string" ||
+            typeof originRecord.sourceRevisionId !== "string" ||
+            typeof originRecord.sourceFingerprint !== "string" ||
+            typeof originRecord.runtimeFingerprint !== "string" ||
+            typeof originRecord.linkedAt !== "string" ||
+            originRecord.projectId.length === 0 ||
+            originRecord.sourceRevisionId.length === 0 ||
+            originRecord.sourceFingerprint.length === 0 ||
+            originRecord.runtimeFingerprint.length === 0) {
+            throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
+        }
+        if (JSON.stringify(keys) !== JSON.stringify(expectedKeys)) {
             throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
         }
         return {
-            baseUrl: normalizeBaseUrl(parsed.baseUrl),
-            owner: parsed.owner,
-            project: parsed.project,
-            projectId: parsed.projectId,
-            writable: parsed.writable,
-            ...(parsed.sourceRevisionId ? { sourceRevisionId: parsed.sourceRevisionId } : {}),
-            ...(parsed.sourceFingerprint ? { sourceFingerprint: parsed.sourceFingerprint } : {}),
-            ...(parsed.upstream ? { upstream: parsed.upstream } : {}),
-            linkedAt: parsed.linkedAt ?? new Date(0).toISOString(),
+            baseUrl: normalizeBaseUrl(originRecord.baseUrl),
+            remote: normalizeOriginRemote(originRecord.remote),
+            projectId: originRecord.projectId,
+            sourceRevisionId: originRecord.sourceRevisionId,
+            sourceFingerprint: originRecord.sourceFingerprint,
+            runtimeFingerprint: originRecord.runtimeFingerprint,
+            linkedAt: originRecord.linkedAt,
         };
     }
     catch (error) {
@@ -4627,8 +4160,12 @@ async function requireWorkbenchOrigin(dir) {
 }
 async function writeWorkbenchOrigin(dir, input) {
     const origin = {
-        ...input,
         baseUrl: normalizeBaseUrl(input.baseUrl),
+        remote: normalizeOriginRemote(input.remote),
+        projectId: input.projectId,
+        sourceRevisionId: input.sourceRevisionId,
+        sourceFingerprint: input.sourceFingerprint,
+        runtimeFingerprint: input.runtimeFingerprint,
         linkedAt: input.linkedAt ?? new Date().toISOString(),
     };
     const filePath = workbenchOriginPath(dir);
@@ -4636,6 +4173,49 @@ async function writeWorkbenchOrigin(dir, input) {
     await fs.writeFile(filePath, `${JSON.stringify(origin, null, 2)}\n`);
     return origin;
 }
+async function writeWorkbenchOriginFromState(dir, args) {
+    const owner = args.state.project.ownerUsername;
+    const name = args.state.project.name;
+    const sourceRevisionId = args.state.source.revisionId ??
+        args.state.base.sourceRevisionId;
+    const sourceFingerprint = args.state.source.fingerprint ??
+        args.state.base.sourceFingerprint;
+    const runtimeFingerprint = args.state.base.runtimeFingerprint ??
+        workbenchRuntimeBundleFingerprint(args.state.runtime);
+    if (!sourceRevisionId || !sourceFingerprint || !runtimeFingerprint) {
+        throw new UsageError("Hosted project state is missing required origin metadata.");
+    }
+    return await writeWorkbenchOrigin(dir, {
+        baseUrl: args.baseUrl,
+        remote: `${owner}/${name}`,
+        projectId: args.state.project.id,
+        sourceRevisionId,
+        sourceFingerprint,
+        runtimeFingerprint,
+    });
+}
+function parseOriginRemote(origin) {
+    return parseRemoteName(origin.remote);
+}
+function parseRemoteName(remote) {
+    try {
+        return parseBenchmarkRef(remote);
+    }
+    catch {
+        throw new UsageError(`Workbench origin remote must use OWNER/BENCHMARK: ${remote}`);
+    }
+}
+function normalizeOriginRemote(remote) {
+    const parsed = parseRemoteName(remote.trim());
+    return `${parsed.owner}/${parsed.project}`;
+}
+function originRemoteUrlParts(origin) {
+    const remote = parseOriginRemote(origin);
+    return {
+        owner: remote.owner,
+        projectName: remote.project,
+    };
+}
 function workbenchOriginPath(dir) {
     return path.join(dir, ".workbench", "origin.json");
 }
@@ -4674,30 +4254,6 @@ async function readWorkbenchProfileStatus(config) {
         return { authenticated: true, profile: null };
     }
 }
-function readOptionalCandidateId(parsed) {
-    return asOptionalString(parsed.flags.candidate) ?? parsed.positionals[0];
-}
-function readRequiredCandidateId(parsed) {
-    const candidateId = readOptionalCandidateId(parsed);
-    if (!candidateId) {
-        throw new UsageError("Missing required CANDIDATE_ID.");
-    }
-    return candidateId;
-}
-function readRequiredRunId(parsed) {
-    const runId = parsed.positionals[0];
-    if (!runId) {
-        throw new UsageError("Missing required RUN_ID.");
-    }
-    return runId;
-}
-function requireOutDir(parsed) {
-    const output = asOptionalString(parsed.flags.out);
-    if (!output) {
-        throw new UsageError("Missing required --out.");
-    }
-    return output;
-}
 async function apiRequest(apiPath, options = {}, baseUrlOverride) {
     const config = await loadConfig();
     const baseUrl = normalizeBaseUrl(baseUrlOverride ??
@@ -5219,10 +4775,12 @@ function resolveSourceDir(parsed) {
     if (parsed.positionals.length > 1) {
         throw new UsageError("Expected at most one source file or directory argument.");
     }
-    if (parsed.positionals.length > 0 && parsed.flags.dir !== undefined) {
-        throw new UsageError("Use either --dir or SOURCE, not both.");
+    const dir = asOptionalString(parsed.flags.dir);
+    const source = parsed.positionals[0];
+    if (dir && source) {
+        return path.resolve(dir, source);
     }
-    return path.resolve(asOptionalString(parsed.flags.dir) ?? parsed.positionals[0] ?? process.cwd());
+    return path.resolve(dir ?? source ?? process.cwd());
 }
 function isWorkbenchSourceYamlPath(filePath) {
     return path.basename(filePath) === WORKBENCH_BENCHMARK_FILE;
@@ -5368,6 +4926,14 @@ async function syncSourceFiles(outputDir, files) {
     }
     await writeFiles(outputDir, files);
 }
+async function assertLocalSourceMatchesOrigin(dir, origin) {
+    const source = await readLocalProjectSource(dir);
+    const fingerprint = localProjectStateSource(source).fingerprint;
+    if (fingerprint === origin.sourceFingerprint) {
+        return;
+    }
+    throw new UsageError("Local source changed since the last pull or push. Run `workbench push` before pulling, or restore the local source changes and try again.");
+}
 async function readManagedSourceFilePaths(outputDir) {
     try {
         const source = await readLocalProjectSource(outputDir);