@workbench-ai/workbench 0.0.50 → 0.0.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,17 +5,17 @@ import { createRequire } from "node:module";
5
5
  import os from "node:os";
6
6
  import path from "node:path";
7
7
  import { Writable } from "node:stream";
8
- import { createCandidateFilePreview, createBaselineCandidateJob as createRuntimeBaselineCandidateJob, evaluationScorecardId, evaluationMeanMetrics, executeWorkbenchExecutionJob, engineResolveBindingForSpec, filterCandidateSourceFiles, workbenchExecutionPurpose, workbenchRunExecutionFingerprint, createWorkbenchAdapterAuthBundle, createOptimizerTraceInputFiles, DOCKER_SANDBOX_BACKEND, localWorkbenchAdapterAuthStore, materializeWorkbenchRunResult, normalizeSurfaceFiles, planWorkbenchExecutionJobsForPurpose, runWorkbenchExecutionDag, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, summarizeCandidateFiles, validateWorkbenchRunEnvelope, parseWorkbenchAdapterAuthTarget, } from "@workbench-ai/workbench-core";
9
- import { assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterAuthRequirements, WORKBENCH_ADAPTER_RESULT_FILE, WORKBENCH_ADAPTER_RESULT_PROTOCOL, normalizeWorkbenchAdapterOperationRequest, readWorkbenchAdapterOperationResult, workbenchAdapterOperationCommand, workbenchAdapterOperationResultPath, withDefaultWorkbenchAdapterAuthProfiles as applyDefaultWorkbenchAdapterAuthProfiles, } from "@workbench-ai/workbench-protocol";
8
+ import { createCandidateFilePreview, createBaselineCandidateJob as createRuntimeBaselineCandidateJob, evaluationScorecardId, evaluationMeanMetrics, executeWorkbenchExecutionJob, engineResolveBindingForSpec, filterOptimizerTraceJobsForCaseIds, filterCandidateSourceFiles, formatWorkbenchCaseSelector, formatWorkbenchSelectionPolicy, workbenchCaseSelectorUsesAllCases, workbenchExecutionPurpose, workbenchRunExecutionFingerprint, createWorkbenchAdapterAuthBundle, createOptimizerTraceInputFiles, DOCKER_SANDBOX_BACKEND, localWorkbenchAdapterAuthStore, materializeWorkbenchRunResult, normalizeSurfaceFiles, planWorkbenchExecutionJobsForPurpose, runWorkbenchExecutionDag, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, summarizeCandidateFiles, validateWorkbenchRunEnvelope, parseWorkbenchAdapterAuthTarget, workbenchEngineCaseIdsForImproveEvaluation, workbenchEngineCaseIdsForSelector, workbenchImproveOptimizeSelector, workbenchImproveSelectionPolicy, workbenchProjectSourceFingerprint, workbenchRuntimeBundleFingerprint, } from "@workbench-ai/workbench-core";
9
+ import { assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterAuthRequirements, normalizeWorkbenchAdapterOperationRequest, readWorkbenchAdapterOperationResult, workbenchAdapterOperationCommand, workbenchAdapterOperationResultPath, withDefaultWorkbenchAdapterAuthProfiles as applyDefaultWorkbenchAdapterAuthProfiles, } from "@workbench-ai/workbench-protocol";
10
10
  import { builtinLocalTraceAdapter, builtinLocalTraceAdapters, sortLocalTraceRefs, } from "@workbench-ai/workbench-built-in-adapters/local-traces";
11
11
  import { commandUsage, HOSTED_WATCH_LIFECYCLE_NOTE, LOCAL_DEV_OPEN_LIFECYCLE_NOTE, rootUsage, } from "./command-model.js";
12
12
  import { startLocalWorkbenchDevServer } from "./dev-open-server.js";
13
13
  import { createWorkbenchInitScaffold, } from "./init-scaffold.js";
14
14
  import { defaultAdapterManifests, composeRuntimeDockerfileWithAdapters, resolveDefaultWorkbenchAdapter, resolveProjectAdapterSource, resolveWorkbenchAdaptersForProject, WORKBENCH_ADAPTER_MANIFEST_FILE, } from "./adapter-project.js";
15
15
  import { createAdapterCommandEnv } from "./adapter-command-env.js";
16
- import { loadLocalArchive, loadLocalArchiveIndex, materializeCandidateRoot, readLocalCandidate, readLocalCandidateFiles, readLocalJobs, saveLocalArchive, saveLocalJobs, setLocalActive, upsertLocalRun, upsertLocalCandidate, upsertLocalEvaluation, } from "./local-archive.js";
16
+ import { loadLocalArchive, loadLocalArchiveIndex, exportLocalRuntimeBundle, importLocalRuntimeBundle, runtimeBundleStats, materializeCandidateRoot, readLocalCandidate, readLocalCandidateFiles, readLocalJobs, saveLocalArchive, saveLocalJobs, setLocalActive, upsertLocalRun, upsertLocalCandidate, upsertLocalEvaluation, } from "./local-archive.js";
17
17
  import { WorkspaceSnapshotError, } from "./workspace-snapshot.js";
18
- import { readLocalProjectSource, WORKBENCH_BENCHMARK_FILE, } from "./project-source.js";
18
+ import { hostedEngineResolveFiles, readLocalProjectSource, WORKBENCH_BENCHMARK_FILE, } from "./project-source.js";
19
19
  import { localBenchmarkFingerprint, localCandidateFingerprint, } from "./benchmark-fingerprint.js";
20
20
  const require = createRequire(import.meta.url);
21
21
  function getCliVersion() {
@@ -74,32 +74,38 @@ export async function runCli(argv, io = {
74
74
  if (argv[0] === "clone") {
75
75
  return await cloneProject(argv.slice(1), io);
76
76
  }
77
- if (argv[0] === "fetch") {
78
- return await fetchProject(argv.slice(1), io);
79
- }
80
77
  if (argv[0] === "pull") {
81
78
  return await pullProject(argv.slice(1), io);
82
79
  }
83
80
  if (argv[0] === "push") {
84
81
  return await pushBenchmark(argv.slice(1), io);
85
82
  }
86
- if (argv[0] === "remote") {
87
- return await runRemoteCommand(argv.slice(1), io);
88
- }
89
83
  if (argv[0] === "eval") {
90
- return await localEvaluateCandidate(argv.slice(1), io, runtimeOptions);
84
+ const hosted = extractHostedFlag(argv.slice(1));
85
+ return hosted.enabled
86
+ ? await startHostedWorkflow("eval", hosted.argv, io)
87
+ : await localEvaluateCandidate(hosted.argv, io, runtimeOptions);
91
88
  }
92
89
  if (argv[0] === "retry") {
93
- return await localRetry(argv.slice(1), io, runtimeOptions);
90
+ const hosted = extractHostedFlag(argv.slice(1));
91
+ return hosted.enabled
92
+ ? await retryHostedWorkflow(hosted.argv, io)
93
+ : await localRetry(hosted.argv, io, runtimeOptions);
94
94
  }
95
95
  if (argv[0] === "improve") {
96
- return await localRun(argv.slice(1), io, runtimeOptions);
96
+ const hosted = extractHostedFlag(argv.slice(1));
97
+ return hosted.enabled
98
+ ? await startHostedWorkflow("improve", hosted.argv, io)
99
+ : await localRun(hosted.argv, io, runtimeOptions);
97
100
  }
98
101
  if (argv[0] === "restore") {
99
102
  return await localRestore(argv.slice(1), io);
100
103
  }
101
104
  if (argv[0] === "open") {
102
- return await localDevOpen(argv.slice(1), io);
105
+ const hosted = extractHostedFlag(argv.slice(1));
106
+ return hosted.enabled
107
+ ? await openWorkbench(hosted.argv, io)
108
+ : await localDevOpen(hosted.argv, io);
103
109
  }
104
110
  if (argv[0] === "auth") {
105
111
  return await runAuthCommand(argv.slice(1), io);
@@ -110,9 +116,6 @@ export async function runCli(argv, io = {
110
116
  if (argv[0] === "traces") {
111
117
  return await runTracesCommand(argv.slice(1), io);
112
118
  }
113
- if (argv[0] === "cloud") {
114
- return await runCloudCommand(argv.slice(1), io);
115
- }
116
119
  const commandPath = argv.slice(0, 2).join(" ");
117
120
  const rest = argv.slice(2);
118
121
  switch (commandPath) {
@@ -148,9 +151,6 @@ export async function runCli(argv, io = {
148
151
  }
149
152
  function commandPathForHelp(argv) {
150
153
  const positionals = argv.filter((arg) => arg !== "--help" && arg !== "-h" && !arg.startsWith("--"));
151
- if (positionals[0] === "cloud") {
152
- return positionals.slice(0, 3).join(" ");
153
- }
154
154
  if (positionals[0] === "adapters" &&
155
155
  ["create", "list", "inspect", "test"].includes(positionals[1] ?? "")) {
156
156
  return positionals.slice(0, 2).join(" ");
@@ -159,7 +159,7 @@ function commandPathForHelp(argv) {
159
159
  ["collect", "list", "show"].includes(positionals[1] ?? "")) {
160
160
  return positionals.slice(0, 2).join(" ");
161
161
  }
162
- if (positionals[0] === "auth" || positionals[0] === "remote") {
162
+ if (positionals[0] === "auth") {
163
163
  return positionals.slice(0, 2).join(" ");
164
164
  }
165
165
  if (positionals[0] === "runs" &&
@@ -172,65 +172,18 @@ function commandPathForHelp(argv) {
172
172
  }
173
173
  return positionals[0] ?? "";
174
174
  }
175
- async function runCloudCommand(argv, io) {
176
- const command = argv[0];
177
- const rest = argv.slice(1);
178
- switch (command) {
179
- case "eval":
180
- return await startHostedWorkflow("eval", rest, io);
181
- case "retry":
182
- return await retryHostedWorkflow(rest, io);
183
- case "improve":
184
- return await startHostedWorkflow("improve", rest, io);
185
- case "open":
186
- return await openWorkbench(rest, io);
187
- case "watch":
188
- return await runWatch(rest, io);
189
- case "logs":
190
- return await runLogs(rest, io);
191
- case "star":
192
- return await starProject(rest, io, true);
193
- case "unstar":
194
- return await starProject(rest, io, false);
195
- default:
196
- break;
197
- }
198
- const commandPath = argv.slice(0, 2).join(" ");
199
- const subRest = argv.slice(2);
200
- switch (commandPath) {
201
- case "benchmarks list":
202
- return await benchmarkList(subRest, io);
203
- case "benchmarks show":
204
- return await benchmarkShow(subRest, io);
205
- case "benchmarks versions":
206
- return await benchmarkVersions(subRest, io);
207
- case "benchmarks starred":
208
- return await benchmarkStarred(subRest, io);
209
- case "benchmarks delete":
210
- return await benchmarkDelete(subRest, io);
211
- case "runs list":
212
- return await runList(subRest, io);
213
- case "runs show":
214
- return await runShow(subRest, io);
215
- case "runs cancel":
216
- return await runCancel(subRest, io);
217
- case "candidates list":
218
- return await candidateList(subRest, io);
219
- case "candidates show":
220
- return await candidateShow(subRest, io);
221
- case "candidates files":
222
- return await candidateFiles(subRest, io);
223
- case "candidates preview":
224
- return await candidatePreview(subRest, io);
225
- case "candidates pull":
226
- return await candidateExport(subRest, io);
227
- case "candidates publish":
228
- return await candidateVisibility(subRest, io, "public");
229
- case "candidates unpublish":
230
- return await candidateVisibility(subRest, io, "private");
231
- default:
232
- throw new UsageError(`Unknown command: cloud ${argv.join(" ")}`);
175
+ function extractHostedFlag(argv) {
176
+ let enabled = false;
177
+ const next = [];
178
+ for (const arg of argv) {
179
+ if (arg === "--hosted") {
180
+ enabled = true;
181
+ }
182
+ else {
183
+ next.push(arg);
184
+ }
233
185
  }
186
+ return { enabled, argv: next };
234
187
  }
235
188
  async function localDevOpen(argv, io) {
236
189
  const parsed = parseArgs(argv);
@@ -708,12 +661,28 @@ async function localRun(argv, io, runtimeOptions) {
708
661
  if (caseIds.length === 0) {
709
662
  throw new UsageError("Engine resolver must emit at least one case.");
710
663
  }
664
+ const optimizeSelector = workbenchImproveOptimizeSelector(spec);
665
+ const selectionPolicy = workbenchImproveSelectionPolicy(spec);
666
+ const optimizeCaseIds = workbenchEngineCaseIdsForSelector(engineCases, optimizeSelector);
667
+ if (optimizeCaseIds.length === 0) {
668
+ throw new UsageError(`Improve optimizeOn selector matched no cases: ${formatWorkbenchCaseSelector(optimizeSelector)}.`);
669
+ }
670
+ const selectionCaseIds = workbenchEngineCaseIdsForSelector(engineCases, selectionPolicy.selector);
671
+ if (selectionCaseIds.length === 0) {
672
+ throw new UsageError(`Improve selectBy selector matched no cases: ${formatWorkbenchCaseSelector(selectionPolicy.selector)}.`);
673
+ }
674
+ const selectionScoreCaseIds = workbenchCaseSelectorUsesAllCases(selectionPolicy.selector)
675
+ ? undefined
676
+ : selectionCaseIds;
677
+ const evaluationCaseIds = workbenchEngineCaseIdsForImproveEvaluation({ spec, engineCases });
711
678
  requireValidRunEnvelope({
712
679
  workflow: "improve",
713
680
  budget,
714
681
  samples,
715
- caseCount: caseIds.length,
682
+ caseCount: evaluationCaseIds.length,
716
683
  });
684
+ const optimizeOnLabel = formatWorkbenchCaseSelector(optimizeSelector);
685
+ const selectByLabel = formatWorkbenchSelectionPolicy(selectionPolicy);
717
686
  const environmentRefs = await ensureLocalDockerfileEnvironments(workspace, spec, engineCases);
718
687
  const benchmarkFingerprint = await readLocalBenchmarkFingerprint(workspace);
719
688
  const executionFingerprint = localRunExecutionFingerprint(projectSource);
@@ -771,7 +740,7 @@ async function localRun(argv, io, runtimeOptions) {
771
740
  const events = [
772
741
  createLocalEvent("run_started", startedAt, {
773
742
  runId,
774
- detail: { budget, samples, strategy: "greedy" },
743
+ detail: { budget, samples, strategy: "greedy", optimizeOn: optimizeOnLabel, selectBy: selectByLabel },
775
744
  }),
776
745
  ];
777
746
  const runningRun = {
@@ -786,6 +755,8 @@ async function localRun(argv, io, runtimeOptions) {
786
755
  improver: formatSpecImprover(spec),
787
756
  engineRun: spec.engineRun.use,
788
757
  strategy: "greedy",
758
+ optimizeOn: optimizeOnLabel,
759
+ selectBy: selectByLabel,
789
760
  budget,
790
761
  repairBudget: 0,
791
762
  attemptsRequested: budget,
@@ -815,7 +786,7 @@ async function localRun(argv, io, runtimeOptions) {
815
786
  throw new UsageError("Candidate snapshot must include at least one file.");
816
787
  }
817
788
  const candidateRevisionTraceFiles = createOptimizerTraceInputFiles({
818
- jobs: [...baselineTraceJobs, ...runTraceJobs],
789
+ jobs: filterOptimizerTraceJobsForCaseIds([...baselineTraceJobs, ...runTraceJobs], optimizeCaseIds),
819
790
  });
820
791
  const candidateId = `candidate_${runId.replace(/^run_/u, "")}_${String(attemptIndex + 1).padStart(3, "0")}`;
821
792
  const plannedCandidateRevision = planWorkbenchExecutionJobsForPurpose({
@@ -825,7 +796,7 @@ async function localRun(argv, io, runtimeOptions) {
825
796
  candidateId,
826
797
  attemptIndex,
827
798
  samples,
828
- caseIds,
799
+ caseIds: optimizeCaseIds,
829
800
  engineCases,
830
801
  spec,
831
802
  workflow: "improve",
@@ -861,7 +832,7 @@ async function localRun(argv, io, runtimeOptions) {
861
832
  attemptIndex,
862
833
  samples,
863
834
  now: new Date().toISOString(),
864
- caseIds,
835
+ caseIds: evaluationCaseIds,
865
836
  engineCases,
866
837
  spec,
867
838
  environmentRefsByCase: environmentRefs.byCase,
@@ -891,16 +862,22 @@ async function localRun(argv, io, runtimeOptions) {
891
862
  jobs: completedJobs,
892
863
  previousCandidate: activeCandidate,
893
864
  existingCandidateCount: snapshot.candidates.length,
865
+ selection: {
866
+ metric: selectionPolicy.metric,
867
+ ...(selectionScoreCaseIds ? { caseIds: selectionScoreCaseIds } : {}),
868
+ label: selectByLabel,
869
+ },
894
870
  });
895
871
  for (const candidate of materialized.candidates) {
896
- outputCandidateId = candidate.id;
897
- snapshot = upsertLocalCandidate(snapshot, candidate, materialized.candidateFiles[candidate.id] ?? []);
898
- events.push(createLocalEvent("candidate_created", candidate.createdAt, {
872
+ const localCandidate = localCandidateRecord(candidate);
873
+ outputCandidateId = localCandidate.id;
874
+ snapshot = upsertLocalCandidate(snapshot, localCandidate, materialized.candidateFiles[localCandidate.id] ?? []);
875
+ events.push(createLocalEvent("candidate_created", localCandidate.createdAt, {
899
876
  runId,
900
- candidateId: candidate.id,
901
- baseId: candidate.baseId,
902
- status: candidate.status,
903
- metrics: evaluationMeanMetrics(candidate.eval),
877
+ candidateId: localCandidate.id,
878
+ baseId: localCandidate.baseId,
879
+ status: localCandidate.status,
880
+ metrics: evaluationMeanMetrics(localCandidate.eval),
904
881
  }));
905
882
  }
906
883
  for (const evaluation of materialized.evaluations) {
@@ -944,6 +921,8 @@ async function localRun(argv, io, runtimeOptions) {
944
921
  improver: formatSpecImprover(spec),
945
922
  engineRun: spec.engineRun.use,
946
923
  strategy: "greedy",
924
+ optimizeOn: optimizeOnLabel,
925
+ selectBy: selectByLabel,
947
926
  budget,
948
927
  repairBudget: 0,
949
928
  attemptsRequested: budget,
@@ -1302,7 +1281,7 @@ async function localEvaluateCandidate(argv, io, runtimeOptions) {
1302
1281
  previousCandidate: existingCandidate ?? null,
1303
1282
  existingCandidateCount: snapshot.candidates.length,
1304
1283
  });
1305
- for (const candidateRecord of materialized.candidates) {
1284
+ for (const candidateRecord of materialized.candidates.map(localCandidateRecord)) {
1306
1285
  snapshot = upsertLocalCandidate(snapshot, candidateRecord, materialized.candidateFiles[candidateRecord.id] ?? []);
1307
1286
  }
1308
1287
  if (materialized.activeCandidateId) {
@@ -2932,13 +2911,21 @@ function adapterAuthRecord(value) {
2932
2911
  }
2933
2912
  async function pushBenchmark(argv, io) {
2934
2913
  const parsed = parseArgs(argv);
2935
- rejectUnknownFlags(parsed, new Set(["dir", "tag", "visibility", "dry-run", "json"]));
2914
+ rejectUnknownFlags(parsed, new Set(["dir", "visibility", "dry-run", "json"]));
2936
2915
  const dir = resolveSourceDir(parsed);
2937
2916
  const source = await readLocalProjectSource(dir);
2938
2917
  const origin = await readWorkbenchOrigin(dir);
2939
2918
  const baseUrl = await effectiveBaseUrl(origin?.baseUrl);
2940
- const visibility = readBenchmarkVisibility(parsed.flags.visibility);
2919
+ const visibility = readOptionalBenchmarkVisibility(parsed.flags.visibility);
2920
+ const createVisibility = visibility ?? "public";
2941
2921
  const dryRun = parsed.flags["dry-run"] === true;
2922
+ const runtime = await exportLocalRuntimeBundle(dir);
2923
+ const state = localProjectState({
2924
+ source,
2925
+ runtime,
2926
+ origin,
2927
+ visibility: createVisibility,
2928
+ });
2942
2929
  if (!origin) {
2943
2930
  if (dryRun) {
2944
2931
  writeOutput({
@@ -2948,35 +2935,36 @@ async function pushBenchmark(argv, io) {
2948
2935
  dir,
2949
2936
  baseUrl,
2950
2937
  benchmarkName: source.spec.name,
2951
- tag: asOptionalString(parsed.flags.tag) ?? null,
2952
- visibility,
2938
+ visibility: createVisibility,
2953
2939
  sourceFileCount: sourceFileCount(source),
2940
+ runtime: runtimeBundleStats(runtime),
2941
+ sourceFingerprint: state.source.fingerprint,
2942
+ runtimeFingerprint: state.base.runtimeFingerprint,
2954
2943
  }, parsed, io, () => `Would push benchmark ${source.spec.name}.`);
2955
2944
  return 0;
2956
2945
  }
2957
- const { project, publishedProject, origin: nextOrigin } = await createHostedBenchmarkFromSource({
2946
+ const { project, origin: nextOrigin, result } = await createHostedBenchmarkFromState({
2958
2947
  baseUrl,
2959
2948
  dir,
2960
- source,
2961
- visibility,
2949
+ state,
2962
2950
  });
2963
2951
  writeOutput({
2964
2952
  ok: true,
2965
2953
  action: "create",
2966
- benchmark: publishedProject,
2967
- tag: asOptionalString(parsed.flags.tag) ?? null,
2968
- visibility,
2954
+ benchmark: project,
2955
+ visibility: project.visibility ?? createVisibility,
2969
2956
  origin: nextOrigin,
2957
+ source: result.source,
2958
+ runtime: result.runtime.stats,
2970
2959
  urls: buildWorkbenchResourceUrls({
2971
2960
  baseUrl,
2972
- projectId: publishedProject.id ?? project.id,
2973
- owner: nextOrigin.owner,
2974
- projectName: nextOrigin.project,
2961
+ projectId: project.id,
2962
+ ...originRemoteUrlParts(nextOrigin),
2975
2963
  }),
2976
2964
  }, parsed, io, (record) => {
2977
2965
  const value = record;
2978
2966
  return [
2979
- `Pushed ${value.origin.owner}/${value.origin.project} (${value.origin.projectId}).`,
2967
+ `Pushed ${value.origin.remote} (${value.origin.projectId}).`,
2980
2968
  `Open benchmark: ${value.urls.benchmark}`,
2981
2969
  ].join("\n");
2982
2970
  });
@@ -2986,57 +2974,6 @@ async function pushBenchmark(argv, io) {
2986
2974
  if (!projectId) {
2987
2975
  throw new UsageError("Missing hosted benchmark. Run workbench push from a source directory.");
2988
2976
  }
2989
- if (!origin.writable) {
2990
- const signedInUsername = dryRun ? null : await readAuthenticatedWorkbenchUsername(baseUrl);
2991
- if (signedInUsername !== origin.owner) {
2992
- const upstream = upstreamFromOrigin(origin);
2993
- if (dryRun) {
2994
- writeOutput({
2995
- ok: true,
2996
- dryRun: true,
2997
- action: "create",
2998
- dir,
2999
- baseUrl,
3000
- benchmarkName: source.spec.name,
3001
- tag: asOptionalString(parsed.flags.tag) ?? null,
3002
- visibility,
3003
- sourceFileCount: sourceFileCount(source),
3004
- upstream: upstream ?? null,
3005
- }, parsed, io, () => `Would create a writable benchmark from read-only origin ${origin.owner}/${origin.project}.`);
3006
- return 0;
3007
- }
3008
- const { project, publishedProject, origin: nextOrigin } = await createHostedBenchmarkFromSource({
3009
- baseUrl,
3010
- dir,
3011
- source,
3012
- visibility,
3013
- upstream,
3014
- });
3015
- writeOutput({
3016
- ok: true,
3017
- action: "create",
3018
- benchmark: publishedProject,
3019
- tag: asOptionalString(parsed.flags.tag) ?? null,
3020
- visibility,
3021
- origin: nextOrigin,
3022
- upstream: upstream ?? null,
3023
- urls: buildWorkbenchResourceUrls({
3024
- baseUrl,
3025
- projectId: publishedProject.id ?? project.id,
3026
- owner: nextOrigin.owner,
3027
- projectName: nextOrigin.project,
3028
- }),
3029
- }, parsed, io, (record) => {
3030
- const value = record;
3031
- return [
3032
- `Pushed ${value.origin.owner}/${value.origin.project} (${value.origin.projectId}).`,
3033
- ...(value.upstream ? [`Upstream: ${value.upstream.owner}/${value.upstream.project}`] : []),
3034
- `Open benchmark: ${value.urls.benchmark}`,
3035
- ].join("\n");
3036
- });
3037
- return 0;
3038
- }
3039
- }
3040
2977
  if (dryRun) {
3041
2978
  writeOutput({
3042
2979
  ok: true,
@@ -3045,92 +2982,78 @@ async function pushBenchmark(argv, io) {
3045
2982
  dir,
3046
2983
  baseUrl,
3047
2984
  benchmarkId: projectId,
3048
- tag: asOptionalString(parsed.flags.tag) ?? null,
3049
- visibility,
2985
+ remote: origin.remote,
2986
+ benchmarkName: source.spec.name,
2987
+ visibility: visibility ?? "unchanged",
3050
2988
  sourceFileCount: sourceFileCount(source),
3051
- }, parsed, io, () => `Would push ${sourceFileCount(source)} source file(s) to ${projectId}.`);
2989
+ runtime: runtimeBundleStats(runtime),
2990
+ sourceFingerprint: state.source.fingerprint,
2991
+ runtimeFingerprint: state.base.runtimeFingerprint,
2992
+ }, parsed, io, () => `Would push ${sourceFileCount(source)} source file(s) and runtime history to ${origin.remote}.`);
3052
2993
  return 0;
3053
2994
  }
3054
- const response = await apiRequest(projectApiPath(projectId, "/source"), {
2995
+ const response = await apiRequest(projectApiPath(projectId, "/state"), {
3055
2996
  method: "PUT",
3056
- body: hostedProjectSourceRequest(source),
2997
+ body: state,
3057
2998
  }, baseUrl);
3058
- const publishedProject = visibility === "public"
3059
- ? (await apiRequest(projectApiPath(response.benchmark.id, "/publish"), { method: "PUT" }, baseUrl)).benchmark
3060
- : response.benchmark;
3061
- const nextOrigin = await writeWorkbenchOrigin(dir, {
2999
+ const responseProject = hostedProjectSummaryFromState(response.state);
3000
+ const publishedProject = await applyRequestedProjectVisibility({
3062
3001
  baseUrl,
3063
- owner: publishedProject.ownerUsername ?? response.benchmark.ownerUsername ?? origin.owner,
3064
- project: publishedProject.name ?? response.benchmark.name ?? origin.project ?? source.spec.name,
3065
- projectId: publishedProject.id ?? response.benchmark.id,
3066
- writable: true,
3067
- sourceRevisionId: publishedProject.currentSpecVersionId ?? response.benchmark.currentSpecVersionId,
3068
- sourceFingerprint: response.sourceFingerprint ?? publishedProject.sourceFingerprint ?? response.benchmark.sourceFingerprint,
3069
- upstream: origin.upstream,
3002
+ projectId: responseProject.id,
3003
+ responseProject,
3004
+ visibility,
3005
+ });
3006
+ const nextOrigin = await writeWorkbenchOriginFromState(dir, {
3007
+ baseUrl,
3008
+ state: response.state,
3070
3009
  });
3071
3010
  writeOutput({
3072
3011
  ok: true,
3073
3012
  action: "update",
3074
3013
  changed: response.changed === true,
3075
3014
  benchmark: publishedProject,
3076
- tag: asOptionalString(parsed.flags.tag) ?? null,
3077
- visibility,
3015
+ visibility: visibility ?? "unchanged",
3078
3016
  origin: nextOrigin,
3017
+ source: response.source,
3018
+ runtime: response.runtime.stats,
3079
3019
  urls: buildWorkbenchResourceUrls({
3080
3020
  baseUrl,
3081
- projectId: publishedProject.id ?? response.benchmark.id,
3082
- owner: nextOrigin.owner,
3083
- projectName: nextOrigin.project,
3021
+ projectId: publishedProject.id ?? responseProject.id,
3022
+ ...originRemoteUrlParts(nextOrigin),
3084
3023
  }),
3085
3024
  }, parsed, io, (record) => {
3086
3025
  const value = record;
3087
3026
  return [
3088
- `${value.changed ? "Pushed" : "Already up to date"} ${value.origin.owner}/${value.origin.project} (${value.origin.projectId}).`,
3027
+ `${value.changed ? "Pushed" : "Already up to date"} ${value.origin.remote} (${value.origin.projectId}).`,
3089
3028
  `Open benchmark: ${value.urls.benchmark}`,
3090
3029
  ].join("\n");
3091
3030
  });
3092
3031
  return 0;
3093
3032
  }
3094
- async function createHostedBenchmarkFromSource(args) {
3095
- const response = await apiRequest("/api/workbench/benchmarks", {
3033
+ async function createHostedBenchmarkFromState(args) {
3034
+ const result = await apiRequest("/api/workbench/benchmarks/state", {
3096
3035
  method: "POST",
3097
- body: hostedProjectSourceRequest(args.source),
3036
+ body: args.state,
3098
3037
  }, args.baseUrl);
3099
- const project = response.benchmark;
3100
- const publishedProject = args.visibility === "public"
3101
- ? (await apiRequest(projectApiPath(project.id, "/publish"), { method: "PUT" }, args.baseUrl)).benchmark
3102
- : project;
3103
- const origin = await writeWorkbenchOrigin(args.dir, {
3038
+ const project = hostedProjectSummaryFromState(result.state);
3039
+ const origin = await writeWorkbenchOriginFromState(args.dir, {
3104
3040
  baseUrl: args.baseUrl,
3105
- owner: publishedProject.ownerUsername ?? project.ownerUsername ?? "",
3106
- project: publishedProject.name ?? project.name ?? args.source.spec.name,
3107
- projectId: publishedProject.id ?? project.id,
3108
- writable: true,
3109
- sourceRevisionId: publishedProject.currentSpecVersionId ?? project.currentSpecVersionId,
3110
- sourceFingerprint: publishedProject.sourceFingerprint ?? project.sourceFingerprint,
3111
- ...(args.upstream ? { upstream: args.upstream } : {}),
3041
+ state: result.state,
3112
3042
  });
3113
- return { project, publishedProject, origin };
3114
- }
3115
- async function readAuthenticatedWorkbenchUsername(baseUrl) {
3116
- const config = await loadConfig();
3117
- const status = await readWorkbenchProfileStatus({ ...config, baseUrl });
3118
- return status.authenticated ? status.profile?.username ?? null : null;
3043
+ return { project, origin, result };
3119
3044
  }
3120
- function upstreamFromOrigin(origin) {
3121
- if (!origin.owner || !origin.project || !origin.projectId || !origin.sourceRevisionId) {
3122
- return undefined;
3045
+ async function applyRequestedProjectVisibility(args) {
3046
+ if (args.visibility === "public") {
3047
+ return (await apiRequest(projectApiPath(args.projectId, "/publish"), { method: "PUT" }, args.baseUrl)).benchmark;
3123
3048
  }
3124
- return {
3125
- owner: origin.owner,
3126
- project: origin.project,
3127
- projectId: origin.projectId,
3128
- sourceRevisionId: origin.sourceRevisionId,
3129
- };
3049
+ if (args.visibility === "private") {
3050
+ return (await apiRequest(projectApiPath(args.projectId, "/publish"), { method: "DELETE" }, args.baseUrl)).benchmark;
3051
+ }
3052
+ return args.responseProject;
3130
3053
  }
3131
- function readBenchmarkVisibility(value) {
3054
+ function readOptionalBenchmarkVisibility(value) {
3132
3055
  if (value === undefined) {
3133
- return "public";
3056
+ return undefined;
3134
3057
  }
3135
3058
  if (value === "private" || value === "public") {
3136
3059
  return value;
@@ -3143,41 +3066,37 @@ async function cloneProject(argv, io) {
3143
3066
  const ref = readRequiredBenchmarkRef(parsed);
3144
3067
  const outputDir = parsed.positionals[1] ?? ref.project;
3145
3068
  if (parsed.positionals.length > 2) {
3146
- throw new UsageError("workbench clone accepts OWNER/BENCHMARK[@REF] and an optional output directory.");
3069
+ throw new UsageError("workbench clone accepts OWNER/BENCHMARK and an optional output directory.");
3147
3070
  }
3148
3071
  const baseUrl = await effectiveBaseUrl();
3149
- const projectResponse = await apiRequest(publicProjectApiPath(ref), {}, baseUrl);
3150
- const filesResponse = await apiRequest(publicProjectSourceApiPath(ref), {}, baseUrl);
3072
+ const state = await apiRequest(publicProjectStateApiPath(ref), {}, baseUrl);
3151
3073
  if (parsed.flags["dry-run"] === true) {
3152
3074
  writeOutput({
3153
3075
  ok: true,
3154
3076
  dryRun: true,
3155
3077
  ref,
3156
3078
  outputDir,
3157
- fileCount: filesResponse.files.length,
3079
+ fileCount: state.source.files.length,
3080
+ runtime: runtimeBundleStats(state.runtime),
3081
+ sourceFingerprint: state.source.fingerprint ?? state.base.sourceFingerprint ?? null,
3082
+ runtimeFingerprint: state.base.runtimeFingerprint ?? null,
3158
3083
  }, parsed, io, () => `Would clone ${formatBenchmarkRef(ref)} to ${outputDir}.`);
3159
3084
  return 0;
3160
3085
  }
3161
- await syncSourceFiles(outputDir, filesResponse.files);
3162
- const project = projectResponse.benchmark;
3163
- const sourceProject = filesResponse.benchmark;
3164
- const origin = await writeWorkbenchOrigin(outputDir, {
3086
+ const applied = await applyProjectStateToLocal({
3087
+ dir: outputDir,
3165
3088
  baseUrl,
3166
- owner: sourceProject?.ownerUsername ?? project.ownerUsername,
3167
- project: sourceProject?.name ?? project.name,
3168
- projectId: sourceProject?.id ?? project.id,
3169
- writable: false,
3170
- sourceRevisionId: sourceProject?.currentSpecVersionId ?? project.currentSpecVersionId,
3171
- sourceFingerprint: sourceProject?.sourceFingerprint ?? project.sourceFingerprint,
3089
+ state,
3172
3090
  });
3173
3091
  writeOutput({
3174
3092
  ok: true,
3175
- origin,
3093
+ origin: applied.origin,
3176
3094
  outputDir,
3177
- files: filesResponse.files.length,
3095
+ files: applied.files,
3096
+ runtime: applied.runtime,
3178
3097
  }, parsed, io, (record) => {
3179
3098
  const value = record;
3180
- return `Cloned ${value.origin.owner}/${value.origin.project} to ${value.outputDir} (${value.files} file(s)).`;
3099
+ return `Cloned ${value.origin.remote} to ${value.outputDir} (${value.files} file(s)).`;
3181
3100
  });
3182
3101
  return 0;
3183
3102
  }
@@ -3185,167 +3104,60 @@ async function pullProject(argv, io) {
3185
3104
  const parsed = parseArgs(argv);
3186
3105
  rejectUnknownFlags(parsed, new Set(["dir", "dry-run", "json"]));
3187
3106
  if (parsed.positionals.length > 0) {
3188
- throw new UsageError("workbench pull updates the current origin; use workbench clone OWNER/BENCHMARK[@REF] DIR for a new directory.");
3107
+ throw new UsageError("workbench pull updates the current origin; use workbench clone OWNER/BENCHMARK DIR for a new directory.");
3189
3108
  }
3190
3109
  const dir = resolveDir(parsed);
3191
3110
  const origin = await requireWorkbenchOrigin(dir);
3192
- const filesResponse = origin.writable
3193
- ? await apiRequest(projectApiPath(origin.projectId, "/source"), {}, await effectiveBaseUrl(origin.baseUrl))
3194
- : await apiRequest(publicProjectSourceApiPath({ owner: origin.owner, project: origin.project }), {}, await effectiveBaseUrl(origin.baseUrl));
3111
+ const baseUrl = await effectiveBaseUrl(origin.baseUrl);
3112
+ const remoteRef = parseOriginRemote(origin);
3113
+ const state = await apiRequest(publicProjectStateApiPath(remoteRef), {}, baseUrl);
3195
3114
  if (parsed.flags["dry-run"] === true) {
3196
3115
  writeOutput({
3197
3116
  ok: true,
3198
3117
  dryRun: true,
3199
3118
  dir,
3200
- fileCount: filesResponse.files.length,
3201
- }, parsed, io, () => `Would pull ${filesResponse.files.length} source file(s) into ${dir}.`);
3119
+ fileCount: state.source.files.length,
3120
+ runtime: runtimeBundleStats(state.runtime),
3121
+ sourceFingerprint: state.source.fingerprint ?? state.base.sourceFingerprint ?? null,
3122
+ runtimeFingerprint: state.base.runtimeFingerprint ?? null,
3123
+ }, parsed, io, () => `Would pull ${state.source.files.length} source file(s) and runtime history into ${dir}.`);
3202
3124
  return 0;
3203
3125
  }
3204
- await syncSourceFiles(dir, filesResponse.files);
3205
- const sourceProject = filesResponse.benchmark;
3206
- const nextOrigin = await writeWorkbenchOrigin(dir, {
3207
- ...origin,
3208
- ...(sourceProject?.ownerUsername ? { owner: sourceProject.ownerUsername } : {}),
3209
- ...(sourceProject?.name ? { project: sourceProject.name } : {}),
3210
- ...(sourceProject?.id ? { projectId: sourceProject.id } : {}),
3211
- ...(sourceProject?.currentSpecVersionId ? { sourceRevisionId: sourceProject.currentSpecVersionId } : {}),
3212
- ...(sourceProject?.sourceFingerprint ? { sourceFingerprint: sourceProject.sourceFingerprint } : {}),
3213
- });
3214
- writeOutput({
3215
- ok: true,
3216
- origin: nextOrigin,
3126
+ const applied = await applyProjectStateToLocal({
3217
3127
  dir,
3218
- files: filesResponse.files.length,
3219
- }, parsed, io, (record) => {
3220
- const value = record;
3221
- return `Pulled ${value.files} source file(s) into ${value.dir}.`;
3222
- });
3223
- return 0;
3224
- }
3225
- async function fetchProject(argv, io) {
3226
- const parsed = parseArgs(argv);
3227
- rejectUnknownFlags(parsed, new Set(["dir", "json"]));
3228
- if (parsed.positionals.length > 0) {
3229
- throw new UsageError("workbench fetch updates the current remote cache; use workbench clone OWNER/BENCHMARK[@REF] DIR for a new directory.");
3230
- }
3231
- const dir = resolveDir(parsed);
3232
- const origin = await requireWorkbenchOrigin(dir);
3233
- const filesResponse = await readRemoteSourceFiles(origin);
3234
- const fetchRoot = path.join(dir, ".workbench", "fetch");
3235
- await fs.rm(fetchRoot, { force: true, recursive: true });
3236
- await fs.mkdir(fetchRoot, { recursive: true });
3237
- await writeFiles(path.join(fetchRoot, "source"), filesResponse.files);
3238
- const sourceProject = filesResponse.benchmark;
3239
- const nextOrigin = await writeWorkbenchOrigin(dir, {
3240
- ...origin,
3241
- ...(sourceProject?.ownerUsername ? { owner: sourceProject.ownerUsername } : {}),
3242
- ...(sourceProject?.name ? { project: sourceProject.name } : {}),
3243
- ...(sourceProject?.id ? { projectId: sourceProject.id } : {}),
3244
- ...(sourceProject?.currentSpecVersionId ? { sourceRevisionId: sourceProject.currentSpecVersionId } : {}),
3245
- ...(sourceProject?.sourceFingerprint ? { sourceFingerprint: sourceProject.sourceFingerprint } : {}),
3128
+ baseUrl,
3129
+ state,
3130
+ origin,
3131
+ requireCleanSource: true,
3246
3132
  });
3247
- await fs.writeFile(path.join(fetchRoot, "manifest.json"), `${JSON.stringify({
3248
- fetchedAt: new Date().toISOString(),
3249
- origin: nextOrigin,
3250
- files: filesResponse.files.map((file) => file.path),
3251
- }, null, 2)}\n`);
3252
3133
  writeOutput({
3253
3134
  ok: true,
3254
- origin: nextOrigin,
3135
+ origin: applied.origin,
3255
3136
  dir,
3256
- fetchRoot,
3257
- files: filesResponse.files.length,
3137
+ files: applied.files,
3138
+ runtime: applied.runtime,
3258
3139
  }, parsed, io, (record) => {
3259
3140
  const value = record;
3260
- return `Fetched ${value.files} source file(s) into ${value.fetchRoot}.`;
3261
- });
3262
- return 0;
3263
- }
3264
- async function readRemoteSourceFiles(origin) {
3265
- return origin.writable
3266
- ? await apiRequest(projectApiPath(origin.projectId, "/source"), {}, await effectiveBaseUrl(origin.baseUrl))
3267
- : await apiRequest(publicProjectSourceApiPath({ owner: origin.owner, project: origin.project }), {}, await effectiveBaseUrl(origin.baseUrl));
3268
- }
3269
- async function runRemoteCommand(argv, io) {
3270
- const command = argv[0] ?? "show";
3271
- switch (command) {
3272
- case "show":
3273
- return await remoteShow(argv.slice(1), io);
3274
- case "add":
3275
- return await remoteAdd(argv.slice(1), io, "add");
3276
- case "set-url":
3277
- return await remoteAdd(argv.slice(1), io, "set-url");
3278
- case "remove":
3279
- return await remoteRemove(argv.slice(1), io);
3280
- default:
3281
- throw new UsageError(`Unknown command: remote ${argv.join(" ")}`);
3282
- }
3283
- }
3284
- async function remoteShow(argv, io) {
3285
- const parsed = parseArgs(argv);
3286
- rejectUnknownFlags(parsed, new Set(["dir", "json"]));
3287
- const origin = await requireWorkbenchOrigin(resolveDir(parsed));
3288
- writeOutput({ ok: true, remote: "origin", origin }, parsed, io, (record) => {
3289
- const value = record;
3290
- return [
3291
- `origin\t${value.origin.owner}/${value.origin.project}`,
3292
- `url\t${value.origin.baseUrl}`,
3293
- `writable\t${value.origin.writable ? "yes" : "no"}`,
3294
- ...(value.origin.sourceFingerprint ? [`fingerprint\t${value.origin.sourceFingerprint}`] : []),
3295
- ].join("\n");
3296
- });
3297
- return 0;
3298
- }
3299
- async function remoteAdd(argv, io, command) {
3300
- const parsed = parseArgs(argv);
3301
- rejectUnknownFlags(parsed, new Set(["dir", "json"]));
3302
- const [name, refValue] = parsed.positionals;
3303
- if (name !== "origin" || !refValue || parsed.positionals.length !== 2) {
3304
- throw new UsageError(`workbench remote ${command} accepts: origin OWNER/BENCHMARK[@REF].`);
3305
- }
3306
- const ref = parseBenchmarkRef(refValue);
3307
- const baseUrl = await effectiveBaseUrl();
3308
- const project = await resolveRemoteProject(formatBenchmarkRef(ref), baseUrl);
3309
- const origin = await writeWorkbenchOrigin(resolveDir(parsed), {
3310
- baseUrl,
3311
- owner: project.ownerUsername ?? ref.owner,
3312
- project: project.name ?? ref.project,
3313
- projectId: project.id,
3314
- writable: false,
3315
- ...(project.currentSpecVersionId ? { sourceRevisionId: project.currentSpecVersionId } : {}),
3316
- ...(project.sourceFingerprint ? { sourceFingerprint: project.sourceFingerprint } : {}),
3141
+ return `Pulled ${value.files} source file(s) into ${value.dir}.`;
3317
3142
  });
3318
- writeOutput({ ok: true, remote: "origin", origin }, parsed, io, () => `Set origin to ${origin.owner}/${origin.project}.`);
3319
- return 0;
3320
- }
3321
- async function remoteRemove(argv, io) {
3322
- const parsed = parseArgs(argv);
3323
- rejectUnknownFlags(parsed, new Set(["dir", "json"]));
3324
- const [name] = parsed.positionals;
3325
- if (name !== "origin" || parsed.positionals.length !== 1) {
3326
- throw new UsageError("workbench remote remove accepts: origin.");
3327
- }
3328
- const originPath = workbenchOriginPath(resolveDir(parsed));
3329
- const existed = await fileIsReadable(originPath);
3330
- await fs.rm(originPath, { force: true });
3331
- writeOutput({ ok: true, remote: "origin", removed: existed, path: originPath }, parsed, io, () => existed
3332
- ? `Removed origin (${originPath}).`
3333
- : `No origin configured (${originPath}).`);
3334
3143
  return 0;
3335
3144
  }
3336
- async function starProject(argv, io, starred) {
3337
- const parsed = parseArgs(argv);
3338
- rejectUnknownFlags(parsed, new Set(["json"]));
3339
- const ref = readRequiredBenchmarkRef(parsed);
3340
- if (parsed.positionals.length > 1) {
3341
- throw new UsageError(`${starred ? "workbench cloud star" : "workbench cloud unstar"} accepts exactly one OWNER/BENCHMARK ref.`);
3145
+ async function applyProjectStateToLocal(args) {
3146
+ if (args.requireCleanSource === true && args.origin) {
3147
+ await assertLocalSourceMatchesOrigin(args.dir, args.origin);
3342
3148
  }
3343
- const response = await apiRequest(`${publicProjectApiPath(ref)}/star`, { method: starred ? "PUT" : "DELETE" }, await effectiveBaseUrl());
3344
- writeOutput({ ok: true, benchmark: response.benchmark }, parsed, io, (record) => {
3345
- const value = record;
3346
- return `${starred ? "Starred" : "Unstarred"} ${formatBenchmarkRef(ref)}; ${value.benchmark.starCount} star(s).`;
3149
+ await syncSourceFiles(args.dir, args.state.source.files);
3150
+ const benchmarkFingerprint = localBenchmarkFingerprint(await readLocalProjectSource(args.dir));
3151
+ const runtimeImport = await importLocalRuntimeBundle(args.dir, args.state.runtime, benchmarkFingerprint);
3152
+ const origin = await writeWorkbenchOriginFromState(args.dir, {
3153
+ baseUrl: args.baseUrl,
3154
+ state: args.state,
3347
3155
  });
3348
- return 0;
3156
+ return {
3157
+ origin,
3158
+ files: args.state.source.files.length,
3159
+ runtime: runtimeImport.stats,
3160
+ };
3349
3161
  }
3350
3162
  async function retryHostedWorkflow(argv, io) {
3351
3163
  const parsed = parseArgs(argv);
@@ -3357,7 +3169,7 @@ async function retryHostedWorkflow(argv, io) {
3357
3169
  "timeout-ms",
3358
3170
  "json",
3359
3171
  ]));
3360
- rejectUnexpectedPositionals(parsed, "workbench cloud retry", 1);
3172
+ rejectUnexpectedPositionals(parsed, "workbench retry --hosted", 1);
3361
3173
  const targetId = parsed.positionals[0];
3362
3174
  if (!targetId) {
3363
3175
  throw new UsageError("Missing required TARGET_ID.");
@@ -3378,19 +3190,21 @@ async function retryHostedWorkflow(argv, io) {
3378
3190
  method: "POST",
3379
3191
  body: retryTarget.request,
3380
3192
  }, target.baseUrl);
3381
- const startedRun = withRunUrls(target, response.run);
3193
+ const runTarget = hostedTargetForRunStartResponse(target, response);
3194
+ const startedRun = withRunUrls(runTarget, response.run);
3382
3195
  if (parsed.flags.watch === true) {
3383
3196
  if (parsed.flags.json !== true) {
3384
3197
  io.stdout.write(`${formatHostedRunStarted(startedRun, retryTarget.workflow).trimEnd()}\n${HOSTED_WATCH_LIFECYCLE_NOTE}\n`);
3385
3198
  }
3386
3199
  const watched = await watchHostedRun({
3387
3200
  parsed,
3388
- target,
3201
+ target: runTarget,
3389
3202
  runId: response.run.id,
3390
3203
  intervalMs: watchIntervalMs ?? 1000,
3391
3204
  timeoutMs: watchTimeoutMs,
3392
3205
  });
3393
- const outputRun = withRunUrls(target, await withHostedRunFailureSummary(target, watched));
3206
+ const outputRun = withRunUrls(runTarget, await withHostedRunFailureSummary(runTarget, watched));
3207
+ await tryImportTerminalHostedProjectState({ target: runTarget, io });
3394
3208
  const result = {
3395
3209
  ok: hostedRunSucceeded(watched),
3396
3210
  retried: {
@@ -3435,7 +3249,7 @@ async function resolveHostedRetryTarget(target, targetId) {
3435
3249
  throw new UsageError(`Run ${run.id} is ${run.status}; wait for it to finish before retrying.`);
3436
3250
  }
3437
3251
  if (!hostedRunRecordFailed(run)) {
3438
- throw new UsageError(`Run ${run.id} did not fail; use workbench cloud ${run.workflow ?? "eval"} to intentionally run it again.`);
3252
+ throw new UsageError(`Run ${run.id} did not fail; use workbench ${run.workflow ?? "eval"} --hosted to intentionally run it again.`);
3439
3253
  }
3440
3254
  if (run.workflow === "eval") {
3441
3255
  const candidateId = hostedRunEvaluationCandidateId(run, detail.jobs);
@@ -3485,7 +3299,7 @@ async function resolveHostedEvaluationRetryTarget(target, evaluationId) {
3485
3299
  }
3486
3300
  const run = snapshot.runs.find((entry) => entry.id === evaluation.runId) ?? null;
3487
3301
  if (!evaluationScorecardFailed(evaluation, run)) {
3488
- throw new UsageError(`Evaluation ${evaluation.id} did not fail; use workbench cloud eval to intentionally run it again.`);
3302
+ throw new UsageError(`Evaluation ${evaluation.id} did not fail; use workbench eval --hosted to intentionally run it again.`);
3489
3303
  }
3490
3304
  if (!run) {
3491
3305
  throw new UsageError(`Evaluation ${evaluation.id} is missing its run record.`);
@@ -3527,6 +3341,25 @@ function uniqueCaseSamplePairs(pairs) {
3527
3341
  async function readHostedRunDetail(target, runId) {
3528
3342
  return await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(runId)}`), {}, target.baseUrl);
3529
3343
  }
3344
/**
 * Best-effort refresh of the local checkout once a hosted run has finished.
 *
 * Nothing happens unless the hosted target carries a local origin whose
 * `projectId` equals the target's `projectId`. When it does, the project's
 * "/state" payload is requested and handed to `applyProjectStateToLocal` with
 * `requireCleanSource` enabled. Any failure along the way is reported on
 * stderr and never rethrown.
 *
 * @param {object} args
 * @param {object} args.target Hosted target; `origin`, `projectId`, `baseUrl` and `dir` are read.
 * @param {object} args.io CLI io handle; only `io.stderr.write` is used, and only on failure.
 * @returns {Promise<void>}
 */
async function tryImportTerminalHostedProjectState(args) {
    const { target } = args;
    const localOrigin = target.origin;
    // Only sync when the local directory is linked to the same hosted project as the run.
    const linkedToTarget = Boolean(localOrigin) && localOrigin.projectId === target.projectId;
    if (!linkedToTarget) {
        return;
    }
    try {
        const statePath = projectApiPath(target.projectId, "/state");
        const remoteState = await apiRequest(statePath, {}, target.baseUrl);
        const applyArgs = {
            dir: target.dir,
            baseUrl: target.baseUrl,
            state: remoteState,
            origin: localOrigin,
            requireCleanSource: true,
        };
        await applyProjectStateToLocal(applyArgs);
    }
    catch (failure) {
        // The caller still reports the hosted run result, so surface the problem without failing.
        const reason = errorMessage(failure);
        args.io.stderr.write(`Hosted run finished, but local project state was not updated: ${reason}\n`);
    }
}
3530
3363
  function hostedRetrySourceYaml(run, runId) {
3531
3364
  const sourceYaml = stringValue(readRecord(run.input)?.sourceYaml);
3532
3365
  if (!sourceYaml) {
@@ -3542,12 +3375,10 @@ function hostedRunRecordFailed(run) {
3542
3375
  }
3543
3376
  async function startHostedWorkflow(workflow, argv, io) {
3544
3377
  const parsed = parseArgs(argv);
3545
- rejectUnknownFlags(parsed, new Set([
3378
+ const allowedFlags = new Set([
3546
3379
  "dir",
3547
3380
  "benchmark",
3548
- "base",
3549
3381
  "runs",
3550
- "budget",
3551
3382
  "samples",
3552
3383
  "rerun",
3553
3384
  "watch",
@@ -3555,14 +3386,19 @@ async function startHostedWorkflow(workflow, argv, io) {
3555
3386
  "interval-ms",
3556
3387
  "timeout-ms",
3557
3388
  "json",
3558
- ]));
3559
- if (parsed.positionals.length > 1) {
3560
- throw new UsageError(`workbench cloud ${workflow} accepts at most one source file or directory argument.`);
3389
+ ]);
3390
+ if (workflow === "eval") {
3391
+ allowedFlags.add("candidate");
3561
3392
  }
3562
- const sourceArg = parsed.positionals[0] ?? asOptionalString(parsed.flags.dir) ?? process.cwd();
3563
- if (parsed.positionals.length > 0 && parsed.flags.dir !== undefined) {
3564
- throw new UsageError("Use either --dir or SOURCE, not both.");
3393
+ else {
3394
+ allowedFlags.add("base");
3395
+ allowedFlags.add("budget");
3565
3396
  }
3397
+ rejectUnknownFlags(parsed, allowedFlags);
3398
+ if (parsed.positionals.length > 1) {
3399
+ throw new UsageError(`workbench ${workflow} --hosted accepts at most one source file or directory argument.`);
3400
+ }
3401
+ const sourceArg = resolveSourceDir(parsed);
3566
3402
  const samples = parsePositiveInt(parsed.flags.samples, 1, "samples");
3567
3403
  const budget = workflow === "improve"
3568
3404
  ? parsePositiveInt(parsed.flags.budget, 1, "budget")
@@ -3575,7 +3411,7 @@ async function startHostedWorkflow(workflow, argv, io) {
3575
3411
  const defaultProjectSource = await readLocalProjectSource(path.resolve(sourceArg));
3576
3412
  const selectedRunIds = workflow === "eval"
3577
3413
  ? resolveCandidateRunSelection(defaultProjectSource, runsFlag)
3578
- : [singleRequestedRunId(runsFlag, `workbench cloud ${workflow}`) ?? defaultProjectSource.candidateRunId];
3414
+ : [singleRequestedRunId(runsFlag, `workbench ${workflow} --hosted`) ?? defaultProjectSource.candidateRunId];
3579
3415
  if (workflow === "eval" && selectedRunIds.length > 1) {
3580
3416
  let failed = 0;
3581
3417
  const results = [];
@@ -3599,25 +3435,27 @@ async function startHostedWorkflow(workflow, argv, io) {
3599
3435
  }, parsed, io, () => `Processed ${selectedRunIds.length} hosted candidate run(s); ${failed} failed.`);
3600
3436
  return failed === 0 ? 0 : 1;
3601
3437
  }
3602
- const baseCandidateId = asOptionalString(parsed.flags.base);
3438
+ const selectedCandidateId = workflow === "eval"
3439
+ ? asOptionalString(parsed.flags.candidate)
3440
+ : asOptionalString(parsed.flags.base);
3603
3441
  const request = workflow === "improve"
3604
3442
  ? {
3605
3443
  workflow,
3606
3444
  budget,
3607
3445
  samples,
3608
- ...(baseCandidateId ? { candidateId: baseCandidateId } : {}),
3446
+ ...(selectedCandidateId ? { candidateId: selectedCandidateId } : {}),
3609
3447
  }
3610
3448
  : {
3611
3449
  workflow,
3612
3450
  samples,
3613
- ...(baseCandidateId ? { candidateId: baseCandidateId } : {}),
3451
+ ...(selectedCandidateId ? { candidateId: selectedCandidateId } : {}),
3614
3452
  };
3615
3453
  const projectSource = selectedRunIds[0] === defaultProjectSource.candidateRunId
3616
3454
  ? defaultProjectSource
3617
3455
  : await readLocalProjectSource(path.resolve(sourceArg), { runId: selectedRunIds[0] });
3618
3456
  request.sourceYaml = projectSource.specSource;
3619
3457
  request.adapterFiles = projectSource.adapterFiles;
3620
- if (workflow === "eval" && !baseCandidateId) {
3458
+ if (workflow === "eval" && !selectedCandidateId) {
3621
3459
  request.candidateFiles = projectSource.candidateFiles;
3622
3460
  }
3623
3461
  if (parsed.flags.rerun === true) {
@@ -3652,22 +3490,25 @@ async function startHostedWorkflow(workflow, argv, io) {
3652
3490
  parsed,
3653
3491
  target,
3654
3492
  samples: request.samples,
3655
- candidateId: baseCandidateId,
3493
+ candidateId: selectedCandidateId,
3656
3494
  sourceYaml: projectSource.specSource,
3657
3495
  adapterFiles: projectSource.adapterFiles,
3658
3496
  intervalMs: watchIntervalMs ?? 1000,
3659
3497
  timeoutMs: watchTimeoutMs,
3498
+ io,
3660
3499
  });
3661
3500
  }
3662
3501
  const response = await apiRequest(projectApiPath(target.projectId, "/runs"), {
3663
3502
  method: "POST",
3664
3503
  body: request,
3665
3504
  }, target.baseUrl);
3666
- const startedRun = withRunUrls(target, response.run);
3505
+ const runTarget = hostedTargetForRunStartResponse(target, response);
3506
+ const startedRun = withRunUrls(runTarget, response.run);
3667
3507
  const startedRunOutput = response.reused === true
3668
3508
  ? { ...startedRun, reused: true }
3669
3509
  : startedRun;
3670
3510
  if (response.reused === true && response.run.status === "finished") {
3511
+ await tryImportTerminalHostedProjectState({ target: runTarget, io });
3671
3512
  writeOutput({
3672
3513
  ok: hostedRunSucceeded(response.run),
3673
3514
  reused: true,
@@ -3683,13 +3524,14 @@ async function startHostedWorkflow(workflow, argv, io) {
3683
3524
  }
3684
3525
  const watched = await watchHostedRun({
3685
3526
  parsed,
3686
- target,
3527
+ target: runTarget,
3687
3528
  runId: response.run.id,
3688
3529
  intervalMs: watchIntervalMs ?? 1000,
3689
3530
  timeoutMs: watchTimeoutMs,
3690
3531
  });
3691
- const outputRun = await withHostedRunFailureSummary(target, watched);
3692
- writeOutput(withRunUrls(target, outputRun), parsed, io, formatHostedRunResult);
3532
+ const outputRun = await withHostedRunFailureSummary(runTarget, watched);
3533
+ await tryImportTerminalHostedProjectState({ target: runTarget, io });
3534
+ writeOutput(withRunUrls(runTarget, outputRun), parsed, io, formatHostedRunResult);
3693
3535
  return hostedRunSucceeded(watched) ? 0 : 1;
3694
3536
  }
3695
3537
  writeOutput(startedRunOutput, parsed, io, (run) => formatHostedRunStarted(run, workflow).trimEnd());
@@ -3721,9 +3563,10 @@ async function ensureHostedImproveBaseCandidate(args) {
3721
3563
  ...(args.adapterFiles.length > 0 ? { adapterFiles: args.adapterFiles } : {}),
3722
3564
  },
3723
3565
  }, args.target.baseUrl);
3566
+ const runTarget = hostedTargetForRunStartResponse(args.target, response);
3724
3567
  const watched = await watchHostedRun({
3725
3568
  parsed: args.parsed,
3726
- target: args.target,
3569
+ target: runTarget,
3727
3570
  runId: response.run.id,
3728
3571
  intervalMs: args.intervalMs,
3729
3572
  timeoutMs: args.timeoutMs,
@@ -3734,14 +3577,14 @@ async function ensureHostedImproveBaseCandidate(args) {
3734
3577
  if (!watched.candidateId) {
3735
3578
  throw new UsageError(`Parent candidate eval ${watched.id} did not produce a candidate.`);
3736
3579
  }
3580
+ await tryImportTerminalHostedProjectState({ target: runTarget, io: args.io });
3737
3581
  return watched.candidateId;
3738
3582
  }
3739
3583
  function hostedWorkflowArgsForRun(args) {
3740
3584
  const next = ["--dir", args.sourceDir, "--runs", args.runId, "--json"];
3741
3585
  appendStringFlag(next, "benchmark", asOptionalString(args.parsed.flags.benchmark));
3742
- appendStringFlag(next, "base", asOptionalString(args.parsed.flags.base));
3586
+ appendStringFlag(next, "candidate", asOptionalString(args.parsed.flags.candidate));
3743
3587
  appendStringFlag(next, "samples", asOptionalString(args.parsed.flags.samples));
3744
- appendStringFlag(next, "budget", asOptionalString(args.parsed.flags.budget));
3745
3588
  appendStringFlag(next, "interval-ms", asOptionalString(args.parsed.flags["interval-ms"]));
3746
3589
  appendStringFlag(next, "timeout-ms", asOptionalString(args.parsed.flags["timeout-ms"]));
3747
3590
  if (args.parsed.flags.watch === true) {
@@ -3776,310 +3619,11 @@ async function readEvaluatedActiveHostedCandidate(target) {
3776
3619
  function hostedCandidateIsEvaluated(candidate) {
3777
3620
  return candidate.status === "evaluated" || candidate.eval != null;
3778
3621
  }
3779
- async function benchmarkList(argv, io) {
3780
- const parsed = parseArgs(argv);
3781
- rejectUnknownFlags(parsed, new Set(["json"]));
3782
- rejectUnexpectedPositionals(parsed, "workbench cloud benchmarks list", 0);
3783
- const response = await apiRequest("/api/workbench/public/benchmarks");
3784
- writeOutput(response.benchmarks, parsed, io, (projects) => {
3785
- if (projects.length === 0) {
3786
- return "No hosted Workbench benchmarks.";
3787
- }
3788
- return projects
3789
- .map((project) => `${project.id}\t${project.name}\t${project.runCount} runs\t${project.candidateCount} candidates`)
3790
- .join("\n");
3791
- });
3792
- return 0;
3793
- }
3794
- async function benchmarkShow(argv, io) {
3795
- const parsed = parseArgs(argv);
3796
- rejectUnknownFlags(parsed, new Set(["dir", "json"]));
3797
- rejectUnexpectedPositionals(parsed, "workbench cloud benchmarks show", 1);
3798
- const dir = resolveDir(parsed);
3799
- const origin = await readWorkbenchOrigin(dir);
3800
- const projectRef = parsed.positionals[0] ??
3801
- origin?.projectId;
3802
- if (!projectRef) {
3803
- throw new UsageError("Missing hosted benchmark. Pass OWNER/BENCHMARK, run workbench push, or run workbench clone.");
3804
- }
3805
- const response = await apiRequest(benchmarkApiPath(projectRef), {}, await effectiveBaseUrl(origin?.baseUrl));
3806
- writeOutput(response.benchmark, parsed, io, (project) => {
3807
- const record = project;
3808
- return `${record.name} (${record.id})\n${record.runs.length} runs\n${record.candidates.length} candidates`;
3809
- });
3810
- return 0;
3811
- }
3812
- async function benchmarkDelete(argv, io) {
3813
- const parsed = parseArgs(argv);
3814
- rejectUnknownFlags(parsed, new Set(["dir", "dry-run", "json"]));
3815
- if (parsed.positionals.length > 1) {
3816
- throw new UsageError(`Unexpected argument for workbench benchmarks delete: ${parsed.positionals.slice(1).join(" ")}`);
3817
- }
3818
- const dir = resolveDir(parsed);
3819
- const origin = await readWorkbenchOrigin(dir);
3820
- const projectRef = parsed.positionals[0] ??
3821
- origin?.projectId;
3822
- if (!projectRef) {
3823
- throw new UsageError("Missing hosted benchmark. Pass OWNER/BENCHMARK, run workbench push, or run workbench clone.");
3824
- }
3825
- const originPath = workbenchOriginPath(dir);
3826
- const baseUrl = await effectiveBaseUrl(origin?.baseUrl);
3827
- if (parsed.flags["dry-run"] === true) {
3828
- const originProjectDeleted = originMatchesProjectRef(origin, projectRef);
3829
- writeOutput({
3830
- ok: true,
3831
- dryRun: true,
3832
- projectRef,
3833
- ...(isRemoteProjectId(projectRef) ? { projectId: projectRef } : {}),
3834
- ...(originProjectDeleted && origin?.project ? { projectName: origin.project } : {}),
3835
- baseUrl,
3836
- ...(originProjectDeleted ? { originPath } : {}),
3837
- }, parsed, io, () => originProjectDeleted
3838
- ? `Would delete hosted benchmark ${projectRef} and remove local origin ${originPath}.`
3839
- : `Would delete hosted benchmark ${projectRef}.`);
3840
- return 0;
3841
- }
3842
- const project = await resolveRemoteProject(projectRef, baseUrl);
3843
- const projectId = project.id;
3844
- const projectName = project.name;
3845
- const originProjectDeleted = origin ? origin.projectId === projectId : false;
3846
- await apiRequest(projectApiPath(projectId), { method: "DELETE" }, baseUrl);
3847
- if (originProjectDeleted) {
3848
- await fs.rm(originPath, { force: true });
3849
- }
3850
- writeOutput({
3851
- ok: true,
3852
- deleted: true,
3853
- projectId,
3854
- ...(projectName ? { projectName } : {}),
3855
- originRemoved: originProjectDeleted,
3856
- ...(originProjectDeleted ? { originPath } : {}),
3857
- }, parsed, io, () => originProjectDeleted
3858
- ? `Deleted benchmark ${formatProjectRef(project)} and removed local origin ${originPath}.`
3859
- : `Deleted benchmark ${formatProjectRef(project)}.`);
3860
- return 0;
3861
- }
3862
- async function benchmarkVersions(argv, io) {
3863
- const parsed = parseArgs(argv);
3864
- rejectUnknownFlags(parsed, new Set(["dir", "json"]));
3865
- rejectUnexpectedPositionals(parsed, "workbench cloud benchmarks versions", 1);
3866
- const projectRef = parsed.positionals[0];
3867
- const origin = await readWorkbenchOrigin(resolveDir(parsed));
3868
- if (!projectRef && !origin) {
3869
- throw new UsageError("Missing benchmark ref. Pass OWNER/BENCHMARK or run from a benchmark clone.");
3870
- }
3871
- const response = await apiRequest(benchmarkApiPath(projectRef ?? origin.projectId), {}, await effectiveBaseUrl(origin?.baseUrl));
3872
- const version = response.benchmark.sourceFingerprint ?? response.benchmark.currentSpecVersionId ?? "current";
3873
- writeOutput({
3874
- ok: true,
3875
- benchmark: response.benchmark,
3876
- versions: [{ ref: "main", digest: version, current: true }],
3877
- }, parsed, io, () => `${response.benchmark.name ?? projectRef ?? origin.project}\tmain\t${shortDigest(version)}\tcurrent`);
3878
- return 0;
3879
- }
3880
- async function benchmarkStarred(argv, io) {
3881
- const parsed = parseArgs(argv);
3882
- rejectUnknownFlags(parsed, new Set(["json"]));
3883
- rejectUnexpectedPositionals(parsed, "workbench cloud benchmarks starred", 0);
3884
- const response = await apiRequest("/api/workbench/benchmarks");
3885
- const starred = response.benchmarks.filter((project) => project.viewerHasStarred === true);
3886
- writeOutput(starred, parsed, io, (benchmarks) => {
3887
- if (benchmarks.length === 0) {
3888
- return "No starred benchmarks.";
3889
- }
3890
- return benchmarks
3891
- .map((benchmark) => `${benchmark.ownerUsername ?? "-"} / ${benchmark.name ?? "-"}\t${benchmark.starCount ?? 0} stars`)
3892
- .join("\n");
3893
- });
3894
- return 0;
3895
- }
3896
- async function candidateList(argv, io) {
3897
- const parsed = parseArgs(argv);
3898
- rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
3899
- rejectUnexpectedPositionals(parsed, "workbench cloud candidates list", 0);
3900
- const target = await resolveHostedTarget(parsed);
3901
- const response = await apiRequest(projectApiPath(target.projectId, "/candidates"), {}, target.baseUrl);
3902
- writeOutput(response.candidates, parsed, io, (candidates) => {
3903
- if (candidates.length === 0) {
3904
- return "No candidates yet.";
3905
- }
3906
- return candidates
3907
- .map((candidate) => `${candidate.id}\t${candidate.status}\t${candidate.fileChanges?.length ?? 0} files`)
3908
- .join("\n");
3909
- });
3910
- return 0;
3911
- }
3912
- async function candidateShow(argv, io) {
3913
- const parsed = parseArgs(argv);
3914
- rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
3915
- rejectUnexpectedPositionals(parsed, "workbench cloud candidates show", 1);
3916
- const target = await resolveHostedTarget(parsed);
3917
- const candidateId = readRequiredCandidateId(parsed);
3918
- const params = new URLSearchParams({ id: candidateId });
3919
- const candidate = await apiRequest(projectApiPath(target.projectId, `/workbench/record?${params.toString()}`), {}, target.baseUrl);
3920
- writeOutput(candidate, parsed, io, (record) => {
3921
- const value = record;
3922
- return [
3923
- `${value.id ?? candidateId}\t${value.status ?? "unknown"}`,
3924
- ...(value.benchmarkFingerprint ? [`Benchmark version: ${shortDigest(value.benchmarkFingerprint)}`] : []),
3925
- ...(value.candidateFingerprint ? [`Candidate digest: ${shortDigest(value.candidateFingerprint)}`] : []),
3926
- ].join("\n");
3927
- });
3928
- return 0;
3929
- }
3930
- async function candidateFiles(argv, io) {
3931
- const parsed = parseArgs(argv);
3932
- rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
3933
- rejectUnexpectedPositionals(parsed, "workbench cloud candidates files", 1);
3934
- const target = await resolveHostedTarget(parsed);
3935
- const candidateId = readRequiredCandidateId(parsed);
3936
- const response = await apiRequest(projectApiPath(target.projectId, `/candidates/${encodeURIComponent(candidateId)}/files`), {}, target.baseUrl);
3937
- writeOutput(response.files, parsed, io, (files) => files
3938
- .map((file) => `${file.path}\t${file.status}\t${file.preview_kind}`)
3939
- .join("\n") || "No files.");
3940
- return 0;
3941
- }
3942
- async function candidatePreview(argv, io) {
3943
- const parsed = parseArgs(argv);
3944
- rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "path", "output", "json"]));
3945
- rejectUnexpectedPositionals(parsed, "workbench cloud candidates preview", 1);
3946
- const target = await resolveHostedTarget(parsed);
3947
- const candidateId = readRequiredCandidateId(parsed);
3948
- const filePath = requireFlag(parsed, "path");
3949
- const params = new URLSearchParams({ path: filePath });
3950
- const response = await apiRequest(projectApiPath(target.projectId, `/candidates/${encodeURIComponent(candidateId)}/files?${params.toString()}`), {}, target.baseUrl);
3951
- const content = response.preview.source?.content ??
3952
- response.preview.rendered_html ??
3953
- response.preview.diff ??
3954
- "";
3955
- const outputPath = asOptionalString(parsed.flags.output);
3956
- if (outputPath && outputPath !== "-") {
3957
- await fs.writeFile(outputPath, content);
3958
- io.stdout.write(`Wrote preview to ${outputPath}\n`);
3959
- }
3960
- else if (parsed.flags.json === true) {
3961
- writeJson(response.preview, io);
3962
- }
3963
- else {
3964
- io.stdout.write(content);
3965
- }
3966
- return 0;
3967
- }
3968
- async function candidateExport(argv, io) {
3969
- const parsed = parseArgs(argv);
3970
- rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "out", "json"]));
3971
- rejectUnexpectedPositionals(parsed, "workbench cloud candidates pull", 1);
3972
- const target = await resolveHostedTarget(parsed);
3973
- const candidateId = readRequiredCandidateId(parsed);
3974
- const outputDir = requireOutDir(parsed);
3975
- const response = await apiRequest(projectApiPath(target.projectId, `/candidates/${encodeURIComponent(candidateId)}/export`), {}, target.baseUrl);
3976
- await writeFiles(outputDir, response.files);
3977
- writeOutput({ ok: true, outputDir, files: response.files.length }, parsed, io, (result) => {
3978
- const record = result;
3979
- return `Exported ${record.files} file(s) to ${record.outputDir}`;
3980
- });
3981
- return 0;
3982
- }
3983
- async function candidateVisibility(argv, io, visibility) {
3984
- const parsed = parseArgs(argv);
3985
- rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
3986
- rejectUnexpectedPositionals(parsed, `workbench cloud candidates ${visibility === "public" ? "publish" : "unpublish"}`, 1);
3987
- const target = await resolveHostedTarget(parsed, { requireProjectIdentity: true });
3988
- const candidateId = readRequiredCandidateId(parsed);
3989
- const response = await apiRequest(projectApiPath(target.projectId, `/candidates/${encodeURIComponent(candidateId)}/publish`), { method: visibility === "public" ? "PUT" : "DELETE" }, target.baseUrl);
3990
- writeOutput({ ok: true, visibility, candidate: response.candidate }, parsed, io, () => `${visibility === "public" ? "Published" : "Unpublished"} candidate ${candidateId}.`);
3991
- return 0;
3992
- }
3993
- async function runList(argv, io) {
3994
- const parsed = parseArgs(argv);
3995
- rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
3996
- rejectUnexpectedPositionals(parsed, "workbench cloud runs list", 0);
3997
- const target = await resolveHostedTarget(parsed);
3998
- const response = await apiRequest(projectApiPath(target.projectId, "/runs"), {}, target.baseUrl);
3999
- writeOutput(response.runs, parsed, io, (runs) => runs
4000
- .map((run) => `${run.id}\t${run.status}\t${run.candidateId ?? "pending"}`)
4001
- .join("\n") || "No runs.");
4002
- return 0;
4003
- }
4004
- async function runShow(argv, io) {
4005
- const parsed = parseArgs(argv);
4006
- rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
4007
- rejectUnexpectedPositionals(parsed, "workbench cloud runs show", 1);
4008
- const target = await resolveHostedTarget(parsed, { requireProjectIdentity: true });
4009
- const runId = readRequiredRunId(parsed);
4010
- const response = await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(runId)}`), {}, target.baseUrl);
4011
- const detail = withRunDetailUrls(target, response);
4012
- writeOutput(detail, parsed, io, formatRunDetail);
4013
- return 0;
4014
- }
4015
- async function runCancel(argv, io) {
4016
- const parsed = parseArgs(argv);
4017
- rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
4018
- rejectUnexpectedPositionals(parsed, "workbench cloud runs cancel", 1);
4019
- const target = await resolveHostedTarget(parsed, { requireProjectIdentity: true });
4020
- const runId = readRequiredRunId(parsed);
4021
- const response = await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(runId)}`), { method: "DELETE" }, target.baseUrl);
4022
- const run = withRunUrls(target, response.run);
4023
- writeOutput(run, parsed, io, (record) => {
4024
- const value = record;
4025
- return [
4026
- `Cancelled run ${value.id}; status ${value.status}; outcome ${value.outcome ?? "cancelled"}.`,
4027
- `Open benchmark: ${value.urls?.benchmark ?? buildWorkbenchResourceUrls(target).benchmark}`,
4028
- ].join("\n");
4029
- });
4030
- return 0;
4031
- }
4032
- async function runWatch(argv, io) {
4033
- const parsed = parseArgs(argv);
4034
- rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "interval-ms", "timeout-ms", "json"]));
4035
- rejectUnexpectedPositionals(parsed, "workbench cloud watch", 1);
4036
- const target = await resolveHostedTarget(parsed, { requireProjectIdentity: true });
4037
- const runId = readRequiredRunId(parsed);
4038
- if (parsed.flags.json !== true) {
4039
- io.stdout.write(`Watching run ${runId}.\n${HOSTED_WATCH_LIFECYCLE_NOTE}\n`);
4040
- }
4041
- const run = await watchHostedRun({
4042
- parsed,
4043
- target,
4044
- runId,
4045
- intervalMs: parsePositiveInt(parsed.flags["interval-ms"], 1000, "interval-ms"),
4046
- timeoutMs: parseOptionalPositiveInt(parsed.flags["timeout-ms"], "timeout-ms"),
4047
- });
4048
- const outputRun = await withHostedRunFailureSummary(target, run);
4049
- writeOutput(withRunUrls(target, outputRun), parsed, io, formatHostedRunResult);
4050
- return hostedRunSucceeded(run) ? 0 : 1;
4051
- }
4052
- async function runLogs(argv, io) {
4053
- const parsed = parseArgs(argv);
4054
- rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "json"]));
4055
- rejectUnexpectedPositionals(parsed, "workbench cloud logs", 1);
4056
- const target = await resolveHostedTarget(parsed);
4057
- const requestedRunId = parsed.positionals[0];
4058
- if (requestedRunId) {
4059
- const response = await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(requestedRunId)}`), {}, target.baseUrl);
4060
- writeOutput({ runId: response.run.id, jobs: response.jobs }, parsed, io, formatRunLogs);
4061
- return 0;
4062
- }
4063
- const project = (await apiRequest(projectApiPath(target.projectId), {}, target.baseUrl)).project;
4064
- const runId = project.runs.at(-1)?.id;
4065
- if (!runId) {
4066
- throw new UsageError("Missing RUN_ID; the benchmark has no runs.");
4067
- }
4068
- const jobs = project.jobs.filter((job) => job.runId === runId);
4069
- writeOutput({ runId, jobs }, parsed, io, formatRunLogs);
4070
- return 0;
4071
- }
4072
- function formatRunLogs(record) {
4073
- const value = record;
4074
- return (value.jobs
4075
- .map((job) => `${job.id}\t${job.kind}\t${job.status}\t${job.candidateId ?? "-"}${job.error ? `\t${job.error}` : ""}`)
4076
- .join("\n") || `No jobs for ${value.runId}.`);
4077
- }
4078
3622
  async function openWorkbench(argv, io) {
4079
3623
  const parsed = parseArgs(argv);
4080
3624
  rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "no-open", "json"]));
4081
3625
  if (parsed.positionals.length > 1) {
4082
- throw new UsageError(`Unexpected argument for workbench open: ${parsed.positionals.slice(1).join(" ")}`);
3626
+ throw new UsageError(`Unexpected argument for workbench open --hosted: ${parsed.positionals.slice(1).join(" ")}`);
4083
3627
  }
4084
3628
  const target = await resolveOpenTarget(parsed);
4085
3629
  const ref = target.openRef;
@@ -4134,11 +3678,12 @@ async function resolveHostedTarget(parsed, options = {}) {
4134
3678
  if (!projectId) {
4135
3679
  throw new UsageError("Missing hosted benchmark. Run workbench push, workbench clone, or pass --benchmark OWNER/BENCHMARK.");
4136
3680
  }
3681
+ const originRemote = origin ? parseOriginRemote(origin) : null;
4137
3682
  return {
4138
3683
  projectId,
4139
- ...(!explicitProject && origin?.owner ? { owner: origin.owner } : {}),
4140
- ...(!explicitProject && origin?.project
4141
- ? { projectName: origin.project }
3684
+ ...(!explicitProject && originRemote ? { owner: originRemote.owner } : {}),
3685
+ ...(!explicitProject && originRemote
3686
+ ? { projectName: originRemote.project }
4142
3687
  : {}),
4143
3688
  dir,
4144
3689
  baseUrl,
@@ -4176,13 +3721,12 @@ async function resolveHostedDryRunTarget(parsed, options = {}) {
4176
3721
  };
4177
3722
  }
4178
3723
  if (origin?.projectId) {
3724
+ const originRemote = parseOriginRemote(origin);
4179
3725
  return {
4180
- projectRef: origin.owner && origin.project
4181
- ? `${origin.owner}/${origin.project}`
4182
- : origin.projectId,
3726
+ projectRef: origin.remote,
4183
3727
  projectId: origin.projectId,
4184
- ...(origin.owner ? { owner: origin.owner } : {}),
4185
- ...(origin.project ? { projectName: origin.project } : {}),
3728
+ owner: originRemote.owner,
3729
+ projectName: originRemote.project,
4186
3730
  dir,
4187
3731
  baseUrl,
4188
3732
  origin,
@@ -4241,38 +3785,31 @@ function buildWorkbenchResourceUrls(target, refs = {}) {
4241
3785
  function projectApiPath(projectRef, suffix = "") {
4242
3786
  return `/api/workbench/benchmarks/${encodeURIComponent(projectRef)}${suffix}`;
4243
3787
  }
4244
- function benchmarkApiPath(benchmarkRef) {
4245
- if (benchmarkRef.includes("/")) {
4246
- return publicProjectApiPath(parseBenchmarkRef(benchmarkRef));
4247
- }
4248
- return projectApiPath(benchmarkRef);
4249
- }
4250
3788
  function publicProjectApiPath(ref) {
4251
3789
  return `/api/workbench/public/benchmarks/${encodeURIComponent(ref.owner)}/${encodeURIComponent(ref.project)}`;
4252
3790
  }
4253
- function publicProjectSourceApiPath(ref) {
4254
- return `${publicProjectApiPath(ref)}/source`;
3791
+ function publicProjectStateApiPath(ref) {
3792
+ return `${publicProjectApiPath(ref)}/state`;
4255
3793
  }
4256
3794
  function readRequiredBenchmarkRef(parsed) {
4257
3795
  const ref = parsed.positionals[0];
4258
3796
  if (!ref) {
4259
- throw new UsageError("Missing required OWNER/BENCHMARK ref.");
3797
+ throw new UsageError("Missing required OWNER/BENCHMARK.");
4260
3798
  }
4261
3799
  return parseBenchmarkRef(ref);
4262
3800
  }
4263
3801
  function parseBenchmarkRef(value) {
4264
- const [namePart, versionRef, extraRef] = value.split("@");
4265
- if (extraRef !== undefined || !namePart) {
4266
- throw new UsageError("Benchmark refs must use OWNER/BENCHMARK[@REF].");
3802
+ if (value.includes("@")) {
3803
+ throw new UsageError("Benchmark refs must use OWNER/BENCHMARK.");
4267
3804
  }
4268
- const [owner, project, extra] = namePart.split("/");
3805
+ const [owner, project, extra] = value.split("/");
4269
3806
  if (!owner || !project || extra !== undefined) {
4270
- throw new UsageError("Benchmark refs must use OWNER/BENCHMARK[@REF].");
3807
+ throw new UsageError("Benchmark refs must use OWNER/BENCHMARK.");
4271
3808
  }
4272
- return { owner, project, ...(versionRef ? { ref: versionRef } : {}) };
3809
+ return { owner, project };
4273
3810
  }
4274
3811
  function formatBenchmarkRef(ref) {
4275
- return `${ref.owner}/${ref.project}${ref.ref ? `@${ref.ref}` : ""}`;
3812
+ return `${ref.owner}/${ref.project}`;
4276
3813
  }
4277
3814
  async function resolveRemoteProject(projectRef, baseUrl) {
4278
3815
  if (projectRef.includes("/")) {
@@ -4283,23 +3820,10 @@ async function resolveRemoteProject(projectRef, baseUrl) {
4283
3820
  const response = await apiRequest(projectApiPath(projectRef), {}, baseUrl);
4284
3821
  return response.benchmark;
4285
3822
  }
4286
- function formatProjectRef(project) {
4287
- return project.name ? `${project.name} (${project.id})` : project.id;
4288
- }
4289
- function originMatchesProjectRef(origin, projectRef) {
4290
- if (!origin) {
4291
- return false;
4292
- }
4293
- if (origin.projectId === projectRef) {
4294
- return true;
4295
- }
4296
- if (!projectRef.includes("/")) {
4297
- return false;
4298
- }
4299
- const ref = parseBenchmarkRef(projectRef);
4300
- return origin.owner === ref.owner && origin.project === ref.project;
4301
- }
4302
3823
  function withRunUrls(target, run) {
3824
+ if (!target.owner || !target.projectName) {
3825
+ return { ...run };
3826
+ }
4303
3827
  return {
4304
3828
  ...run,
4305
3829
  urls: buildWorkbenchResourceUrls(target, {
@@ -4308,17 +3832,30 @@ function withRunUrls(target, run) {
4308
3832
  }),
4309
3833
  };
4310
3834
  }
4311
- function withRunDetailUrls(target, detail) {
4312
- const candidateId = hostedRunEvaluationCandidateId(detail.run, detail.jobs);
4313
- const run = withRunUrls(target, {
4314
- ...detail.run,
4315
- outputCandidateId: detail.run.outputCandidateId ?? candidateId,
4316
- });
4317
- return {
4318
- run,
4319
- jobs: detail.jobs,
4320
- urls: run.urls ?? buildWorkbenchResourceUrls(target, { runId: run.id }),
3835
+ function hostedTargetForRunStartResponse(target, response) {
3836
+ const projectId = response.benchmark?.id ?? response.run.projectId ?? target.projectId;
3837
+ if (projectId === target.projectId && !response.benchmark) {
3838
+ return target;
3839
+ }
3840
+ const origin = target.origin?.projectId === projectId ? target.origin : null;
3841
+ const next = {
3842
+ ...target,
3843
+ projectId,
3844
+ origin,
4321
3845
  };
3846
+ if (response.benchmark?.ownerUsername) {
3847
+ next.owner = response.benchmark.ownerUsername;
3848
+ }
3849
+ else {
3850
+ delete next.owner;
3851
+ }
3852
+ if (response.benchmark?.name) {
3853
+ next.projectName = response.benchmark.name;
3854
+ }
3855
+ else {
3856
+ delete next.projectName;
3857
+ }
3858
+ return next;
4322
3859
  }
4323
3860
  function hostedRunEvaluationCandidateId(run, jobs = []) {
4324
3861
  if (run.outputCandidateId) {
@@ -4330,6 +3867,80 @@ function hostedRunEvaluationCandidateId(run, jobs = []) {
4330
3867
  .filter((candidateId) => Boolean(candidateId));
4331
3868
  return attemptCandidates.at(-1) ?? run.candidateId ?? null;
4332
3869
  }
3870
+ function localProjectState(args) {
3871
+ const stateSource = localProjectStateSource(args.source);
3872
+ const runtime = runtimeBundleForProjectVisibility(args.runtime, args.visibility);
3873
+ const runtimeFingerprint = workbenchRuntimeBundleFingerprint(runtime);
3874
+ return {
3875
+ schema: "workbench.project.state.v1",
3876
+ project: {
3877
+ id: args.origin?.projectId ?? "",
3878
+ remote: args.origin?.remote ?? `local/${args.source.spec.name}`,
3879
+ ownerUsername: args.origin ? parseOriginRemote(args.origin).owner : "local",
3880
+ name: args.origin ? parseOriginRemote(args.origin).project : args.source.spec.name,
3881
+ visibility: args.visibility,
3882
+ },
3883
+ base: {
3884
+ ...(args.origin ? { sourceRevisionId: args.origin.sourceRevisionId } : {}),
3885
+ ...(args.origin ? { sourceFingerprint: args.origin.sourceFingerprint } : {}),
3886
+ runtimeFingerprint: args.origin?.runtimeFingerprint ?? runtimeFingerprint,
3887
+ },
3888
+ source: stateSource,
3889
+ runtime,
3890
+ };
3891
+ }
3892
+ function localCandidateRecord(candidate) {
3893
+ return {
3894
+ ...candidate,
3895
+ visibility: "private",
3896
+ };
3897
+ }
3898
+ function runtimeBundleForProjectVisibility(runtime, visibility) {
3899
+ return {
3900
+ ...runtime,
3901
+ candidates: runtime.candidates.map((candidate) => ({
3902
+ ...candidate,
3903
+ visibility,
3904
+ })),
3905
+ };
3906
+ }
3907
+ function localProjectStateSource(source) {
3908
+ const request = hostedProjectSourceRequest(source);
3909
+ const stateSource = {
3910
+ source: request.source,
3911
+ files: source.sourceFiles.map((file) => ({ ...file })),
3912
+ candidateFiles: request.candidateFiles.map(toSurfaceSnapshotFile),
3913
+ engineResolveFiles: request.engineResolveFiles.map(toSurfaceSnapshotFile),
3914
+ engineResolveBinding: request.engineResolveBinding,
3915
+ adapterFiles: request.adapterFiles.map(toSurfaceSnapshotFile),
3916
+ dockerfile: request.dockerfile,
3917
+ runtimeDockerfile: request.runtimeDockerfile,
3918
+ runtimeFiles: request.runtimeFiles.map(toSurfaceSnapshotFile),
3919
+ network: request.network,
3920
+ resources: { ...request.resources },
3921
+ };
3922
+ return {
3923
+ ...stateSource,
3924
+ fingerprint: workbenchProjectSourceFingerprint(stateSource),
3925
+ };
3926
+ }
3927
+ function toSurfaceSnapshotFile(file) {
3928
+ return {
3929
+ path: file.path,
3930
+ kind: "kind" in file ? file.kind : file.encoding === "base64" ? "binary" : "text",
3931
+ encoding: file.encoding ?? "utf8",
3932
+ content: file.content,
3933
+ executable: file.executable === true,
3934
+ };
3935
+ }
3936
+ function hostedProjectSummaryFromState(state) {
3937
+ return {
3938
+ id: state.project.id,
3939
+ ownerUsername: state.project.ownerUsername,
3940
+ name: state.project.name,
3941
+ visibility: state.project.visibility,
3942
+ };
3943
+ }
4333
3944
  function sourceFileCount(source) {
4334
3945
  return source.sourceFiles.length;
4335
3946
  }
@@ -4348,28 +3959,6 @@ function hostedProjectSourceRequest(source) {
4348
3959
  resources,
4349
3960
  };
4350
3961
  }
4351
- function hostedEngineResolveFiles(source) {
4352
- return [
4353
- ...source.engineResolveFiles,
4354
- {
4355
- path: WORKBENCH_ADAPTER_RESULT_FILE,
4356
- content: `${JSON.stringify({
4357
- protocol: WORKBENCH_ADAPTER_RESULT_PROTOCOL,
4358
- operation: "engine.resolve",
4359
- ok: true,
4360
- value: {
4361
- cases: source.engineCases,
4362
- ...(source.engineResolveEnvironment
4363
- ? { environment: source.engineResolveEnvironment }
4364
- : {}),
4365
- },
4366
- feedback: {
4367
- path: source.engineResolveFingerprintPath,
4368
- },
4369
- }, null, 2)}\n`,
4370
- },
4371
- ];
4372
- }
4373
3962
  function isRemoteProjectId(value) {
4374
3963
  return /^wb_[a-f0-9]{12}$/u.test(value);
4375
3964
  }
@@ -4463,71 +4052,12 @@ function formatHostedRunStarted(run, fallbackWorkflow) {
4463
4052
  "",
4464
4053
  ].join("\n");
4465
4054
  }
4466
- function formatRunDetail(record) {
4467
- const detail = record;
4468
- const { run, jobs, urls } = detail;
4469
- const cost = sumJobCostUsd(jobs);
4470
- const firstFailedJob = jobs.find((job) => job.status === "failed" && job.error);
4471
- const candidateId = hostedRunEvaluationCandidateId(run, jobs);
4472
- return [
4473
- `Run ${run.id}: ${run.status}${run.outcome ? ` (${run.outcome})` : ""}`,
4474
- `Workflow: ${run.workflow ?? "improve"}`,
4475
- `Candidate: ${candidateId ?? "pending"}`,
4476
- ...(run.activeCandidateId && candidateId && run.activeCandidateId !== candidateId
4477
- ? [`Active candidate: ${run.activeCandidateId}`]
4478
- : []),
4479
- `Samples: ${run.samples ?? 0}`,
4480
- `Attempts: ${run.attemptsExecuted ?? 0}/${run.attemptsRequested ?? run.attemptsExecuted ?? 0}`,
4481
- `Jobs: ${run.completedJobCount ?? jobs.filter(isTerminalRunJob).length}/${run.jobCount ?? jobs.length} completed${run.failedJobCount ? `; ${run.failedJobCount} failed` : ""}`,
4482
- ...(typeof run.durationMs === "number"
4483
- ? [`Duration: ${formatDurationMs(run.durationMs)}`]
4484
- : []),
4485
- ...(cost > 0 ? [`Cost: ${formatUsd(cost)}`] : []),
4486
- ...(firstFailedJob?.error
4487
- ? [`First failed job ${firstFailedJob.id}: ${firstFailedJob.error}`]
4488
- : []),
4489
- ...(urls.candidateEvaluation
4490
- ? [`Open evaluation: ${urls.candidateEvaluation}`]
4491
- : [`Open benchmark: ${urls.benchmark}`]),
4492
- ...(jobs.length > 0 ? ["", "Jobs:", ...jobs.map(formatRunJobLine)] : []),
4493
- ].join("\n");
4494
- }
4495
- function formatRunJobLine(job) {
4496
- return [
4497
- job.id,
4498
- readRunJobPurpose(job) ?? job.kind ?? "job",
4499
- job.status,
4500
- job.candidateId ?? "-",
4501
- job.error ?? "",
4502
- ].filter((value, index) => index < 4 || value !== "").join("\t");
4503
- }
4504
- function isTerminalRunJob(job) {
4505
- return job.status === "succeeded" || job.status === "failed" || job.status === "cancelled";
4506
- }
4507
4055
  function readRunJobPurpose(job) {
4508
4056
  const input = readRecord(job.input);
4509
4057
  const execution = readRecord(input?.execution);
4510
4058
  const purpose = execution?.purpose;
4511
4059
  return typeof purpose === "string" && purpose ? purpose : null;
4512
4060
  }
4513
- function sumJobCostUsd(jobs) {
4514
- const sum = jobs.reduce((total, job) => total + costUsdFromUsage(readRecord(job.output)?.usage), 0);
4515
- return Number.isFinite(sum) ? Math.round(sum * 1_000_000) / 1_000_000 : 0;
4516
- }
4517
- function costUsdFromUsage(value) {
4518
- const usage = readRecord(value);
4519
- if (!usage) {
4520
- return 0;
4521
- }
4522
- const direct = readFiniteNumber(usage.costUsd);
4523
- if (direct !== null) {
4524
- return direct;
4525
- }
4526
- return ["total", "improver", "runner", "engine"].reduce((sum, key) => {
4527
- const nested = readRecord(usage[key]);
4528
- return sum + (readFiniteNumber(nested?.costUsd) ?? 0);
4529
- }, 0);
4530
- }
4531
4061
  function readRecord(value) {
4532
4062
  return value && typeof value === "object" && !Array.isArray(value)
4533
4063
  ? value
@@ -4545,24 +4075,6 @@ function integerValue(value) {
4545
4075
  function readFiniteNumber(value) {
4546
4076
  return typeof value === "number" && Number.isFinite(value) ? value : null;
4547
4077
  }
4548
- function formatDurationMs(durationMs) {
4549
- if (durationMs < 1000) {
4550
- return `${Math.max(0, Math.round(durationMs))}ms`;
4551
- }
4552
- const seconds = durationMs / 1000;
4553
- if (seconds < 60) {
4554
- return `${seconds.toFixed(seconds < 10 ? 1 : 0)}s`;
4555
- }
4556
- const minutes = Math.floor(seconds / 60);
4557
- const remainingSeconds = Math.round(seconds % 60);
4558
- return `${minutes}m ${remainingSeconds}s`;
4559
- }
4560
- function formatUsd(value) {
4561
- return `$${value.toFixed(value < 1 ? 4 : 2)}`;
4562
- }
4563
- function shortDigest(value) {
4564
- return value.length > 12 ? value.slice(0, 12) : value;
4565
- }
4566
4078
  async function withHostedRunFailureSummary(target, run) {
4567
4079
  if (hostedRunSucceeded(run) || run.error || (run.failedJobCount ?? 0) <= 0) {
4568
4080
  return run;
@@ -4592,23 +4104,44 @@ function hostedRunSucceeded(run) {
4592
4104
  async function readWorkbenchOrigin(dir) {
4593
4105
  try {
4594
4106
  const parsed = JSON.parse(await fs.readFile(workbenchOriginPath(dir), "utf8"));
4595
- if (!parsed.projectId ||
4596
- !parsed.baseUrl ||
4597
- !parsed.owner ||
4598
- !parsed.project ||
4599
- typeof parsed.writable !== "boolean") {
4107
+ if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
4108
+ throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
4109
+ }
4110
+ const originRecord = parsed;
4111
+ const keys = Object.keys(originRecord).sort();
4112
+ const expectedKeys = [
4113
+ "baseUrl",
4114
+ "linkedAt",
4115
+ "projectId",
4116
+ "remote",
4117
+ "runtimeFingerprint",
4118
+ "sourceFingerprint",
4119
+ "sourceRevisionId",
4120
+ ];
4121
+ if (typeof originRecord.projectId !== "string" ||
4122
+ typeof originRecord.baseUrl !== "string" ||
4123
+ typeof originRecord.remote !== "string" ||
4124
+ typeof originRecord.sourceRevisionId !== "string" ||
4125
+ typeof originRecord.sourceFingerprint !== "string" ||
4126
+ typeof originRecord.runtimeFingerprint !== "string" ||
4127
+ typeof originRecord.linkedAt !== "string" ||
4128
+ originRecord.projectId.length === 0 ||
4129
+ originRecord.sourceRevisionId.length === 0 ||
4130
+ originRecord.sourceFingerprint.length === 0 ||
4131
+ originRecord.runtimeFingerprint.length === 0) {
4132
+ throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
4133
+ }
4134
+ if (JSON.stringify(keys) !== JSON.stringify(expectedKeys)) {
4600
4135
  throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
4601
4136
  }
4602
4137
  return {
4603
- baseUrl: normalizeBaseUrl(parsed.baseUrl),
4604
- owner: parsed.owner,
4605
- project: parsed.project,
4606
- projectId: parsed.projectId,
4607
- writable: parsed.writable,
4608
- ...(parsed.sourceRevisionId ? { sourceRevisionId: parsed.sourceRevisionId } : {}),
4609
- ...(parsed.sourceFingerprint ? { sourceFingerprint: parsed.sourceFingerprint } : {}),
4610
- ...(parsed.upstream ? { upstream: parsed.upstream } : {}),
4611
- linkedAt: parsed.linkedAt ?? new Date(0).toISOString(),
4138
+ baseUrl: normalizeBaseUrl(originRecord.baseUrl),
4139
+ remote: normalizeOriginRemote(originRecord.remote),
4140
+ projectId: originRecord.projectId,
4141
+ sourceRevisionId: originRecord.sourceRevisionId,
4142
+ sourceFingerprint: originRecord.sourceFingerprint,
4143
+ runtimeFingerprint: originRecord.runtimeFingerprint,
4144
+ linkedAt: originRecord.linkedAt,
4612
4145
  };
4613
4146
  }
4614
4147
  catch (error) {
@@ -4627,8 +4160,12 @@ async function requireWorkbenchOrigin(dir) {
4627
4160
  }
4628
4161
  async function writeWorkbenchOrigin(dir, input) {
4629
4162
  const origin = {
4630
- ...input,
4631
4163
  baseUrl: normalizeBaseUrl(input.baseUrl),
4164
+ remote: normalizeOriginRemote(input.remote),
4165
+ projectId: input.projectId,
4166
+ sourceRevisionId: input.sourceRevisionId,
4167
+ sourceFingerprint: input.sourceFingerprint,
4168
+ runtimeFingerprint: input.runtimeFingerprint,
4632
4169
  linkedAt: input.linkedAt ?? new Date().toISOString(),
4633
4170
  };
4634
4171
  const filePath = workbenchOriginPath(dir);
@@ -4636,6 +4173,49 @@ async function writeWorkbenchOrigin(dir, input) {
4636
4173
  await fs.writeFile(filePath, `${JSON.stringify(origin, null, 2)}\n`);
4637
4174
  return origin;
4638
4175
  }
4176
+ async function writeWorkbenchOriginFromState(dir, args) {
4177
+ const owner = args.state.project.ownerUsername;
4178
+ const name = args.state.project.name;
4179
+ const sourceRevisionId = args.state.source.revisionId ??
4180
+ args.state.base.sourceRevisionId;
4181
+ const sourceFingerprint = args.state.source.fingerprint ??
4182
+ args.state.base.sourceFingerprint;
4183
+ const runtimeFingerprint = args.state.base.runtimeFingerprint ??
4184
+ workbenchRuntimeBundleFingerprint(args.state.runtime);
4185
+ if (!sourceRevisionId || !sourceFingerprint || !runtimeFingerprint) {
4186
+ throw new UsageError("Hosted project state is missing required origin metadata.");
4187
+ }
4188
+ return await writeWorkbenchOrigin(dir, {
4189
+ baseUrl: args.baseUrl,
4190
+ remote: `${owner}/${name}`,
4191
+ projectId: args.state.project.id,
4192
+ sourceRevisionId,
4193
+ sourceFingerprint,
4194
+ runtimeFingerprint,
4195
+ });
4196
+ }
4197
+ function parseOriginRemote(origin) {
4198
+ return parseRemoteName(origin.remote);
4199
+ }
4200
+ function parseRemoteName(remote) {
4201
+ try {
4202
+ return parseBenchmarkRef(remote);
4203
+ }
4204
+ catch {
4205
+ throw new UsageError(`Workbench origin remote must use OWNER/BENCHMARK: ${remote}`);
4206
+ }
4207
+ }
4208
+ function normalizeOriginRemote(remote) {
4209
+ const parsed = parseRemoteName(remote.trim());
4210
+ return `${parsed.owner}/${parsed.project}`;
4211
+ }
4212
+ function originRemoteUrlParts(origin) {
4213
+ const remote = parseOriginRemote(origin);
4214
+ return {
4215
+ owner: remote.owner,
4216
+ projectName: remote.project,
4217
+ };
4218
+ }
4639
4219
  function workbenchOriginPath(dir) {
4640
4220
  return path.join(dir, ".workbench", "origin.json");
4641
4221
  }
@@ -4674,30 +4254,6 @@ async function readWorkbenchProfileStatus(config) {
4674
4254
  return { authenticated: true, profile: null };
4675
4255
  }
4676
4256
  }
4677
- function readOptionalCandidateId(parsed) {
4678
- return asOptionalString(parsed.flags.candidate) ?? parsed.positionals[0];
4679
- }
4680
- function readRequiredCandidateId(parsed) {
4681
- const candidateId = readOptionalCandidateId(parsed);
4682
- if (!candidateId) {
4683
- throw new UsageError("Missing required CANDIDATE_ID.");
4684
- }
4685
- return candidateId;
4686
- }
4687
- function readRequiredRunId(parsed) {
4688
- const runId = parsed.positionals[0];
4689
- if (!runId) {
4690
- throw new UsageError("Missing required RUN_ID.");
4691
- }
4692
- return runId;
4693
- }
4694
- function requireOutDir(parsed) {
4695
- const output = asOptionalString(parsed.flags.out);
4696
- if (!output) {
4697
- throw new UsageError("Missing required --out.");
4698
- }
4699
- return output;
4700
- }
4701
4257
  async function apiRequest(apiPath, options = {}, baseUrlOverride) {
4702
4258
  const config = await loadConfig();
4703
4259
  const baseUrl = normalizeBaseUrl(baseUrlOverride ??
@@ -5219,10 +4775,12 @@ function resolveSourceDir(parsed) {
5219
4775
  if (parsed.positionals.length > 1) {
5220
4776
  throw new UsageError("Expected at most one source file or directory argument.");
5221
4777
  }
5222
- if (parsed.positionals.length > 0 && parsed.flags.dir !== undefined) {
5223
- throw new UsageError("Use either --dir or SOURCE, not both.");
4778
+ const dir = asOptionalString(parsed.flags.dir);
4779
+ const source = parsed.positionals[0];
4780
+ if (dir && source) {
4781
+ return path.resolve(dir, source);
5224
4782
  }
5225
- return path.resolve(asOptionalString(parsed.flags.dir) ?? parsed.positionals[0] ?? process.cwd());
4783
+ return path.resolve(dir ?? source ?? process.cwd());
5226
4784
  }
5227
4785
  function isWorkbenchSourceYamlPath(filePath) {
5228
4786
  return path.basename(filePath) === WORKBENCH_BENCHMARK_FILE;
@@ -5368,6 +4926,14 @@ async function syncSourceFiles(outputDir, files) {
5368
4926
  }
5369
4927
  await writeFiles(outputDir, files);
5370
4928
  }
4929
+ async function assertLocalSourceMatchesOrigin(dir, origin) {
4930
+ const source = await readLocalProjectSource(dir);
4931
+ const fingerprint = localProjectStateSource(source).fingerprint;
4932
+ if (fingerprint === origin.sourceFingerprint) {
4933
+ return;
4934
+ }
4935
+ throw new UsageError("Local source changed since the last pull or push. Run `workbench push` before pulling, or restore the local source changes and try again.");
4936
+ }
5371
4937
  async function readManagedSourceFilePaths(outputDir) {
5372
4938
  try {
5373
4939
  const source = await readLocalProjectSource(outputDir);