@workbench-ai/workbench 0.0.64 → 0.0.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,17 +5,18 @@ import { createRequire } from "node:module";
5
5
  import os from "node:os";
6
6
  import path from "node:path";
7
7
  import { Writable } from "node:stream";
8
- import { createCandidateFilePreview, createBaselineCandidateJob as createRuntimeBaselineCandidateJob, evaluationScorecardId, evaluationMeanMetrics, executeWorkbenchExecutionJob, engineResolveBindingForSpec, filterOptimizerTraceJobsForCaseIds, filterCandidateSourceFiles, formatWorkbenchCaseSelector, formatWorkbenchSelectionPolicy, workbenchCaseSelectorUsesAllCases, workbenchExecutionPurpose, workbenchRunExecutionFingerprint, createWorkbenchAdapterAuthBundle, createOptimizerTraceInputFiles, DOCKER_SANDBOX_BACKEND, localWorkbenchAdapterAuthStore, materializeWorkbenchRunResult, normalizeSurfaceFiles, planWorkbenchExecutionJobsForPurpose, runWorkbenchExecutionDag, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, runtimeResources, summarizeCandidateFiles, validateWorkbenchRunEnvelope, parseWorkbenchAdapterAuthTarget, workbenchEngineCaseIdsForImproveEvaluation, workbenchEngineCaseIdsForSelector, workbenchImproveOptimizeSelector, workbenchImproveSelectionPolicy, workbenchProjectSourceFingerprint, workbenchRuntimeBundleFingerprint, workbenchRuntimeExplicitActiveId, } from "@workbench-ai/workbench-core";
8
+ import { createBaselineCandidateJob as createRuntimeBaselineCandidateJob, evaluationScorecardId, evaluationMeanMetrics, executeWorkbenchExecutionJob, engineResolveBindingForSpec, filterOptimizerTraceJobsForCaseIds, filterCandidateSourceFiles, formatWorkbenchCaseSelector, formatWorkbenchSelectionPolicy, workbenchCaseSelectorUsesAllCases, workbenchExecutionPurpose, workbenchRunExecutionFingerprint, createWorkbenchAdapterAuthBundle, createOptimizerTraceInputFiles, DOCKER_SANDBOX_BACKEND, localWorkbenchAdapterAuthStore, materializeWorkbenchRunResult, normalizeSurfaceFiles, planWorkbenchExecutionJobsForPurpose, runWorkbenchExecutionDag, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, runtimeResources, validateWorkbenchRunEnvelope, parseWorkbenchAdapterAuthTarget, workbenchEngineCaseIdsForImproveEvaluation, workbenchEngineCaseIdsForSelector, workbenchImproveOptimizeSelector, workbenchImproveSelectionPolicy, workbenchProjectSourceFingerprint, workbenchRuntimeBundleFingerprint, workbenchRuntimeExplicitActiveId, } from "@workbench-ai/workbench-core";
9
9
  import { assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterAuthRequirements, normalizeWorkbenchAdapterOperationRequest, readWorkbenchAdapterOperationResult, workbenchAdapterOperationCommand, workbenchAdapterOperationResultPath, withDefaultWorkbenchAdapterAuthProfiles as applyDefaultWorkbenchAdapterAuthProfiles, } from "@workbench-ai/workbench-protocol";
10
10
  import { builtinLocalTraceAdapter, builtinLocalTraceAdapters, sortLocalTraceRefs, } from "@workbench-ai/workbench-built-in-adapters/local-traces";
11
- import { commandUsage, HOSTED_WATCH_LIFECYCLE_NOTE, LOCAL_DEV_OPEN_LIFECYCLE_NOTE, rootUsage, } from "./command-model.js";
11
+ import { commandUsage, REMOTE_WATCH_LIFECYCLE_NOTE, LOCAL_DEV_OPEN_LIFECYCLE_NOTE, rootUsage, } from "./command-model.js";
12
12
  import { startLocalWorkbenchDevServer } from "./dev-open-server.js";
13
+ import { createLocalWorkbenchInspection } from "./local-inspection.js";
13
14
  import { createWorkbenchInitScaffold, } from "./init-scaffold.js";
14
15
  import { defaultAdapterManifests, composeRuntimeDockerfileWithAdapters, resolveDefaultWorkbenchAdapter, resolveProjectAdapterSource, resolveWorkbenchAdaptersForProject, WORKBENCH_ADAPTER_MANIFEST_FILE, } from "./adapter-project.js";
15
16
  import { createAdapterCommandEnv } from "./adapter-command-env.js";
16
17
  import { loadLocalArchive, loadLocalArchiveIndex, exportLocalRuntimeBundle, importLocalRuntimeBundle, runtimeBundleStats, materializeCandidateRoot, readLocalCandidate, readLocalCandidateFiles, readLocalJobs, saveLocalArchive, saveLocalJobs, setLocalActive, upsertLocalRun, upsertLocalCandidate, upsertLocalEvaluation, } from "./local-archive.js";
17
18
  import { WorkspaceSnapshotError, } from "./workspace-snapshot.js";
18
- import { hostedEngineResolveFiles, readLocalProjectSource, WORKBENCH_BENCHMARK_FILE, } from "./project-source.js";
19
+ import { remoteEngineResolveFiles, readLocalProjectSource, WORKBENCH_BENCHMARK_FILE, } from "./project-source.js";
19
20
  import { localBenchmarkFingerprint, localCandidateFingerprint, projectStateBenchmarkFingerprint, } from "./benchmark-fingerprint.js";
20
21
  const require = createRequire(import.meta.url);
21
22
  function getCliVersion() {
@@ -82,31 +83,31 @@ export async function runCli(argv, io = {
82
83
  return await pushBenchmark(argv.slice(1), io);
83
84
  }
84
85
  if (argv[0] === "eval") {
85
- const hosted = extractHostedFlag(argv.slice(1));
86
- return hosted.enabled
87
- ? await startHostedWorkflow("eval", hosted.argv, io)
88
- : await localEvaluateCandidate(hosted.argv, io, runtimeOptions);
86
+ const remote = extractRemoteFlag(argv.slice(1));
87
+ return remote.enabled
88
+ ? await startRemoteWorkflow("eval", remote.argv, io)
89
+ : await localEvaluateCandidate(remote.argv, io, runtimeOptions);
89
90
  }
90
91
  if (argv[0] === "retry") {
91
- const hosted = extractHostedFlag(argv.slice(1));
92
- return hosted.enabled
93
- ? await retryHostedWorkflow(hosted.argv, io)
94
- : await localRetry(hosted.argv, io, runtimeOptions);
92
+ const remote = extractRemoteFlag(argv.slice(1));
93
+ return remote.enabled
94
+ ? await retryRemoteWorkflow(remote.argv, io)
95
+ : await localRetry(remote.argv, io, runtimeOptions);
95
96
  }
96
97
  if (argv[0] === "improve") {
97
- const hosted = extractHostedFlag(argv.slice(1));
98
- return hosted.enabled
99
- ? await startHostedWorkflow("improve", hosted.argv, io)
100
- : await localRun(hosted.argv, io, runtimeOptions);
98
+ const remote = extractRemoteFlag(argv.slice(1));
99
+ return remote.enabled
100
+ ? await startRemoteWorkflow("improve", remote.argv, io)
101
+ : await localRun(remote.argv, io, runtimeOptions);
101
102
  }
102
103
  if (argv[0] === "restore") {
103
104
  return await localRestore(argv.slice(1), io);
104
105
  }
105
106
  if (argv[0] === "open") {
106
- const hosted = extractHostedFlag(argv.slice(1));
107
- return hosted.enabled
108
- ? await openWorkbench(hosted.argv, io)
109
- : await localDevOpen(hosted.argv, io);
107
+ const remote = extractRemoteFlag(argv.slice(1));
108
+ return remote.enabled
109
+ ? await openWorkbench(remote.argv, io)
110
+ : await localDevOpen(remote.argv, io);
110
111
  }
111
112
  if (argv[0] === "auth") {
112
113
  return await runAuthCommand(argv.slice(1), io);
@@ -117,6 +118,9 @@ export async function runCli(argv, io = {
117
118
  if (argv[0] === "traces") {
118
119
  return await runTracesCommand(argv.slice(1), io);
119
120
  }
121
+ if (argv[0] === "diagnose") {
122
+ return await localDiagnose(argv.slice(1), io);
123
+ }
120
124
  const commandPath = argv.slice(0, 2).join(" ");
121
125
  const rest = argv.slice(2);
122
126
  switch (commandPath) {
@@ -124,6 +128,12 @@ export async function runCli(argv, io = {
124
128
  return await localRunList(rest, io);
125
129
  case "runs show":
126
130
  return await localRunShow(rest, io);
131
+ case "evaluations list":
132
+ return await localEvaluationList(rest, io);
133
+ case "evaluations show":
134
+ return await localEvaluationShow(rest, io);
135
+ case "executions trace":
136
+ return await localExecutionTrace(rest, io);
127
137
  case "candidates list":
128
138
  return await localCandidateList(rest, io);
129
139
  case "candidates show":
@@ -167,17 +177,25 @@ function commandPathForHelp(argv) {
167
177
  ["list", "show"].includes(positionals[1] ?? "")) {
168
178
  return positionals.slice(0, 2).join(" ");
169
179
  }
180
+ if (positionals[0] === "evaluations" &&
181
+ ["list", "show"].includes(positionals[1] ?? "")) {
182
+ return positionals.slice(0, 2).join(" ");
183
+ }
184
+ if (positionals[0] === "executions" &&
185
+ ["trace"].includes(positionals[1] ?? "")) {
186
+ return positionals.slice(0, 2).join(" ");
187
+ }
170
188
  if (positionals[0] === "candidates" &&
171
189
  ["list", "show", "files", "preview"].includes(positionals[1] ?? "")) {
172
190
  return positionals.slice(0, 2).join(" ");
173
191
  }
174
192
  return positionals[0] ?? "";
175
193
  }
176
- function extractHostedFlag(argv) {
194
+ function extractRemoteFlag(argv) {
177
195
  let enabled = false;
178
196
  const next = [];
179
197
  for (const arg of argv) {
180
- if (arg === "--hosted") {
198
+ if (arg === "--remote") {
181
199
  enabled = true;
182
200
  }
183
201
  else {
@@ -1498,6 +1516,9 @@ function latestCompletedAttemptJobsByPair(jobs, desiredKeys) {
1498
1516
  return byPair;
1499
1517
  }
1500
1518
  function caseSamplePairFromJob(job) {
1519
+ if (job.caseId && Number.isSafeInteger(job.sampleIndex) && job.sampleIndex >= 0) {
1520
+ return { caseId: job.caseId, sampleIndex: job.sampleIndex };
1521
+ }
1501
1522
  const input = readRecord(job.input);
1502
1523
  const execution = readRecord(input?.execution);
1503
1524
  const metadata = readRecord(execution?.metadata);
@@ -1593,7 +1614,7 @@ function resolveProjectPath(root, filePath) {
1593
1614
  }
1594
1615
  async function executeLocalDevelopmentJob(args) {
1595
1616
  return await executeWorkbenchExecutionJob(args, {
1596
- sandboxProvider: DOCKER_SANDBOX_BACKEND,
1617
+ sandboxBackend: DOCKER_SANDBOX_BACKEND,
1597
1618
  loadLocalAdapterAuthProfiles: true,
1598
1619
  });
1599
1620
  }
@@ -1604,7 +1625,7 @@ async function executeLocalDevelopmentDag(args) {
1604
1625
  const result = await runWorkbenchExecutionDag({
1605
1626
  jobs: args.jobs,
1606
1627
  capacity: args.capacity,
1607
- sandboxProvider: DOCKER_SANDBOX_BACKEND,
1628
+ sandboxBackend: DOCKER_SANDBOX_BACKEND,
1608
1629
  executeJob: async (job) => {
1609
1630
  return await executeLocalDevelopmentJob({
1610
1631
  job,
@@ -1802,11 +1823,16 @@ async function localRestore(argv, io) {
1802
1823
  writeOutput({ ok: true, activeCandidateId: candidateId, changedPaths }, parsed, io, () => `Restored ${candidateId} to ${candidateRoot}.`);
1803
1824
  return 0;
1804
1825
  }
1826
+ function localInspectionFromParsed(parsed) {
1827
+ return createLocalWorkbenchInspection({ workspace: resolveDir(parsed) });
1828
+ }
1805
1829
  async function localCandidateList(argv, io) {
1806
1830
  const parsed = parseArgs(argv);
1807
1831
  rejectUnknownFlags(parsed, new Set(["dir", "json"]));
1808
- const snapshot = await loadLocalArchive(resolveDir(parsed));
1809
- writeOutput(snapshot.candidates, parsed, io, (candidates) => candidates
1832
+ const inspection = localInspectionFromParsed(parsed);
1833
+ const snapshot = await inspection.snapshot();
1834
+ const candidates = await Promise.all(snapshot.summaries.map((candidate) => inspection.candidate({ id: candidate.id })));
1835
+ writeOutput(candidates, parsed, io, (candidates) => candidates
1810
1836
  .map((candidate) => `${candidate.id}\t${candidate.status}\tevaluation ${formatCandidateEvaluationScore(candidate)}${snapshot.activeId === candidate.id ? "\tactive" : ""}`)
1811
1837
  .join("\n") || "No candidates.");
1812
1838
  return 0;
@@ -1814,13 +1840,14 @@ async function localCandidateList(argv, io) {
1814
1840
  async function localCandidateShow(argv, io) {
1815
1841
  const parsed = parseArgs(argv);
1816
1842
  rejectUnknownFlags(parsed, new Set(["dir", "candidate", "json"]));
1817
- const snapshot = await loadLocalArchive(resolveDir(parsed));
1843
+ const inspection = localInspectionFromParsed(parsed);
1844
+ const snapshot = await inspection.snapshot();
1818
1845
  const candidateId = readCandidateIdFlag(parsed, snapshot);
1819
- const candidate = readLocalCandidate(snapshot, candidateId);
1846
+ const candidate = await inspection.candidate({ id: candidateId });
1820
1847
  writeOutput(candidate, parsed, io, (record) => [
1821
1848
  `${record.id}\t${record.status}`,
1822
1849
  `benchmark\t${record.benchmarkFingerprint}`,
1823
- `candidate\t${record.candidateFingerprint ?? record.candidateFingerprint}`,
1850
+ `candidate\t${record.candidateFingerprint}`,
1824
1851
  `evaluation\t${formatCandidateEvaluationSummary(record)}`,
1825
1852
  ...(record.baseId ? [`base\t${record.baseId}`] : []),
1826
1853
  ].join("\n"));
@@ -1829,10 +1856,10 @@ async function localCandidateShow(argv, io) {
1829
1856
  async function localCandidateFiles(argv, io) {
1830
1857
  const parsed = parseArgs(argv);
1831
1858
  rejectUnknownFlags(parsed, new Set(["dir", "candidate", "json"]));
1832
- const snapshot = await loadLocalArchive(resolveDir(parsed));
1859
+ const inspection = localInspectionFromParsed(parsed);
1860
+ const snapshot = await inspection.snapshot();
1833
1861
  const candidateId = readCandidateIdFlag(parsed, snapshot);
1834
- const candidate = readLocalCandidate(snapshot, candidateId);
1835
- const files = summarizeCandidateFiles(readLocalCandidateFiles(snapshot, candidateId), candidate.fileChanges);
1862
+ const files = await inspection.candidateFiles({ id: candidateId });
1836
1863
  writeOutput(files, parsed, io, (records) => records
1837
1864
  .map((file) => `${file.path}\t${file.status}\t${file.preview_kind}`)
1838
1865
  .join("\n") || "No files.");
@@ -1841,10 +1868,11 @@ async function localCandidateFiles(argv, io) {
1841
1868
  async function localCandidatePreview(argv, io) {
1842
1869
  const parsed = parseArgs(argv);
1843
1870
  rejectUnknownFlags(parsed, new Set(["dir", "candidate", "path", "output", "view", "json"]));
1844
- const snapshot = await loadLocalArchive(resolveDir(parsed));
1871
+ const inspection = localInspectionFromParsed(parsed);
1872
+ const snapshot = await inspection.snapshot();
1845
1873
  const candidateId = readCandidateIdFlag(parsed, snapshot);
1846
- const preview = createCandidateFilePreview({
1847
- files: readLocalCandidateFiles(snapshot, candidateId),
1874
+ const preview = await inspection.candidatePreview({
1875
+ id: candidateId,
1848
1876
  path: requireFlag(parsed, "path"),
1849
1877
  view: readPreviewMode(parsed),
1850
1878
  });
@@ -1865,7 +1893,7 @@ async function localCandidatePreview(argv, io) {
1865
1893
  async function localRunList(argv, io) {
1866
1894
  const parsed = parseArgs(argv);
1867
1895
  rejectUnknownFlags(parsed, new Set(["dir", "json"]));
1868
- const snapshot = await loadLocalArchive(resolveDir(parsed));
1896
+ const snapshot = await localInspectionFromParsed(parsed).snapshot();
1869
1897
  writeOutput(snapshot.runs, parsed, io, (runs) => runs
1870
1898
  .map((run) => `${run.id}\t${run.workflow}\t${run.status}\t${run.outcome ?? "pending"}\t${run.attemptsExecuted ?? 0}/${run.attemptsRequested ?? 0}`)
1871
1899
  .join("\n") || "No runs.");
@@ -1873,26 +1901,114 @@ async function localRunList(argv, io) {
1873
1901
  }
1874
1902
  async function localRunShow(argv, io) {
1875
1903
  const parsed = parseArgs(argv);
1876
- rejectUnknownFlags(parsed, new Set(["dir", "json"]));
1904
+ rejectUnknownFlags(parsed, new Set(["dir", "jobs", "failures", "json"]));
1877
1905
  const runId = parsed.positionals[0];
1878
1906
  if (!runId) {
1879
1907
  throw new UsageError("workbench runs show requires RUN_ID.");
1880
1908
  }
1881
- const snapshot = await loadLocalArchive(resolveDir(parsed));
1882
- const run = snapshot.runs.find((entry) => entry.id === runId);
1883
- if (!run) {
1884
- throw new UsageError(`Run not found: ${runId}`);
1885
- }
1886
- writeOutput(run, parsed, io, (record) => [
1887
- `${record.id}\t${record.workflow}\t${record.status}`,
1888
- `outcome\t${record.outcome ?? "pending"}`,
1889
- `started\t${record.startedAt}`,
1890
- ...(record.finishedAt ? [`finished\t${record.finishedAt}`] : []),
1891
- `attempts\t${record.attemptsExecuted ?? 0}/${record.attemptsRequested ?? 0}`,
1892
- `samples\t${record.samples ?? 0}`,
1909
+ const inspection = localInspectionFromParsed(parsed);
1910
+ const detail = await inspection.run({
1911
+ id: runId,
1912
+ includeJobs: parsed.flags.jobs === true || parsed.flags.failures === true,
1913
+ });
1914
+ const diagnosis = parsed.flags.failures === true
1915
+ ? await inspection.diagnose({ targetId: runId })
1916
+ : null;
1917
+ writeOutput(parsed.flags.failures === true
1918
+ ? { ...detail, diagnosis }
1919
+ : detail, parsed, io, (record) => {
1920
+ const run = record.run;
1921
+ const jobs = "jobs" in record && Array.isArray(record.jobs)
1922
+ ? record.jobs
1923
+ : [];
1924
+ const failures = "diagnosis" in record && record.diagnosis
1925
+ ? record.diagnosis.failures
1926
+ : [];
1927
+ return [
1928
+ `${run.id}\t${run.workflow}\t${run.status}`,
1929
+ `outcome\t${run.outcome ?? "pending"}`,
1930
+ `started\t${run.startedAt}`,
1931
+ ...(run.finishedAt ? [`finished\t${run.finishedAt}`] : []),
1932
+ `attempts\t${run.attemptsExecuted ?? 0}/${run.attemptsRequested ?? 0}`,
1933
+ `samples\t${run.samples ?? 0}`,
1934
+ ...(jobs.length > 0
1935
+ ? [
1936
+ "jobs",
1937
+ ...jobs.map((job) => `${job.id}\t${job.kind}\t${job.status}${job.error ? `\t${job.error}` : ""}`),
1938
+ ]
1939
+ : []),
1940
+ ...(failures.length > 0
1941
+ ? [
1942
+ "failures",
1943
+ ...failures.map(formatFailureLine),
1944
+ ]
1945
+ : []),
1946
+ ].join("\n");
1947
+ });
1948
+ return 0;
1949
+ }
1950
+ async function localEvaluationList(argv, io) {
1951
+ const parsed = parseArgs(argv);
1952
+ rejectUnknownFlags(parsed, new Set(["dir", "json"]));
1953
+ const comparison = await localInspectionFromParsed(parsed).evaluations();
1954
+ writeOutput(comparison, parsed, io, (record) => record.rows
1955
+ .map((row) => `${row.evaluationId}\t${row.status}\t${formatNullableMetric(row.score)}\t${row.candidateLabel}\t${row.configurationLabel}\t${row.runId}`)
1956
+ .join("\n") || "No evaluations.");
1957
+ return 0;
1958
+ }
1959
+ async function localEvaluationShow(argv, io) {
1960
+ const parsed = parseArgs(argv);
1961
+ rejectUnknownFlags(parsed, new Set(["dir", "json"]));
1962
+ const evaluationId = parsed.positionals[0];
1963
+ if (!evaluationId) {
1964
+ throw new UsageError("workbench evaluations show requires EVALUATION_ID.");
1965
+ }
1966
+ const evaluation = await localInspectionFromParsed(parsed).evaluation({ id: evaluationId });
1967
+ writeOutput(evaluation, parsed, io, (record) => [
1968
+ `${record.id}\t${record.status}`,
1969
+ `candidate\t${record.candidateName ?? record.candidateId}`,
1970
+ `run\t${record.runId}`,
1971
+ `samples\t${record.completedSampleCount}/${record.sampleCount}`,
1972
+ `errors\t${record.errorSampleCount}`,
1973
+ `score\t${formatNullableMetric(record.metrics?.score?.mean ?? null)}`,
1974
+ ...(record.error ? [`error\t${record.error}`] : []),
1975
+ ...(record.evaluation.cases?.length
1976
+ ? [
1977
+ "cases",
1978
+ ...record.evaluation.cases.map((entry) => `${entry.id}\t${entry.status ?? "unknown"}\t${formatNullableMetric(entry.metrics?.score?.mean ?? null)}`),
1979
+ ]
1980
+ : []),
1893
1981
  ].join("\n"));
1894
1982
  return 0;
1895
1983
  }
1984
+ async function localExecutionTrace(argv, io) {
1985
+ const parsed = parseArgs(argv);
1986
+ rejectUnknownFlags(parsed, new Set(["dir", "run", "job", "json"]));
1987
+ const runId = requireFlag(parsed, "run");
1988
+ const jobId = requireFlag(parsed, "job");
1989
+ const detail = await localInspectionFromParsed(parsed).executionTrace({ runId, jobId });
1990
+ writeOutput(detail, parsed, io, (record) => record.executions
1991
+ .map((execution) => [
1992
+ `${execution.id}\t${execution.kind}\t${execution.status}`,
1993
+ `jobs\t${execution.jobIds.join(",")}`,
1994
+ `sessions\t${execution.sessions.length}`,
1995
+ `spans\t${execution.trace.spans.length}`,
1996
+ `events\t${execution.trace.events.length}`,
1997
+ `summaries\t${execution.trace.summaries.length}`,
1998
+ ].join("\n"))
1999
+ .join("\n\n") || "No execution trace.");
2000
+ return 0;
2001
+ }
2002
+ async function localDiagnose(argv, io) {
2003
+ const parsed = parseArgs(argv);
2004
+ rejectUnknownFlags(parsed, new Set(["dir", "json"]));
2005
+ rejectUnexpectedPositionals(parsed, "workbench diagnose", 1);
2006
+ const diagnosis = await localInspectionFromParsed(parsed).diagnose({ targetId: parsed.positionals[0] ?? null });
2007
+ writeOutput(diagnosis, parsed, io, (record) => record.failures.length > 0
2008
+ ? record.failures.map(formatFailureLine).join("\n")
2009
+ : "No failures.");
2010
+ return 0;
2011
+ }
1896
2012
  async function runAuthCommand(argv, io) {
1897
2013
  const command = argv[0];
1898
2014
  const rest = argv.slice(1);
@@ -2446,7 +2562,11 @@ fs.writeFileSync(resultPath, JSON.stringify({
2446
2562
  async function login(argv, io) {
2447
2563
  const parsed = parseArgs(argv);
2448
2564
  rejectUnknownFlags(parsed, new Set(["base-url", "no-open", "json"]));
2449
- const baseUrl = asOptionalString(parsed.flags["base-url"]) ?? DEFAULT_BASE_URL;
2565
+ const config = await loadConfig();
2566
+ const baseUrl = selectWorkbenchBaseUrl({
2567
+ explicitBaseUrl: asOptionalString(parsed.flags["base-url"]),
2568
+ configBaseUrl: config.baseUrl,
2569
+ });
2450
2570
  const authorization = await requestDeviceAuthorization(baseUrl);
2451
2571
  if (parsed.flags.json === true) {
2452
2572
  writeJson({ ok: true, status: "authorization_pending", ...authorization }, io);
@@ -2472,7 +2592,7 @@ async function logout(argv, io) {
2472
2592
  const parsed = parseArgs(argv);
2473
2593
  rejectUnknownFlags(parsed, new Set(["json"]));
2474
2594
  const config = await loadConfig();
2475
- const baseUrl = normalizeBaseUrl(process.env.WORKBENCH_API_URL ?? config.baseUrl ?? DEFAULT_BASE_URL);
2595
+ const baseUrl = selectWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
2476
2596
  if (config.accessToken) {
2477
2597
  await fetch(`${baseUrl}/api/oauth/revoke`, {
2478
2598
  method: "POST",
@@ -2493,8 +2613,8 @@ async function authStatus(argv, io) {
2493
2613
  const baseUrl = await effectiveBaseUrl();
2494
2614
  const profileStatus = await readWorkbenchProfileStatus(config);
2495
2615
  const adapterStatuses = await localWorkbenchAdapterAuthStore().listStatus();
2496
- const hostedAuth = profileStatus.authenticated
2497
- ? await readHostedAdapterAuthStatuses().catch((error) => ({
2616
+ const remoteAuth = profileStatus.authenticated
2617
+ ? await readRemoteAdapterAuthStatuses().catch((error) => ({
2498
2618
  adapters: [],
2499
2619
  error: error instanceof Error ? error.message : String(error),
2500
2620
  }))
@@ -2503,7 +2623,7 @@ async function authStatus(argv, io) {
2503
2623
  error: "not_authenticated",
2504
2624
  };
2505
2625
  const dir = resolveDir(parsed);
2506
- const adapterAuth = await projectAdapterAuthStatus(dir, adapterStatuses, hostedAuth.adapters).catch(() => []);
2626
+ const adapterAuth = await projectAdapterAuthStatus(dir, adapterStatuses, remoteAuth.adapters).catch(() => []);
2507
2627
  const result = {
2508
2628
  ok: true,
2509
2629
  workbench: {
@@ -2512,7 +2632,7 @@ async function authStatus(argv, io) {
2512
2632
  username: profileStatus.profile?.username ?? null,
2513
2633
  },
2514
2634
  adapterStatuses,
2515
- hostedAuth,
2635
+ remoteAuth,
2516
2636
  adapterAuth,
2517
2637
  };
2518
2638
  writeOutput(result, parsed, io, (record) => {
@@ -2525,28 +2645,28 @@ async function authStatus(argv, io) {
2525
2645
  ? [
2526
2646
  "",
2527
2647
  "Required adapter auth:",
2528
- ...value.adapterAuth.map((adapter) => `${adapter.adapter}${adapter.profile !== "default" ? ` profile ${adapter.profile}` : ""}: local ${adapter.local.status}${adapter.local.method ? ` (${adapter.local.method})` : ""}${adapter.local.reason ? ` (${adapter.local.reason})` : ""}, hosted ${adapter.hosted.status}${adapter.hosted.method ? ` (${adapter.hosted.method})` : ""}${adapter.hosted.reason ? ` (${adapter.hosted.reason})` : ""}`),
2648
+ ...value.adapterAuth.map((adapter) => `${adapter.adapter}${adapter.profile !== "default" ? ` profile ${adapter.profile}` : ""}: local ${adapter.local.status}${adapter.local.method ? ` (${adapter.local.method})` : ""}${adapter.local.reason ? ` (${adapter.local.reason})` : ""}, remote ${adapter.remote.status}${adapter.remote.method ? ` (${adapter.remote.method})` : ""}${adapter.remote.reason ? ` (${adapter.remote.reason})` : ""}`),
2529
2649
  ]
2530
2650
  : []),
2531
2651
  ].join("\n");
2532
2652
  });
2533
2653
  return 0;
2534
2654
  }
2535
- async function projectAdapterAuthStatus(dir, adapterStatuses, hostedAdapters) {
2655
+ async function projectAdapterAuthStatus(dir, adapterStatuses, remoteAdapters) {
2536
2656
  const spec = (await readLocalProjectSource(dir)).spec;
2537
2657
  const adapters = await resolveWorkbenchAdaptersForProject(dir, spec);
2538
2658
  const adapterStatusMap = new Map(adapterStatuses.map((status) => [
2539
2659
  adapterAuthStatusKey(status.adapterId, status.slot, status.profile),
2540
2660
  status,
2541
2661
  ]));
2542
- const hostedAdapterStatusMap = new Map(hostedAdapters.map((status) => [
2662
+ const remoteAdapterStatusMap = new Map(remoteAdapters.map((status) => [
2543
2663
  adapterAuthStatusKey(status.adapterId, status.slot, status.profile),
2544
2664
  status,
2545
2665
  ]));
2546
2666
  const adapterById = new Map(adapters.map((adapter) => [adapter.manifest.id, adapter]));
2547
2667
  return requiredAuthTargetsForSpec(spec, adapterById).map((target) => {
2548
2668
  const adapterStatus = adapterStatusMap.get(adapterAuthStatusKey(target.adapter, target.slot, target.profile));
2549
- const hostedAdapterStatus = hostedAdapterStatusMap.get(adapterAuthStatusKey(target.adapter, target.slot, target.profile));
2669
+ const remoteAdapterStatus = remoteAdapterStatusMap.get(adapterAuthStatusKey(target.adapter, target.slot, target.profile));
2550
2670
  return {
2551
2671
  ...target,
2552
2672
  local: adapterStatus
@@ -2556,17 +2676,17 @@ async function projectAdapterAuthStatus(dir, adapterStatuses, hostedAdapters) {
2556
2676
  ...(adapterStatus.reason ? { reason: adapterStatus.reason } : {}),
2557
2677
  }
2558
2678
  : { status: "disconnected" },
2559
- hosted: hostedAdapterStatus
2679
+ remote: remoteAdapterStatus
2560
2680
  ? {
2561
- status: hostedAdapterStatus.status,
2562
- ...(hostedAdapterStatus.method ? { method: hostedAdapterStatus.method } : {}),
2563
- ...(hostedAdapterStatus.reason ? { reason: hostedAdapterStatus.reason } : {}),
2681
+ status: remoteAdapterStatus.status,
2682
+ ...(remoteAdapterStatus.method ? { method: remoteAdapterStatus.method } : {}),
2683
+ ...(remoteAdapterStatus.reason ? { reason: remoteAdapterStatus.reason } : {}),
2564
2684
  }
2565
2685
  : { status: "disconnected" },
2566
2686
  };
2567
2687
  });
2568
2688
  }
2569
- async function readHostedAdapterAuthStatuses() {
2689
+ async function readRemoteAdapterAuthStatuses() {
2570
2690
  const adapterResponse = await apiRequest("/api/workbench/auth/adapters");
2571
2691
  return {
2572
2692
  adapters: adapterResponse.adapters ?? [],
@@ -2941,7 +3061,7 @@ async function pushBenchmark(argv, io) {
2941
3061
  const dir = resolveSourceDir(parsed);
2942
3062
  const source = await readLocalProjectSource(dir);
2943
3063
  const origin = await readWorkbenchOrigin(dir);
2944
- const baseUrl = await effectiveBaseUrl(origin?.baseUrl);
3064
+ const baseUrl = await effectiveOriginBaseUrl(origin?.baseUrl);
2945
3065
  const visibility = readOptionalBenchmarkVisibility(parsed.flags.visibility);
2946
3066
  const createVisibility = visibility ?? "public";
2947
3067
  const dryRun = parsed.flags["dry-run"] === true;
@@ -2972,7 +3092,7 @@ async function pushBenchmark(argv, io) {
2972
3092
  }, parsed, io, () => `Would push benchmark ${source.spec.name}.`);
2973
3093
  return 0;
2974
3094
  }
2975
- const { project, origin: nextOrigin, result } = await createHostedBenchmarkFromState({
3095
+ const { project, origin: nextOrigin, result } = await createRemoteBenchmarkFromState({
2976
3096
  baseUrl,
2977
3097
  dir,
2978
3098
  state,
@@ -3001,7 +3121,7 @@ async function pushBenchmark(argv, io) {
3001
3121
  }
3002
3122
  const projectId = origin.projectId;
3003
3123
  if (!projectId) {
3004
- throw new UsageError("Missing hosted benchmark. Run workbench push from a source directory.");
3124
+ throw new UsageError("Missing remote benchmark. Run workbench push from a source directory.");
3005
3125
  }
3006
3126
  if (dryRun) {
3007
3127
  const remoteProject = await verifyLinkedPushDryRunTarget({
@@ -3017,7 +3137,7 @@ async function pushBenchmark(argv, io) {
3017
3137
  baseUrl,
3018
3138
  benchmarkId: projectId,
3019
3139
  remote: origin.remote,
3020
- benchmark: hostedProjectSummaryForOutput(remoteProject),
3140
+ benchmark: remoteProjectSummaryForOutput(remoteProject),
3021
3141
  benchmarkName: source.spec.name,
3022
3142
  visibility: visibility ?? "unchanged",
3023
3143
  sourceFileCount: sourceFileCount(source),
@@ -3031,7 +3151,7 @@ async function pushBenchmark(argv, io) {
3031
3151
  method: "PUT",
3032
3152
  body: state,
3033
3153
  }, baseUrl);
3034
- const responseProject = hostedProjectSummaryFromState(response.state);
3154
+ const responseProject = remoteProjectSummaryFromState(response.state);
3035
3155
  const publishedProject = await applyRequestedProjectVisibility({
3036
3156
  baseUrl,
3037
3157
  projectId: responseProject.id,
@@ -3079,7 +3199,7 @@ async function verifyLinkedPushDryRunTarget(args) {
3079
3199
  }
3080
3200
  return response.benchmark;
3081
3201
  }
3082
- function hostedProjectSummaryForOutput(project) {
3202
+ function remoteProjectSummaryForOutput(project) {
3083
3203
  return {
3084
3204
  ...(project.id ? { id: project.id } : {}),
3085
3205
  ...(project.ownerUsername ? { ownerUsername: project.ownerUsername } : {}),
@@ -3089,12 +3209,12 @@ function hostedProjectSummaryForOutput(project) {
3089
3209
  ...(typeof project.starCount === "number" ? { starCount: project.starCount } : {}),
3090
3210
  };
3091
3211
  }
3092
- async function createHostedBenchmarkFromState(args) {
3212
+ async function createRemoteBenchmarkFromState(args) {
3093
3213
  const result = await apiRequest("/api/workbench/benchmarks/state", {
3094
3214
  method: "POST",
3095
3215
  body: args.state,
3096
3216
  }, args.baseUrl);
3097
- const project = hostedProjectSummaryFromState(result.state);
3217
+ const project = remoteProjectSummaryFromState(result.state);
3098
3218
  const applied = await acceptPushedProjectStateToLocal({
3099
3219
  dir: args.dir,
3100
3220
  baseUrl: args.baseUrl,
@@ -3168,7 +3288,7 @@ async function pullProject(argv, io) {
3168
3288
  }
3169
3289
  const dir = resolveDir(parsed);
3170
3290
  const origin = await requireWorkbenchOrigin(dir);
3171
- const baseUrl = await effectiveBaseUrl(origin.baseUrl);
3291
+ const baseUrl = await effectiveOriginBaseUrl(origin.baseUrl);
3172
3292
  const remoteRef = parseOriginRemote(origin);
3173
3293
  const state = await apiRequest(publicProjectStateApiPath(remoteRef), {}, baseUrl);
3174
3294
  if (parsed.flags["dry-run"] === true) {
@@ -3228,7 +3348,7 @@ async function acceptPushedProjectStateToLocal(args) {
3228
3348
  });
3229
3349
  return { origin, runtime: runtime.stats };
3230
3350
  }
3231
- async function retryHostedWorkflow(argv, io) {
3351
+ async function retryRemoteWorkflow(argv, io) {
3232
3352
  const parsed = parseArgs(argv);
3233
3353
  rejectUnknownFlags(parsed, new Set([
3234
3354
  "dir",
@@ -3238,7 +3358,7 @@ async function retryHostedWorkflow(argv, io) {
3238
3358
  "timeout-ms",
3239
3359
  "json",
3240
3360
  ]));
3241
- rejectUnexpectedPositionals(parsed, "workbench retry --hosted", 1);
3361
+ rejectUnexpectedPositionals(parsed, "workbench retry --remote", 1);
3242
3362
  const targetId = parsed.positionals[0];
3243
3363
  if (!targetId) {
3244
3364
  throw new UsageError("Missing required TARGET_ID.");
@@ -3247,8 +3367,8 @@ async function retryHostedWorkflow(argv, io) {
3247
3367
  parsed.flags["timeout-ms"] !== undefined)) {
3248
3368
  throw new UsageError("--interval-ms and --timeout-ms require --watch.");
3249
3369
  }
3250
- const target = await resolveHostedTarget(parsed, { requireProjectIdentity: true });
3251
- const retryTarget = await resolveHostedRetryTarget(target, targetId);
3370
+ const target = await resolveRemoteTarget(parsed, { requireProjectIdentity: true });
3371
+ const retryTarget = await resolveRemoteRetryTarget(target, targetId);
3252
3372
  const watchIntervalMs = parsed.flags.watch === true
3253
3373
  ? parsePositiveInt(parsed.flags["interval-ms"], 1000, "interval-ms")
3254
3374
  : undefined;
@@ -3259,23 +3379,23 @@ async function retryHostedWorkflow(argv, io) {
3259
3379
  method: "POST",
3260
3380
  body: retryTarget.request,
3261
3381
  }, target.baseUrl);
3262
- const runTarget = hostedTargetForRunStartResponse(target, response);
3382
+ const runTarget = remoteTargetForRunStartResponse(target, response);
3263
3383
  const startedRun = withRunUrls(runTarget, response.run);
3264
3384
  if (parsed.flags.watch === true) {
3265
3385
  if (parsed.flags.json !== true) {
3266
- io.stdout.write(`${formatHostedRunStarted(startedRun, retryTarget.workflow).trimEnd()}\n${HOSTED_WATCH_LIFECYCLE_NOTE}\n`);
3386
+ io.stdout.write(`${formatRemoteRunStarted(startedRun, retryTarget.workflow).trimEnd()}\n${REMOTE_WATCH_LIFECYCLE_NOTE}\n`);
3267
3387
  }
3268
- const watched = await watchHostedRun({
3388
+ const watched = await watchRemoteRun({
3269
3389
  parsed,
3270
3390
  target: runTarget,
3271
3391
  runId: response.run.id,
3272
3392
  intervalMs: watchIntervalMs ?? 1000,
3273
3393
  timeoutMs: watchTimeoutMs,
3274
3394
  });
3275
- const outputRun = withRunUrls(runTarget, await withHostedRunFailureSummary(runTarget, watched));
3276
- await tryImportTerminalHostedProjectState({ target: runTarget, io });
3395
+ const outputRun = withRunUrls(runTarget, await withRemoteRunFailureSummary(runTarget, watched));
3396
+ await tryImportTerminalRemoteProjectState({ target: runTarget, io });
3277
3397
  const result = {
3278
- ok: hostedRunSucceeded(watched),
3398
+ ok: remoteRunSucceeded(watched),
3279
3399
  retried: {
3280
3400
  id: retryTarget.sourceId,
3281
3401
  kind: retryTarget.sourceKind,
@@ -3290,7 +3410,7 @@ async function retryHostedWorkflow(argv, io) {
3290
3410
  ...(outputRun.error ? { error: outputRun.error } : {}),
3291
3411
  };
3292
3412
  writeOutput(result, parsed, io, formatRetryCommandResult);
3293
- return hostedRunSucceeded(watched) ? 0 : 1;
3413
+ return remoteRunSucceeded(watched) ? 0 : 1;
3294
3414
  }
3295
3415
  const result = {
3296
3416
  ok: true,
@@ -3308,20 +3428,20 @@ async function retryHostedWorkflow(argv, io) {
3308
3428
  writeOutput(result, parsed, io, formatRetryCommandResult);
3309
3429
  return 0;
3310
3430
  }
3311
- async function resolveHostedRetryTarget(target, targetId) {
3431
+ async function resolveRemoteRetryTarget(target, targetId) {
3312
3432
  if (targetId.startsWith("eval_")) {
3313
- return await resolveHostedEvaluationRetryTarget(target, targetId);
3433
+ return await resolveRemoteEvaluationRetryTarget(target, targetId);
3314
3434
  }
3315
- const detail = await readHostedRunDetail(target, targetId);
3435
+ const detail = await readRemoteRunDetail(target, targetId);
3316
3436
  const run = detail.run;
3317
3437
  if (run.status !== "finished") {
3318
3438
  throw new UsageError(`Run ${run.id} is ${run.status}; wait for it to finish before retrying.`);
3319
3439
  }
3320
- if (!hostedRunRecordFailed(run)) {
3321
- throw new UsageError(`Run ${run.id} did not fail; use workbench ${run.workflow ?? "eval"} --hosted to intentionally run it again.`);
3440
+ if (!remoteRunRecordFailed(run)) {
3441
+ throw new UsageError(`Run ${run.id} did not fail; use workbench ${run.workflow ?? "eval"} --remote to intentionally run it again.`);
3322
3442
  }
3323
3443
  if (run.workflow === "eval") {
3324
- const candidateId = hostedRunEvaluationCandidateId(run, detail.jobs);
3444
+ const candidateId = remoteRunEvaluationCandidateId(run, detail.jobs);
3325
3445
  if (!candidateId) {
3326
3446
  throw new UsageError(`Run ${run.id} has no candidate id to retry.`);
3327
3447
  }
@@ -3330,17 +3450,18 @@ async function resolveHostedRetryTarget(target, targetId) {
3330
3450
  sourceKind: "run",
3331
3451
  workflow: "eval",
3332
3452
  request: {
3453
+ schema: "workbench.remote.run.request.v1",
3333
3454
  workflow: "eval",
3334
3455
  samples: run.samples ?? 1,
3335
3456
  candidateId,
3336
- sourceYaml: hostedRetrySourceYaml(run, run.id),
3457
+ sourceYaml: remoteRetrySourceYaml(run, run.id),
3337
3458
  preserveActive: true,
3338
3459
  ...retrySampleSelectionFromJobs(detail.jobs),
3339
3460
  },
3340
3461
  };
3341
3462
  }
3342
3463
  if (run.workflow === "improve") {
3343
- const baseCandidateId = stringValue(readRecord(run.input)?.baseCandidateId);
3464
+ const baseCandidateId = stringValue(readRecord(run.retry)?.baseCandidateId);
3344
3465
  if (!baseCandidateId) {
3345
3466
  throw new UsageError(`Run ${run.id} is missing its base candidate id.`);
3346
3467
  }
@@ -3349,41 +3470,43 @@ async function resolveHostedRetryTarget(target, targetId) {
3349
3470
  sourceKind: "run",
3350
3471
  workflow: "improve",
3351
3472
  request: {
3473
+ schema: "workbench.remote.run.request.v1",
3352
3474
  workflow: "improve",
3353
3475
  samples: run.samples ?? 1,
3354
3476
  budget: run.budget ?? run.attemptsRequested ?? 1,
3355
3477
  candidateId: baseCandidateId,
3356
- sourceYaml: hostedRetrySourceYaml(run, run.id),
3478
+ sourceYaml: remoteRetrySourceYaml(run, run.id),
3357
3479
  preserveActive: true,
3358
3480
  },
3359
3481
  };
3360
3482
  }
3361
3483
  throw new UsageError(`Run ${run.id} has no retryable workflow.`);
3362
3484
  }
3363
- async function resolveHostedEvaluationRetryTarget(target, evaluationId) {
3485
+ async function resolveRemoteEvaluationRetryTarget(target, evaluationId) {
3364
3486
  const snapshot = await apiRequest(projectApiPath(target.projectId, "/workbench/snapshot"), {}, target.baseUrl);
3365
3487
  const evaluation = snapshot.evaluations.find((entry) => entry.id === evaluationId);
3366
3488
  if (!evaluation) {
3367
- throw new UsageError(`Hosted evaluation not found: ${evaluationId}`);
3489
+ throw new UsageError(`Remote evaluation not found: ${evaluationId}`);
3368
3490
  }
3369
3491
  const run = snapshot.runs.find((entry) => entry.id === evaluation.runId) ?? null;
3370
3492
  if (!evaluationScorecardFailed(evaluation, run)) {
3371
- throw new UsageError(`Evaluation ${evaluation.id} did not fail; use workbench eval --hosted to intentionally run it again.`);
3493
+ throw new UsageError(`Evaluation ${evaluation.id} did not fail; use workbench eval --remote to intentionally run it again.`);
3372
3494
  }
3373
3495
  if (!run) {
3374
3496
  throw new UsageError(`Evaluation ${evaluation.id} is missing its run record.`);
3375
3497
  }
3376
- const detail = await readHostedRunDetail(target, run.id);
3498
+ const detail = await readRemoteRunDetail(target, run.id);
3377
3499
  const detailedRun = detail.run;
3378
3500
  return {
3379
3501
  sourceId: evaluationId,
3380
3502
  sourceKind: "evaluation",
3381
3503
  workflow: "eval",
3382
3504
  request: {
3505
+ schema: "workbench.remote.run.request.v1",
3383
3506
  workflow: "eval",
3384
3507
  samples: evaluation.sampleCount || detailedRun.samples || 1,
3385
3508
  candidateId: evaluation.candidateId,
3386
- sourceYaml: hostedRetrySourceYaml(detailedRun, detailedRun.id),
3509
+ sourceYaml: remoteRetrySourceYaml(detailedRun, detailedRun.id),
3387
3510
  preserveActive: true,
3388
3511
  ...retrySampleSelectionFromJobs(detail.jobs),
3389
3512
  },
@@ -3392,7 +3515,7 @@ async function resolveHostedEvaluationRetryTarget(target, evaluationId) {
3392
3515
  function retrySampleSelectionFromJobs(jobs) {
3393
3516
  const selectedSamples = uniqueCaseSamplePairs(jobs
3394
3517
  .filter((job) => job.status !== "succeeded" &&
3395
- executionPurposeFromJobInput(job.input) === "attempt")
3518
+ readRunJobPurpose(job) === "attempt")
3396
3519
  .map(caseSamplePairFromJob)
3397
3520
  .filter((pair) => pair !== null));
3398
3521
  return selectedSamples.length > 0
@@ -3407,10 +3530,10 @@ function uniqueCaseSamplePairs(pairs) {
3407
3530
  return [...byKey.values()].sort((left, right) => left.caseId.localeCompare(right.caseId) ||
3408
3531
  left.sampleIndex - right.sampleIndex);
3409
3532
  }
3410
- async function readHostedRunDetail(target, runId) {
3533
+ async function readRemoteRunDetail(target, runId) {
3411
3534
  return await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(runId)}`), {}, target.baseUrl);
3412
3535
  }
3413
- async function tryImportTerminalHostedProjectState(args) {
3536
+ async function tryImportTerminalRemoteProjectState(args) {
3414
3537
  const origin = args.target.origin;
3415
3538
  if (!origin || origin.projectId !== args.target.projectId) {
3416
3539
  return;
@@ -3426,23 +3549,23 @@ async function tryImportTerminalHostedProjectState(args) {
3426
3549
  });
3427
3550
  }
3428
3551
  catch (error) {
3429
- args.io.stderr.write(`Hosted run finished, but local project state was not updated: ${errorMessage(error)}\n`);
3552
+ args.io.stderr.write(`Remote run finished, but local project state was not updated: ${errorMessage(error)}\n`);
3430
3553
  }
3431
3554
  }
3432
- function hostedRetrySourceYaml(run, runId) {
3433
- const sourceYaml = stringValue(readRecord(run.input)?.sourceYaml);
3555
+ function remoteRetrySourceYaml(run, runId) {
3556
+ const sourceYaml = stringValue(readRecord(run.retry)?.sourceYaml);
3434
3557
  if (!sourceYaml) {
3435
3558
  throw new UsageError(`Run ${runId} is missing its recorded source configuration.`);
3436
3559
  }
3437
3560
  return sourceYaml;
3438
3561
  }
3439
- function hostedRunRecordFailed(run) {
3562
+ function remoteRunRecordFailed(run) {
3440
3563
  return run.outcome === "error" ||
3441
3564
  run.outcome === "cancelled" ||
3442
3565
  (run.failedJobCount ?? 0) > 0 ||
3443
3566
  Boolean(run.error);
3444
3567
  }
3445
- async function startHostedWorkflow(workflow, argv, io) {
3568
+ async function startRemoteWorkflow(workflow, argv, io) {
3446
3569
  const parsed = parseArgs(argv);
3447
3570
  const allowedFlags = new Set([
3448
3571
  "dir",
@@ -3465,7 +3588,7 @@ async function startHostedWorkflow(workflow, argv, io) {
3465
3588
  }
3466
3589
  rejectUnknownFlags(parsed, allowedFlags);
3467
3590
  if (parsed.positionals.length > 1) {
3468
- throw new UsageError(`workbench ${workflow} --hosted accepts at most one source file or directory argument.`);
3591
+ throw new UsageError(`workbench ${workflow} --remote accepts at most one source file or directory argument.`);
3469
3592
  }
3470
3593
  const sourceArg = resolveSourceDir(parsed);
3471
3594
  const samples = parsePositiveInt(parsed.flags.samples, 1, "samples");
@@ -3480,13 +3603,13 @@ async function startHostedWorkflow(workflow, argv, io) {
3480
3603
  const defaultProjectSource = await readLocalProjectSource(path.resolve(sourceArg));
3481
3604
  const selectedRunIds = workflow === "eval"
3482
3605
  ? resolveCandidateRunSelection(defaultProjectSource, runsFlag)
3483
- : [singleRequestedRunId(runsFlag, `workbench ${workflow} --hosted`) ?? defaultProjectSource.candidateRunId];
3606
+ : [singleRequestedRunId(runsFlag, `workbench ${workflow} --remote`) ?? defaultProjectSource.candidateRunId];
3484
3607
  if (workflow === "eval" && selectedRunIds.length > 1) {
3485
3608
  let failed = 0;
3486
3609
  const results = [];
3487
3610
  for (const runId of selectedRunIds) {
3488
3611
  const captured = createCapturingIo(io);
3489
- const code = await startHostedWorkflow(workflow, hostedWorkflowArgsForRun({
3612
+ const code = await startRemoteWorkflow(workflow, remoteWorkflowArgsForRun({
3490
3613
  parsed,
3491
3614
  sourceDir: defaultProjectSource.dir,
3492
3615
  runId,
@@ -3501,7 +3624,7 @@ async function startHostedWorkflow(workflow, argv, io) {
3501
3624
  candidateRunIds: selectedRunIds,
3502
3625
  failedRunCount: failed,
3503
3626
  results,
3504
- }, parsed, io, () => `Processed ${selectedRunIds.length} hosted candidate run(s); ${failed} failed.`);
3627
+ }, parsed, io, () => `Processed ${selectedRunIds.length} remote candidate run(s); ${failed} failed.`);
3505
3628
  return failed === 0 ? 0 : 1;
3506
3629
  }
3507
3630
  const selectedCandidateId = workflow === "eval"
@@ -3509,12 +3632,14 @@ async function startHostedWorkflow(workflow, argv, io) {
3509
3632
  : asOptionalString(parsed.flags.base);
3510
3633
  const request = workflow === "improve"
3511
3634
  ? {
3635
+ schema: "workbench.remote.run.request.v1",
3512
3636
  workflow,
3513
3637
  budget,
3514
3638
  samples,
3515
3639
  ...(selectedCandidateId ? { candidateId: selectedCandidateId } : {}),
3516
3640
  }
3517
3641
  : {
3642
+ schema: "workbench.remote.run.request.v1",
3518
3643
  workflow,
3519
3644
  samples,
3520
3645
  ...(selectedCandidateId ? { candidateId: selectedCandidateId } : {}),
@@ -3538,7 +3663,7 @@ async function startHostedWorkflow(workflow, argv, io) {
3538
3663
  : undefined;
3539
3664
  const dryRun = parsed.flags["dry-run"] === true;
3540
3665
  if (dryRun) {
3541
- const target = await resolveHostedDryRunTarget(parsed, { sourceDir: projectSource.dir });
3666
+ const target = await resolveRemoteDryRunTarget(parsed, { sourceDir: projectSource.dir });
3542
3667
  writeOutput({
3543
3668
  ok: true,
3544
3669
  dryRun: true,
@@ -3547,20 +3672,21 @@ async function startHostedWorkflow(workflow, argv, io) {
3547
3672
  dir: target.dir,
3548
3673
  baseUrl: target.baseUrl,
3549
3674
  request,
3550
- }, parsed, io, () => `Would start hosted ${workflow} for ${target.projectRef}.`);
3675
+ }, parsed, io, () => `Would start remote ${workflow} for ${target.projectRef}.`);
3551
3676
  return 0;
3552
3677
  }
3553
- const target = await resolveHostedTarget(parsed, {
3678
+ const target = await resolveRemoteTarget(parsed, {
3554
3679
  requireProjectIdentity: true,
3555
3680
  sourceDir: projectSource.dir,
3556
3681
  });
3557
3682
  if (workflow === "improve") {
3558
- request.candidateId = await ensureHostedImproveBaseCandidate({
3683
+ request.candidateId = await ensureRemoteImproveBaseCandidate({
3559
3684
  parsed,
3560
3685
  target,
3561
3686
  samples: request.samples,
3562
3687
  candidateId: selectedCandidateId,
3563
3688
  sourceYaml: projectSource.specSource,
3689
+ candidateFiles: projectSource.candidateFiles,
3564
3690
  adapterFiles: projectSource.adapterFiles,
3565
3691
  intervalMs: watchIntervalMs ?? 1000,
3566
3692
  timeoutMs: watchTimeoutMs,
@@ -3571,53 +3697,53 @@ async function startHostedWorkflow(workflow, argv, io) {
3571
3697
  method: "POST",
3572
3698
  body: request,
3573
3699
  }, target.baseUrl);
3574
- const runTarget = hostedTargetForRunStartResponse(target, response);
3700
+ const runTarget = remoteTargetForRunStartResponse(target, response);
3575
3701
  const startedRun = withRunUrls(runTarget, response.run);
3576
3702
  const startedRunOutput = response.reused === true
3577
3703
  ? { ...startedRun, reused: true }
3578
3704
  : startedRun;
3579
3705
  if (response.reused === true && response.run.status === "finished") {
3580
- await tryImportTerminalHostedProjectState({ target: runTarget, io });
3706
+ await tryImportTerminalRemoteProjectState({ target: runTarget, io });
3581
3707
  writeOutput({
3582
- ok: hostedRunSucceeded(response.run),
3708
+ ok: remoteRunSucceeded(response.run),
3583
3709
  reused: true,
3584
3710
  workflow,
3585
3711
  runId: startedRun.id,
3586
3712
  ...startedRun,
3587
- }, parsed, io, () => `Reused hosted ${workflow} ${startedRun.id}. Use --rerun to intentionally run it again.`);
3588
- return hostedRunSucceeded(response.run) ? 0 : 1;
3713
+ }, parsed, io, () => `Reused remote ${workflow} ${startedRun.id}. Use --rerun to intentionally run it again.`);
3714
+ return remoteRunSucceeded(response.run) ? 0 : 1;
3589
3715
  }
3590
3716
  if (parsed.flags.watch === true) {
3591
3717
  if (parsed.flags.json !== true) {
3592
- io.stdout.write(`${formatHostedRunStarted(startedRun, workflow).trimEnd()}\n${HOSTED_WATCH_LIFECYCLE_NOTE}\n`);
3718
+ io.stdout.write(`${formatRemoteRunStarted(startedRun, workflow).trimEnd()}\n${REMOTE_WATCH_LIFECYCLE_NOTE}\n`);
3593
3719
  }
3594
- const watched = await watchHostedRun({
3720
+ const watched = await watchRemoteRun({
3595
3721
  parsed,
3596
3722
  target: runTarget,
3597
3723
  runId: response.run.id,
3598
3724
  intervalMs: watchIntervalMs ?? 1000,
3599
3725
  timeoutMs: watchTimeoutMs,
3600
3726
  });
3601
- const outputRun = await withHostedRunFailureSummary(runTarget, watched);
3602
- await tryImportTerminalHostedProjectState({ target: runTarget, io });
3603
- writeOutput(withRunUrls(runTarget, outputRun), parsed, io, formatHostedRunResult);
3604
- return hostedRunSucceeded(watched) ? 0 : 1;
3727
+ const outputRun = await withRemoteRunFailureSummary(runTarget, watched);
3728
+ await tryImportTerminalRemoteProjectState({ target: runTarget, io });
3729
+ writeOutput(withRunUrls(runTarget, outputRun), parsed, io, formatRemoteRunResult);
3730
+ return remoteRunSucceeded(watched) ? 0 : 1;
3605
3731
  }
3606
- writeOutput(startedRunOutput, parsed, io, (run) => formatHostedRunStarted(run, workflow).trimEnd());
3732
+ writeOutput(startedRunOutput, parsed, io, (run) => formatRemoteRunStarted(run, workflow).trimEnd());
3607
3733
  return 0;
3608
3734
  }
3609
- async function ensureHostedImproveBaseCandidate(args) {
3735
+ async function ensureRemoteImproveBaseCandidate(args) {
3610
3736
  if (args.candidateId) {
3611
- const candidate = await readHostedCandidateSummary(args.target, args.candidateId);
3737
+ const candidate = await readRemoteCandidateSummary(args.target, args.candidateId);
3612
3738
  if (!candidate) {
3613
3739
  throw new UsageError(`Base candidate ${args.candidateId} was not found for the current benchmark.`);
3614
3740
  }
3615
- if (hostedCandidateIsEvaluated(candidate)) {
3741
+ if (remoteCandidateIsEvaluated(candidate)) {
3616
3742
  return args.candidateId;
3617
3743
  }
3618
3744
  }
3619
3745
  else {
3620
- const activeCandidate = await readEvaluatedActiveHostedCandidate(args.target);
3746
+ const activeCandidate = await readEvaluatedActiveRemoteCandidate(args.target);
3621
3747
  if (activeCandidate) {
3622
3748
  return activeCandidate.id;
3623
3749
  }
@@ -3625,31 +3751,33 @@ async function ensureHostedImproveBaseCandidate(args) {
3625
3751
  const response = await apiRequest(projectApiPath(args.target.projectId, "/runs"), {
3626
3752
  method: "POST",
3627
3753
  body: {
3754
+ schema: "workbench.remote.run.request.v1",
3628
3755
  workflow: "eval",
3629
3756
  samples: args.samples,
3630
3757
  ...(args.candidateId ? { candidateId: args.candidateId } : {}),
3631
3758
  sourceYaml: args.sourceYaml,
3759
+ ...(args.candidateId ? {} : { candidateFiles: args.candidateFiles }),
3632
3760
  ...(args.adapterFiles.length > 0 ? { adapterFiles: args.adapterFiles } : {}),
3633
3761
  },
3634
3762
  }, args.target.baseUrl);
3635
- const runTarget = hostedTargetForRunStartResponse(args.target, response);
3636
- const watched = await watchHostedRun({
3763
+ const runTarget = remoteTargetForRunStartResponse(args.target, response);
3764
+ const watched = await watchRemoteRun({
3637
3765
  parsed: args.parsed,
3638
3766
  target: runTarget,
3639
3767
  runId: response.run.id,
3640
3768
  intervalMs: args.intervalMs,
3641
3769
  timeoutMs: args.timeoutMs,
3642
3770
  });
3643
- if (!hostedRunSucceeded(watched)) {
3771
+ if (!remoteRunSucceeded(watched)) {
3644
3772
  throw new UsageError(`Parent candidate eval ${watched.id} failed; improve was not started.`);
3645
3773
  }
3646
3774
  if (!watched.candidateId) {
3647
3775
  throw new UsageError(`Parent candidate eval ${watched.id} did not produce a candidate.`);
3648
3776
  }
3649
- await tryImportTerminalHostedProjectState({ target: runTarget, io: args.io });
3777
+ await tryImportTerminalRemoteProjectState({ target: runTarget, io: args.io });
3650
3778
  return watched.candidateId;
3651
3779
  }
3652
- function hostedWorkflowArgsForRun(args) {
3780
+ function remoteWorkflowArgsForRun(args) {
3653
3781
  const next = ["--dir", args.sourceDir, "--runs", args.runId, "--json"];
3654
3782
  appendStringFlag(next, "benchmark", asOptionalString(args.parsed.flags.benchmark));
3655
3783
  appendStringFlag(next, "candidate", asOptionalString(args.parsed.flags.candidate));
@@ -3672,27 +3800,27 @@ function appendStringFlag(args, name, value) {
3672
3800
  args.push(`--${name}`, value);
3673
3801
  }
3674
3802
  }
3675
- async function readHostedCandidateSummary(target, candidateId) {
3803
+ async function readRemoteCandidateSummary(target, candidateId) {
3676
3804
  const response = await apiRequest(projectApiPath(target.projectId, "/candidates"), {}, target.baseUrl);
3677
3805
  return response.candidates.find((entry) => entry.id === candidateId) ?? null;
3678
3806
  }
3679
- async function readEvaluatedActiveHostedCandidate(target) {
3807
+ async function readEvaluatedActiveRemoteCandidate(target) {
3680
3808
  const response = await apiRequest(projectApiPath(target.projectId), {}, target.baseUrl);
3681
3809
  const activeCandidateId = response.benchmark.activeCandidateId;
3682
3810
  if (!activeCandidateId) {
3683
3811
  return null;
3684
3812
  }
3685
- const candidate = await readHostedCandidateSummary(target, activeCandidateId);
3686
- return candidate && hostedCandidateIsEvaluated(candidate) ? candidate : null;
3813
+ const candidate = await readRemoteCandidateSummary(target, activeCandidateId);
3814
+ return candidate && remoteCandidateIsEvaluated(candidate) ? candidate : null;
3687
3815
  }
3688
- function hostedCandidateIsEvaluated(candidate) {
3816
+ function remoteCandidateIsEvaluated(candidate) {
3689
3817
  return candidate.status === "evaluated" || candidate.eval != null;
3690
3818
  }
3691
3819
  async function openWorkbench(argv, io) {
3692
3820
  const parsed = parseArgs(argv);
3693
3821
  rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "no-open", "json"]));
3694
3822
  if (parsed.positionals.length > 1) {
3695
- throw new UsageError(`Unexpected argument for workbench open --hosted: ${parsed.positionals.slice(1).join(" ")}`);
3823
+ throw new UsageError(`Unexpected argument for workbench open --remote: ${parsed.positionals.slice(1).join(" ")}`);
3696
3824
  }
3697
3825
  const target = await resolveOpenTarget(parsed);
3698
3826
  const ref = target.openRef;
@@ -3722,7 +3850,7 @@ function buildWorkbenchWebUrl(target, ref) {
3722
3850
  }
3723
3851
  return buildWorkbenchResourceUrls(target, { candidateId: ref }).candidateEvaluation;
3724
3852
  }
3725
- async function resolveHostedTarget(parsed, options = {}) {
3853
+ async function resolveRemoteTarget(parsed, options = {}) {
3726
3854
  if (options.sourceArg !== undefined && parsed.flags.dir !== undefined) {
3727
3855
  throw new UsageError("Use either --dir or SOURCE, not both.");
3728
3856
  }
@@ -3731,7 +3859,7 @@ async function resolveHostedTarget(parsed, options = {}) {
3731
3859
  : resolveDir(parsed, options.sourceArg);
3732
3860
  const origin = await readWorkbenchOrigin(dir);
3733
3861
  const explicitProject = asOptionalString(parsed.flags.benchmark);
3734
- const baseUrl = await effectiveBaseUrl(origin?.baseUrl);
3862
+ const baseUrl = await effectiveOriginBaseUrl(origin?.baseUrl);
3735
3863
  if (explicitProject && (!isRemoteProjectId(explicitProject) || options.requireProjectIdentity === true)) {
3736
3864
  const project = await resolveRemoteProject(explicitProject, baseUrl);
3737
3865
  return {
@@ -3745,7 +3873,7 @@ async function resolveHostedTarget(parsed, options = {}) {
3745
3873
  }
3746
3874
  const projectId = explicitProject ?? origin?.projectId;
3747
3875
  if (!projectId) {
3748
- throw new UsageError("Missing hosted benchmark. Run workbench push, workbench clone, or pass --benchmark OWNER/BENCHMARK.");
3876
+ throw new UsageError("Missing remote benchmark. Run workbench push, workbench clone, or pass --benchmark OWNER/BENCHMARK.");
3749
3877
  }
3750
3878
  const originRemote = origin ? parseOriginRemote(origin) : null;
3751
3879
  return {
@@ -3759,7 +3887,7 @@ async function resolveHostedTarget(parsed, options = {}) {
3759
3887
  origin,
3760
3888
  };
3761
3889
  }
3762
- async function resolveHostedDryRunTarget(parsed, options = {}) {
3890
+ async function resolveRemoteDryRunTarget(parsed, options = {}) {
3763
3891
  if (options.sourceArg !== undefined && parsed.flags.dir !== undefined) {
3764
3892
  throw new UsageError("Use either --dir or SOURCE, not both.");
3765
3893
  }
@@ -3768,7 +3896,7 @@ async function resolveHostedDryRunTarget(parsed, options = {}) {
3768
3896
  : resolveDir(parsed, options.sourceArg);
3769
3897
  const origin = await readWorkbenchOrigin(dir);
3770
3898
  const explicitProject = asOptionalString(parsed.flags.benchmark);
3771
- const baseUrl = await effectiveBaseUrl(origin?.baseUrl);
3899
+ const baseUrl = await effectiveOriginBaseUrl(origin?.baseUrl);
3772
3900
  if (explicitProject) {
3773
3901
  if (isRemoteProjectId(explicitProject)) {
3774
3902
  return {
@@ -3801,7 +3929,7 @@ async function resolveHostedDryRunTarget(parsed, options = {}) {
3801
3929
  origin,
3802
3930
  };
3803
3931
  }
3804
- throw new UsageError("Missing hosted benchmark. Run workbench push, workbench clone, or pass --benchmark OWNER/BENCHMARK.");
3932
+ throw new UsageError("Missing remote benchmark. Run workbench push, workbench clone, or pass --benchmark OWNER/BENCHMARK.");
3805
3933
  }
3806
3934
  async function resolveOpenTarget(parsed) {
3807
3935
  const ref = parsed.positionals[0];
@@ -3830,7 +3958,7 @@ async function resolveOpenTarget(parsed) {
3830
3958
  };
3831
3959
  }
3832
3960
  return {
3833
- ...(await resolveHostedTarget(parsed, { requireProjectIdentity: true })),
3961
+ ...(await resolveRemoteTarget(parsed, { requireProjectIdentity: true })),
3834
3962
  ...(ref ? { openRef: ref } : {}),
3835
3963
  };
3836
3964
  }
@@ -3901,7 +4029,7 @@ function withRunUrls(target, run) {
3901
4029
  }),
3902
4030
  };
3903
4031
  }
3904
- function hostedTargetForRunStartResponse(target, response) {
4032
+ function remoteTargetForRunStartResponse(target, response) {
3905
4033
  const projectId = response.benchmark?.id ?? response.run.projectId ?? target.projectId;
3906
4034
  if (projectId === target.projectId && !response.benchmark) {
3907
4035
  return target;
@@ -3926,7 +4054,7 @@ function hostedTargetForRunStartResponse(target, response) {
3926
4054
  }
3927
4055
  return next;
3928
4056
  }
3929
- function hostedRunEvaluationCandidateId(run, jobs = []) {
4057
+ function remoteRunEvaluationCandidateId(run, jobs = []) {
3930
4058
  if (run.outputCandidateId) {
3931
4059
  return run.outputCandidateId;
3932
4060
  }
@@ -3986,7 +4114,7 @@ function runtimeBundleForProjectVisibility(runtime, visibility) {
3986
4114
  };
3987
4115
  }
3988
4116
  function localProjectStateSource(source) {
3989
- const request = hostedProjectSourceRequest(source);
4117
+ const request = remoteProjectSourceRequest(source);
3990
4118
  const stateSource = {
3991
4119
  source: request.source,
3992
4120
  files: source.sourceFiles.map((file) => ({ ...file })),
@@ -4014,7 +4142,7 @@ function toSurfaceSnapshotFile(file) {
4014
4142
  executable: file.executable === true,
4015
4143
  };
4016
4144
  }
4017
- function hostedProjectSummaryFromState(state) {
4145
+ function remoteProjectSummaryFromState(state) {
4018
4146
  return {
4019
4147
  id: state.project.id,
4020
4148
  ownerUsername: state.project.ownerUsername,
@@ -4025,12 +4153,12 @@ function hostedProjectSummaryFromState(state) {
4025
4153
  function sourceFileCount(source) {
4026
4154
  return source.sourceFiles.length;
4027
4155
  }
4028
- function hostedProjectSourceRequest(source) {
4029
- const { network, resources } = hostedEnvironmentOptions(source);
4156
+ function remoteProjectSourceRequest(source) {
4157
+ const { network, resources } = remoteEnvironmentOptions(source);
4030
4158
  return {
4031
4159
  source: source.specSource,
4032
4160
  candidateFiles: source.candidateFiles,
4033
- engineResolveFiles: hostedEngineResolveFiles(source),
4161
+ engineResolveFiles: remoteEngineResolveFiles(source),
4034
4162
  engineResolveBinding: engineResolveBindingForSpec(source.spec),
4035
4163
  adapterFiles: source.adapterFiles,
4036
4164
  dockerfile: source.dockerfile,
@@ -4043,7 +4171,7 @@ function hostedProjectSourceRequest(source) {
4043
4171
  function isRemoteProjectId(value) {
4044
4172
  return /^wb_[a-f0-9]{12}$/u.test(value);
4045
4173
  }
4046
- function hostedEnvironmentOptions(source) {
4174
+ function remoteEnvironmentOptions(source) {
4047
4175
  return {
4048
4176
  network: source.spec.environment.network?.egress === "open"
4049
4177
  ? "on"
@@ -4051,7 +4179,7 @@ function hostedEnvironmentOptions(source) {
4051
4179
  resources: runtimeResources(source.spec.environment),
4052
4180
  };
4053
4181
  }
4054
- async function watchHostedRun(args) {
4182
+ async function watchRemoteRun(args) {
4055
4183
  const deadline = args.timeoutMs === undefined ? undefined : Date.now() + args.timeoutMs;
4056
4184
  let lastRun = null;
4057
4185
  while (true) {
@@ -4079,7 +4207,7 @@ async function watchHostedRun(args) {
4079
4207
  await sleep(args.intervalMs);
4080
4208
  }
4081
4209
  }
4082
- function formatHostedRunResult(run) {
4210
+ function formatRemoteRunResult(run) {
4083
4211
  const candidateId = run.outputCandidateId ?? run.candidateId;
4084
4212
  const activeDetail = run.activeCandidateId && candidateId && run.activeCandidateId !== candidateId
4085
4213
  ? `; active ${run.activeCandidateId}`
@@ -4097,7 +4225,7 @@ function formatRetryCommandResult(result) {
4097
4225
  const runId = run?.id ?? result.runId ?? "unknown";
4098
4226
  const scope = `${result.retried.kind} ${result.retried.id}`;
4099
4227
  const verb = run
4100
- ? run.status === "finished" ? "finished as hosted run" : "started as hosted run"
4228
+ ? run.status === "finished" ? "finished as remote run" : "started as remote run"
4101
4229
  : "finished as local run";
4102
4230
  return [
4103
4231
  `Retry of ${scope} ${verb} ${runId}.`,
@@ -4113,7 +4241,7 @@ function formatRetryCommandResult(result) {
4113
4241
  : result.urls?.benchmark ? [`Open benchmark: ${result.urls.benchmark}`] : []),
4114
4242
  ].join("\n");
4115
4243
  }
4116
- function formatHostedRunStarted(run, fallbackWorkflow) {
4244
+ function formatRemoteRunStarted(run, fallbackWorkflow) {
4117
4245
  const candidateId = run.outputCandidateId ?? run.candidateId;
4118
4246
  return [
4119
4247
  `Started ${run.workflow ?? fallbackWorkflow} run ${run.id}; ${candidateId ? `candidate ${candidateId}` : `${run.jobCount ?? 0} jobs queued`}.`,
@@ -4124,10 +4252,7 @@ function formatHostedRunStarted(run, fallbackWorkflow) {
4124
4252
  ].join("\n");
4125
4253
  }
4126
4254
  function readRunJobPurpose(job) {
4127
- const input = readRecord(job.input);
4128
- const execution = readRecord(input?.execution);
4129
- const purpose = execution?.purpose;
4130
- return typeof purpose === "string" && purpose ? purpose : null;
4255
+ return job.purpose && job.purpose.trim() ? job.purpose : null;
4131
4256
  }
4132
4257
  function readRecord(value) {
4133
4258
  return value && typeof value === "object" && !Array.isArray(value)
@@ -4146,24 +4271,24 @@ function integerValue(value) {
4146
4271
  function readFiniteNumber(value) {
4147
4272
  return typeof value === "number" && Number.isFinite(value) ? value : null;
4148
4273
  }
4149
- async function withHostedRunFailureSummary(target, run) {
4150
- if (hostedRunSucceeded(run) || run.error || (run.failedJobCount ?? 0) <= 0) {
4274
+ async function withRemoteRunFailureSummary(target, run) {
4275
+ if (remoteRunSucceeded(run) || run.error || (run.failedJobCount ?? 0) <= 0) {
4151
4276
  return run;
4152
4277
  }
4153
- const error = await readHostedRunFailureSummary(target, run.id);
4278
+ const error = await readRemoteRunFailureSummary(target, run.id);
4154
4279
  return error ? { ...run, error } : run;
4155
4280
  }
4156
- async function readHostedRunFailureSummary(target, runId) {
4281
+ async function readRemoteRunFailureSummary(target, runId) {
4157
4282
  try {
4158
- const project = await apiRequest(projectApiPath(target.projectId), {}, target.baseUrl);
4159
- const failed = project.benchmark.jobs.find((job) => job.runId === runId && job.status === "failed" && job.error);
4283
+ const detail = await readRemoteRunDetail(target, runId);
4284
+ const failed = detail.jobs.find((job) => job.status === "failed" && job.error);
4160
4285
  return failed?.error ? `First failed job ${failed.id}: ${failed.error}` : null;
4161
4286
  }
4162
4287
  catch {
4163
4288
  return null;
4164
4289
  }
4165
4290
  }
4166
- function hostedRunSucceeded(run) {
4291
+ function remoteRunSucceeded(run) {
4167
4292
  if (run.status !== "finished") {
4168
4293
  return false;
4169
4294
  }
@@ -4179,16 +4304,6 @@ async function readWorkbenchOrigin(dir) {
4179
4304
  throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
4180
4305
  }
4181
4306
  const originRecord = parsed;
4182
- const keys = Object.keys(originRecord).sort();
4183
- const expectedKeys = [
4184
- "baseUrl",
4185
- "linkedAt",
4186
- "projectId",
4187
- "remote",
4188
- "runtimeFingerprint",
4189
- "sourceFingerprint",
4190
- "sourceRevisionId",
4191
- ];
4192
4307
  if (typeof originRecord.projectId !== "string" ||
4193
4308
  typeof originRecord.baseUrl !== "string" ||
4194
4309
  typeof originRecord.remote !== "string" ||
@@ -4202,9 +4317,6 @@ async function readWorkbenchOrigin(dir) {
4202
4317
  originRecord.runtimeFingerprint.length === 0) {
4203
4318
  throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
4204
4319
  }
4205
- if (JSON.stringify(keys) !== JSON.stringify(expectedKeys)) {
4206
- throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
4207
- }
4208
4320
  return {
4209
4321
  baseUrl: normalizeBaseUrl(originRecord.baseUrl),
4210
4322
  remote: normalizeOriginRemote(originRecord.remote),
@@ -4254,7 +4366,7 @@ async function writeWorkbenchOriginFromState(dir, args) {
4254
4366
  const runtimeFingerprint = args.state.base.runtimeFingerprint ??
4255
4367
  workbenchRuntimeBundleFingerprint(args.state.runtime);
4256
4368
  if (!sourceRevisionId || !sourceFingerprint || !runtimeFingerprint) {
4257
- throw new UsageError("Hosted project state is missing required origin metadata.");
4369
+ throw new UsageError("Remote project state is missing required origin metadata.");
4258
4370
  }
4259
4371
  return await writeWorkbenchOrigin(dir, {
4260
4372
  baseUrl: args.baseUrl,
@@ -4290,18 +4402,29 @@ function originRemoteUrlParts(origin) {
4290
4402
  function workbenchOriginPath(dir) {
4291
4403
  return path.join(dir, ".workbench", "origin.json");
4292
4404
  }
4293
- async function effectiveBaseUrl(preferred) {
4405
+ async function effectiveBaseUrl() {
4294
4406
  const config = await loadConfig();
4295
- return normalizeBaseUrl(process.env.WORKBENCH_API_URL ??
4296
- preferred ??
4297
- config.baseUrl ??
4407
+ return selectWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
4408
+ }
4409
+ async function effectiveOriginBaseUrl(originBaseUrl) {
4410
+ const config = await loadConfig();
4411
+ return selectWorkbenchBaseUrl({
4412
+ originBaseUrl,
4413
+ configBaseUrl: config.baseUrl,
4414
+ });
4415
+ }
4416
+ function selectWorkbenchBaseUrl(input = {}) {
4417
+ return normalizeBaseUrl(input.explicitBaseUrl ??
4418
+ input.originBaseUrl ??
4419
+ process.env.WORKBENCH_API_URL ??
4420
+ input.configBaseUrl ??
4298
4421
  DEFAULT_BASE_URL);
4299
4422
  }
4300
4423
  async function readWorkbenchProfileStatus(config) {
4301
4424
  if (!config.accessToken) {
4302
4425
  return { authenticated: false, profile: null };
4303
4426
  }
4304
- const baseUrl = await effectiveBaseUrl(config.baseUrl);
4427
+ const baseUrl = selectWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
4305
4428
  try {
4306
4429
  const response = await fetch(`${baseUrl}/api/workbench/profile`, {
4307
4430
  headers: {
@@ -4327,10 +4450,9 @@ async function readWorkbenchProfileStatus(config) {
4327
4450
  }
4328
4451
  async function apiRequest(apiPath, options = {}, baseUrlOverride) {
4329
4452
  const config = await loadConfig();
4330
- const baseUrl = normalizeBaseUrl(baseUrlOverride ??
4331
- process.env.WORKBENCH_API_URL ??
4332
- config.baseUrl ??
4333
- DEFAULT_BASE_URL);
4453
+ const baseUrl = baseUrlOverride !== undefined
4454
+ ? normalizeBaseUrl(baseUrlOverride)
4455
+ : selectWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
4334
4456
  const method = options.method ?? "GET";
4335
4457
  const canRetry = method === "GET";
4336
4458
  let lastError = null;
@@ -4868,6 +4990,24 @@ function formatMetricValue(value) {
4868
4990
  }
4869
4991
  return value.toFixed(2);
4870
4992
  }
4993
+ function formatNullableMetric(value) {
4994
+ return typeof value === "number" && Number.isFinite(value)
4995
+ ? formatMetricValue(value)
4996
+ : "n/a";
4997
+ }
4998
+ function formatFailureLine(failure) {
4999
+ return [
5000
+ failure.kind,
5001
+ failure.id,
5002
+ failure.status ?? "failed",
5003
+ failure.runId ? `run=${failure.runId}` : null,
5004
+ failure.candidateId ? `candidate=${failure.candidateId}` : null,
5005
+ failure.jobId ? `job=${failure.jobId}` : null,
5006
+ failure.caseId ? `case=${failure.caseId}` : null,
5007
+ typeof failure.sampleIndex === "number" ? `sample=${failure.sampleIndex}` : null,
5008
+ failure.error ?? null,
5009
+ ].filter(Boolean).join("\t");
5010
+ }
4871
5011
  function resolveDir(parsed, positionalDir) {
4872
5012
  const resolved = path.resolve(asOptionalString(parsed.flags.dir) ?? positionalDir ?? process.cwd());
4873
5013
  return isWorkbenchSourceYamlPath(resolved) ? path.dirname(resolved) : resolved;