@workbench-ai/workbench 0.0.63 → 0.0.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark-fingerprint.js +2 -2
- package/dist/command-model.d.ts +1 -1
- package/dist/command-model.d.ts.map +1 -1
- package/dist/command-model.js +106 -35
- package/dist/dev-open/client.js +109 -109
- package/dist/dev-open-server.d.ts +2 -37
- package/dist/dev-open-server.d.ts.map +1 -1
- package/dist/dev-open-server.js +39 -322
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +333 -193
- package/dist/local-archive.d.ts +4 -4
- package/dist/local-archive.d.ts.map +1 -1
- package/dist/local-archive.js +3 -7
- package/dist/local-inspection.d.ts +9 -0
- package/dist/local-inspection.d.ts.map +1 -0
- package/dist/local-inspection.js +317 -0
- package/dist/project-source.d.ts +6 -6
- package/dist/project-source.js +6 -6
- package/package.json +4 -4
package/dist/index.js
CHANGED
|
@@ -5,17 +5,18 @@ import { createRequire } from "node:module";
|
|
|
5
5
|
import os from "node:os";
|
|
6
6
|
import path from "node:path";
|
|
7
7
|
import { Writable } from "node:stream";
|
|
8
|
-
import {
|
|
8
|
+
import { createBaselineCandidateJob as createRuntimeBaselineCandidateJob, evaluationScorecardId, evaluationMeanMetrics, executeWorkbenchExecutionJob, engineResolveBindingForSpec, filterOptimizerTraceJobsForCaseIds, filterCandidateSourceFiles, formatWorkbenchCaseSelector, formatWorkbenchSelectionPolicy, workbenchCaseSelectorUsesAllCases, workbenchExecutionPurpose, workbenchRunExecutionFingerprint, createWorkbenchAdapterAuthBundle, createOptimizerTraceInputFiles, DOCKER_SANDBOX_BACKEND, localWorkbenchAdapterAuthStore, materializeWorkbenchRunResult, normalizeSurfaceFiles, planWorkbenchExecutionJobsForPurpose, runWorkbenchExecutionDag, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, runtimeResources, validateWorkbenchRunEnvelope, parseWorkbenchAdapterAuthTarget, workbenchEngineCaseIdsForImproveEvaluation, workbenchEngineCaseIdsForSelector, workbenchImproveOptimizeSelector, workbenchImproveSelectionPolicy, workbenchProjectSourceFingerprint, workbenchRuntimeBundleFingerprint, workbenchRuntimeExplicitActiveId, } from "@workbench-ai/workbench-core";
|
|
9
9
|
import { assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterAuthRequirements, normalizeWorkbenchAdapterOperationRequest, readWorkbenchAdapterOperationResult, workbenchAdapterOperationCommand, workbenchAdapterOperationResultPath, withDefaultWorkbenchAdapterAuthProfiles as applyDefaultWorkbenchAdapterAuthProfiles, } from "@workbench-ai/workbench-protocol";
|
|
10
10
|
import { builtinLocalTraceAdapter, builtinLocalTraceAdapters, sortLocalTraceRefs, } from "@workbench-ai/workbench-built-in-adapters/local-traces";
|
|
11
|
-
import { commandUsage,
|
|
11
|
+
import { commandUsage, REMOTE_WATCH_LIFECYCLE_NOTE, LOCAL_DEV_OPEN_LIFECYCLE_NOTE, rootUsage, } from "./command-model.js";
|
|
12
12
|
import { startLocalWorkbenchDevServer } from "./dev-open-server.js";
|
|
13
|
+
import { createLocalWorkbenchInspection } from "./local-inspection.js";
|
|
13
14
|
import { createWorkbenchInitScaffold, } from "./init-scaffold.js";
|
|
14
15
|
import { defaultAdapterManifests, composeRuntimeDockerfileWithAdapters, resolveDefaultWorkbenchAdapter, resolveProjectAdapterSource, resolveWorkbenchAdaptersForProject, WORKBENCH_ADAPTER_MANIFEST_FILE, } from "./adapter-project.js";
|
|
15
16
|
import { createAdapterCommandEnv } from "./adapter-command-env.js";
|
|
16
17
|
import { loadLocalArchive, loadLocalArchiveIndex, exportLocalRuntimeBundle, importLocalRuntimeBundle, runtimeBundleStats, materializeCandidateRoot, readLocalCandidate, readLocalCandidateFiles, readLocalJobs, saveLocalArchive, saveLocalJobs, setLocalActive, upsertLocalRun, upsertLocalCandidate, upsertLocalEvaluation, } from "./local-archive.js";
|
|
17
18
|
import { WorkspaceSnapshotError, } from "./workspace-snapshot.js";
|
|
18
|
-
import {
|
|
19
|
+
import { remoteEngineResolveFiles, readLocalProjectSource, WORKBENCH_BENCHMARK_FILE, } from "./project-source.js";
|
|
19
20
|
import { localBenchmarkFingerprint, localCandidateFingerprint, projectStateBenchmarkFingerprint, } from "./benchmark-fingerprint.js";
|
|
20
21
|
const require = createRequire(import.meta.url);
|
|
21
22
|
function getCliVersion() {
|
|
@@ -82,31 +83,31 @@ export async function runCli(argv, io = {
|
|
|
82
83
|
return await pushBenchmark(argv.slice(1), io);
|
|
83
84
|
}
|
|
84
85
|
if (argv[0] === "eval") {
|
|
85
|
-
const
|
|
86
|
-
return
|
|
87
|
-
? await
|
|
88
|
-
: await localEvaluateCandidate(
|
|
86
|
+
const remote = extractRemoteFlag(argv.slice(1));
|
|
87
|
+
return remote.enabled
|
|
88
|
+
? await startRemoteWorkflow("eval", remote.argv, io)
|
|
89
|
+
: await localEvaluateCandidate(remote.argv, io, runtimeOptions);
|
|
89
90
|
}
|
|
90
91
|
if (argv[0] === "retry") {
|
|
91
|
-
const
|
|
92
|
-
return
|
|
93
|
-
? await
|
|
94
|
-
: await localRetry(
|
|
92
|
+
const remote = extractRemoteFlag(argv.slice(1));
|
|
93
|
+
return remote.enabled
|
|
94
|
+
? await retryRemoteWorkflow(remote.argv, io)
|
|
95
|
+
: await localRetry(remote.argv, io, runtimeOptions);
|
|
95
96
|
}
|
|
96
97
|
if (argv[0] === "improve") {
|
|
97
|
-
const
|
|
98
|
-
return
|
|
99
|
-
? await
|
|
100
|
-
: await localRun(
|
|
98
|
+
const remote = extractRemoteFlag(argv.slice(1));
|
|
99
|
+
return remote.enabled
|
|
100
|
+
? await startRemoteWorkflow("improve", remote.argv, io)
|
|
101
|
+
: await localRun(remote.argv, io, runtimeOptions);
|
|
101
102
|
}
|
|
102
103
|
if (argv[0] === "restore") {
|
|
103
104
|
return await localRestore(argv.slice(1), io);
|
|
104
105
|
}
|
|
105
106
|
if (argv[0] === "open") {
|
|
106
|
-
const
|
|
107
|
-
return
|
|
108
|
-
? await openWorkbench(
|
|
109
|
-
: await localDevOpen(
|
|
107
|
+
const remote = extractRemoteFlag(argv.slice(1));
|
|
108
|
+
return remote.enabled
|
|
109
|
+
? await openWorkbench(remote.argv, io)
|
|
110
|
+
: await localDevOpen(remote.argv, io);
|
|
110
111
|
}
|
|
111
112
|
if (argv[0] === "auth") {
|
|
112
113
|
return await runAuthCommand(argv.slice(1), io);
|
|
@@ -117,6 +118,9 @@ export async function runCli(argv, io = {
|
|
|
117
118
|
if (argv[0] === "traces") {
|
|
118
119
|
return await runTracesCommand(argv.slice(1), io);
|
|
119
120
|
}
|
|
121
|
+
if (argv[0] === "diagnose") {
|
|
122
|
+
return await localDiagnose(argv.slice(1), io);
|
|
123
|
+
}
|
|
120
124
|
const commandPath = argv.slice(0, 2).join(" ");
|
|
121
125
|
const rest = argv.slice(2);
|
|
122
126
|
switch (commandPath) {
|
|
@@ -124,6 +128,12 @@ export async function runCli(argv, io = {
|
|
|
124
128
|
return await localRunList(rest, io);
|
|
125
129
|
case "runs show":
|
|
126
130
|
return await localRunShow(rest, io);
|
|
131
|
+
case "evaluations list":
|
|
132
|
+
return await localEvaluationList(rest, io);
|
|
133
|
+
case "evaluations show":
|
|
134
|
+
return await localEvaluationShow(rest, io);
|
|
135
|
+
case "executions trace":
|
|
136
|
+
return await localExecutionTrace(rest, io);
|
|
127
137
|
case "candidates list":
|
|
128
138
|
return await localCandidateList(rest, io);
|
|
129
139
|
case "candidates show":
|
|
@@ -167,17 +177,25 @@ function commandPathForHelp(argv) {
|
|
|
167
177
|
["list", "show"].includes(positionals[1] ?? "")) {
|
|
168
178
|
return positionals.slice(0, 2).join(" ");
|
|
169
179
|
}
|
|
180
|
+
if (positionals[0] === "evaluations" &&
|
|
181
|
+
["list", "show"].includes(positionals[1] ?? "")) {
|
|
182
|
+
return positionals.slice(0, 2).join(" ");
|
|
183
|
+
}
|
|
184
|
+
if (positionals[0] === "executions" &&
|
|
185
|
+
["trace"].includes(positionals[1] ?? "")) {
|
|
186
|
+
return positionals.slice(0, 2).join(" ");
|
|
187
|
+
}
|
|
170
188
|
if (positionals[0] === "candidates" &&
|
|
171
189
|
["list", "show", "files", "preview"].includes(positionals[1] ?? "")) {
|
|
172
190
|
return positionals.slice(0, 2).join(" ");
|
|
173
191
|
}
|
|
174
192
|
return positionals[0] ?? "";
|
|
175
193
|
}
|
|
176
|
-
function
|
|
194
|
+
function extractRemoteFlag(argv) {
|
|
177
195
|
let enabled = false;
|
|
178
196
|
const next = [];
|
|
179
197
|
for (const arg of argv) {
|
|
180
|
-
if (arg === "--
|
|
198
|
+
if (arg === "--remote") {
|
|
181
199
|
enabled = true;
|
|
182
200
|
}
|
|
183
201
|
else {
|
|
@@ -1498,6 +1516,9 @@ function latestCompletedAttemptJobsByPair(jobs, desiredKeys) {
|
|
|
1498
1516
|
return byPair;
|
|
1499
1517
|
}
|
|
1500
1518
|
function caseSamplePairFromJob(job) {
|
|
1519
|
+
if (job.caseId && Number.isSafeInteger(job.sampleIndex) && job.sampleIndex >= 0) {
|
|
1520
|
+
return { caseId: job.caseId, sampleIndex: job.sampleIndex };
|
|
1521
|
+
}
|
|
1501
1522
|
const input = readRecord(job.input);
|
|
1502
1523
|
const execution = readRecord(input?.execution);
|
|
1503
1524
|
const metadata = readRecord(execution?.metadata);
|
|
@@ -1593,7 +1614,7 @@ function resolveProjectPath(root, filePath) {
|
|
|
1593
1614
|
}
|
|
1594
1615
|
async function executeLocalDevelopmentJob(args) {
|
|
1595
1616
|
return await executeWorkbenchExecutionJob(args, {
|
|
1596
|
-
|
|
1617
|
+
sandboxBackend: DOCKER_SANDBOX_BACKEND,
|
|
1597
1618
|
loadLocalAdapterAuthProfiles: true,
|
|
1598
1619
|
});
|
|
1599
1620
|
}
|
|
@@ -1604,7 +1625,7 @@ async function executeLocalDevelopmentDag(args) {
|
|
|
1604
1625
|
const result = await runWorkbenchExecutionDag({
|
|
1605
1626
|
jobs: args.jobs,
|
|
1606
1627
|
capacity: args.capacity,
|
|
1607
|
-
|
|
1628
|
+
sandboxBackend: DOCKER_SANDBOX_BACKEND,
|
|
1608
1629
|
executeJob: async (job) => {
|
|
1609
1630
|
return await executeLocalDevelopmentJob({
|
|
1610
1631
|
job,
|
|
@@ -1802,11 +1823,16 @@ async function localRestore(argv, io) {
|
|
|
1802
1823
|
writeOutput({ ok: true, activeCandidateId: candidateId, changedPaths }, parsed, io, () => `Restored ${candidateId} to ${candidateRoot}.`);
|
|
1803
1824
|
return 0;
|
|
1804
1825
|
}
|
|
1826
|
+
function localInspectionFromParsed(parsed) {
|
|
1827
|
+
return createLocalWorkbenchInspection({ workspace: resolveDir(parsed) });
|
|
1828
|
+
}
|
|
1805
1829
|
async function localCandidateList(argv, io) {
|
|
1806
1830
|
const parsed = parseArgs(argv);
|
|
1807
1831
|
rejectUnknownFlags(parsed, new Set(["dir", "json"]));
|
|
1808
|
-
const
|
|
1809
|
-
|
|
1832
|
+
const inspection = localInspectionFromParsed(parsed);
|
|
1833
|
+
const snapshot = await inspection.snapshot();
|
|
1834
|
+
const candidates = await Promise.all(snapshot.summaries.map((candidate) => inspection.candidate({ id: candidate.id })));
|
|
1835
|
+
writeOutput(candidates, parsed, io, (candidates) => candidates
|
|
1810
1836
|
.map((candidate) => `${candidate.id}\t${candidate.status}\tevaluation ${formatCandidateEvaluationScore(candidate)}${snapshot.activeId === candidate.id ? "\tactive" : ""}`)
|
|
1811
1837
|
.join("\n") || "No candidates.");
|
|
1812
1838
|
return 0;
|
|
@@ -1814,13 +1840,14 @@ async function localCandidateList(argv, io) {
|
|
|
1814
1840
|
async function localCandidateShow(argv, io) {
|
|
1815
1841
|
const parsed = parseArgs(argv);
|
|
1816
1842
|
rejectUnknownFlags(parsed, new Set(["dir", "candidate", "json"]));
|
|
1817
|
-
const
|
|
1843
|
+
const inspection = localInspectionFromParsed(parsed);
|
|
1844
|
+
const snapshot = await inspection.snapshot();
|
|
1818
1845
|
const candidateId = readCandidateIdFlag(parsed, snapshot);
|
|
1819
|
-
const candidate =
|
|
1846
|
+
const candidate = await inspection.candidate({ id: candidateId });
|
|
1820
1847
|
writeOutput(candidate, parsed, io, (record) => [
|
|
1821
1848
|
`${record.id}\t${record.status}`,
|
|
1822
1849
|
`benchmark\t${record.benchmarkFingerprint}`,
|
|
1823
|
-
`candidate\t${record.candidateFingerprint
|
|
1850
|
+
`candidate\t${record.candidateFingerprint}`,
|
|
1824
1851
|
`evaluation\t${formatCandidateEvaluationSummary(record)}`,
|
|
1825
1852
|
...(record.baseId ? [`base\t${record.baseId}`] : []),
|
|
1826
1853
|
].join("\n"));
|
|
@@ -1829,10 +1856,10 @@ async function localCandidateShow(argv, io) {
|
|
|
1829
1856
|
async function localCandidateFiles(argv, io) {
|
|
1830
1857
|
const parsed = parseArgs(argv);
|
|
1831
1858
|
rejectUnknownFlags(parsed, new Set(["dir", "candidate", "json"]));
|
|
1832
|
-
const
|
|
1859
|
+
const inspection = localInspectionFromParsed(parsed);
|
|
1860
|
+
const snapshot = await inspection.snapshot();
|
|
1833
1861
|
const candidateId = readCandidateIdFlag(parsed, snapshot);
|
|
1834
|
-
const
|
|
1835
|
-
const files = summarizeCandidateFiles(readLocalCandidateFiles(snapshot, candidateId), candidate.fileChanges);
|
|
1862
|
+
const files = await inspection.candidateFiles({ id: candidateId });
|
|
1836
1863
|
writeOutput(files, parsed, io, (records) => records
|
|
1837
1864
|
.map((file) => `${file.path}\t${file.status}\t${file.preview_kind}`)
|
|
1838
1865
|
.join("\n") || "No files.");
|
|
@@ -1841,10 +1868,11 @@ async function localCandidateFiles(argv, io) {
|
|
|
1841
1868
|
async function localCandidatePreview(argv, io) {
|
|
1842
1869
|
const parsed = parseArgs(argv);
|
|
1843
1870
|
rejectUnknownFlags(parsed, new Set(["dir", "candidate", "path", "output", "view", "json"]));
|
|
1844
|
-
const
|
|
1871
|
+
const inspection = localInspectionFromParsed(parsed);
|
|
1872
|
+
const snapshot = await inspection.snapshot();
|
|
1845
1873
|
const candidateId = readCandidateIdFlag(parsed, snapshot);
|
|
1846
|
-
const preview =
|
|
1847
|
-
|
|
1874
|
+
const preview = await inspection.candidatePreview({
|
|
1875
|
+
id: candidateId,
|
|
1848
1876
|
path: requireFlag(parsed, "path"),
|
|
1849
1877
|
view: readPreviewMode(parsed),
|
|
1850
1878
|
});
|
|
@@ -1865,7 +1893,7 @@ async function localCandidatePreview(argv, io) {
|
|
|
1865
1893
|
async function localRunList(argv, io) {
|
|
1866
1894
|
const parsed = parseArgs(argv);
|
|
1867
1895
|
rejectUnknownFlags(parsed, new Set(["dir", "json"]));
|
|
1868
|
-
const snapshot = await
|
|
1896
|
+
const snapshot = await localInspectionFromParsed(parsed).snapshot();
|
|
1869
1897
|
writeOutput(snapshot.runs, parsed, io, (runs) => runs
|
|
1870
1898
|
.map((run) => `${run.id}\t${run.workflow}\t${run.status}\t${run.outcome ?? "pending"}\t${run.attemptsExecuted ?? 0}/${run.attemptsRequested ?? 0}`)
|
|
1871
1899
|
.join("\n") || "No runs.");
|
|
@@ -1873,26 +1901,114 @@ async function localRunList(argv, io) {
|
|
|
1873
1901
|
}
|
|
1874
1902
|
async function localRunShow(argv, io) {
|
|
1875
1903
|
const parsed = parseArgs(argv);
|
|
1876
|
-
rejectUnknownFlags(parsed, new Set(["dir", "json"]));
|
|
1904
|
+
rejectUnknownFlags(parsed, new Set(["dir", "jobs", "failures", "json"]));
|
|
1877
1905
|
const runId = parsed.positionals[0];
|
|
1878
1906
|
if (!runId) {
|
|
1879
1907
|
throw new UsageError("workbench runs show requires RUN_ID.");
|
|
1880
1908
|
}
|
|
1881
|
-
const
|
|
1882
|
-
const
|
|
1883
|
-
|
|
1884
|
-
|
|
1885
|
-
}
|
|
1886
|
-
|
|
1887
|
-
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1909
|
+
const inspection = localInspectionFromParsed(parsed);
|
|
1910
|
+
const detail = await inspection.run({
|
|
1911
|
+
id: runId,
|
|
1912
|
+
includeJobs: parsed.flags.jobs === true || parsed.flags.failures === true,
|
|
1913
|
+
});
|
|
1914
|
+
const diagnosis = parsed.flags.failures === true
|
|
1915
|
+
? await inspection.diagnose({ targetId: runId })
|
|
1916
|
+
: null;
|
|
1917
|
+
writeOutput(parsed.flags.failures === true
|
|
1918
|
+
? { ...detail, diagnosis }
|
|
1919
|
+
: detail, parsed, io, (record) => {
|
|
1920
|
+
const run = record.run;
|
|
1921
|
+
const jobs = "jobs" in record && Array.isArray(record.jobs)
|
|
1922
|
+
? record.jobs
|
|
1923
|
+
: [];
|
|
1924
|
+
const failures = "diagnosis" in record && record.diagnosis
|
|
1925
|
+
? record.diagnosis.failures
|
|
1926
|
+
: [];
|
|
1927
|
+
return [
|
|
1928
|
+
`${run.id}\t${run.workflow}\t${run.status}`,
|
|
1929
|
+
`outcome\t${run.outcome ?? "pending"}`,
|
|
1930
|
+
`started\t${run.startedAt}`,
|
|
1931
|
+
...(run.finishedAt ? [`finished\t${run.finishedAt}`] : []),
|
|
1932
|
+
`attempts\t${run.attemptsExecuted ?? 0}/${run.attemptsRequested ?? 0}`,
|
|
1933
|
+
`samples\t${run.samples ?? 0}`,
|
|
1934
|
+
...(jobs.length > 0
|
|
1935
|
+
? [
|
|
1936
|
+
"jobs",
|
|
1937
|
+
...jobs.map((job) => `${job.id}\t${job.kind}\t${job.status}${job.error ? `\t${job.error}` : ""}`),
|
|
1938
|
+
]
|
|
1939
|
+
: []),
|
|
1940
|
+
...(failures.length > 0
|
|
1941
|
+
? [
|
|
1942
|
+
"failures",
|
|
1943
|
+
...failures.map(formatFailureLine),
|
|
1944
|
+
]
|
|
1945
|
+
: []),
|
|
1946
|
+
].join("\n");
|
|
1947
|
+
});
|
|
1948
|
+
return 0;
|
|
1949
|
+
}
|
|
1950
|
+
async function localEvaluationList(argv, io) {
|
|
1951
|
+
const parsed = parseArgs(argv);
|
|
1952
|
+
rejectUnknownFlags(parsed, new Set(["dir", "json"]));
|
|
1953
|
+
const comparison = await localInspectionFromParsed(parsed).evaluations();
|
|
1954
|
+
writeOutput(comparison, parsed, io, (record) => record.rows
|
|
1955
|
+
.map((row) => `${row.evaluationId}\t${row.status}\t${formatNullableMetric(row.score)}\t${row.candidateLabel}\t${row.configurationLabel}\t${row.runId}`)
|
|
1956
|
+
.join("\n") || "No evaluations.");
|
|
1957
|
+
return 0;
|
|
1958
|
+
}
|
|
1959
|
+
async function localEvaluationShow(argv, io) {
|
|
1960
|
+
const parsed = parseArgs(argv);
|
|
1961
|
+
rejectUnknownFlags(parsed, new Set(["dir", "json"]));
|
|
1962
|
+
const evaluationId = parsed.positionals[0];
|
|
1963
|
+
if (!evaluationId) {
|
|
1964
|
+
throw new UsageError("workbench evaluations show requires EVALUATION_ID.");
|
|
1965
|
+
}
|
|
1966
|
+
const evaluation = await localInspectionFromParsed(parsed).evaluation({ id: evaluationId });
|
|
1967
|
+
writeOutput(evaluation, parsed, io, (record) => [
|
|
1968
|
+
`${record.id}\t${record.status}`,
|
|
1969
|
+
`candidate\t${record.candidateName ?? record.candidateId}`,
|
|
1970
|
+
`run\t${record.runId}`,
|
|
1971
|
+
`samples\t${record.completedSampleCount}/${record.sampleCount}`,
|
|
1972
|
+
`errors\t${record.errorSampleCount}`,
|
|
1973
|
+
`score\t${formatNullableMetric(record.metrics?.score?.mean ?? null)}`,
|
|
1974
|
+
...(record.error ? [`error\t${record.error}`] : []),
|
|
1975
|
+
...(record.evaluation.cases?.length
|
|
1976
|
+
? [
|
|
1977
|
+
"cases",
|
|
1978
|
+
...record.evaluation.cases.map((entry) => `${entry.id}\t${entry.status ?? "unknown"}\t${formatNullableMetric(entry.metrics?.score?.mean ?? null)}`),
|
|
1979
|
+
]
|
|
1980
|
+
: []),
|
|
1893
1981
|
].join("\n"));
|
|
1894
1982
|
return 0;
|
|
1895
1983
|
}
|
|
1984
|
+
async function localExecutionTrace(argv, io) {
|
|
1985
|
+
const parsed = parseArgs(argv);
|
|
1986
|
+
rejectUnknownFlags(parsed, new Set(["dir", "run", "job", "json"]));
|
|
1987
|
+
const runId = requireFlag(parsed, "run");
|
|
1988
|
+
const jobId = requireFlag(parsed, "job");
|
|
1989
|
+
const detail = await localInspectionFromParsed(parsed).executionTrace({ runId, jobId });
|
|
1990
|
+
writeOutput(detail, parsed, io, (record) => record.executions
|
|
1991
|
+
.map((execution) => [
|
|
1992
|
+
`${execution.id}\t${execution.kind}\t${execution.status}`,
|
|
1993
|
+
`jobs\t${execution.jobIds.join(",")}`,
|
|
1994
|
+
`sessions\t${execution.sessions.length}`,
|
|
1995
|
+
`spans\t${execution.trace.spans.length}`,
|
|
1996
|
+
`events\t${execution.trace.events.length}`,
|
|
1997
|
+
`summaries\t${execution.trace.summaries.length}`,
|
|
1998
|
+
].join("\n"))
|
|
1999
|
+
.join("\n\n") || "No execution trace.");
|
|
2000
|
+
return 0;
|
|
2001
|
+
}
|
|
2002
|
+
async function localDiagnose(argv, io) {
|
|
2003
|
+
const parsed = parseArgs(argv);
|
|
2004
|
+
rejectUnknownFlags(parsed, new Set(["dir", "json"]));
|
|
2005
|
+
rejectUnexpectedPositionals(parsed, "workbench diagnose", 1);
|
|
2006
|
+
const diagnosis = await localInspectionFromParsed(parsed).diagnose({ targetId: parsed.positionals[0] ?? null });
|
|
2007
|
+
writeOutput(diagnosis, parsed, io, (record) => record.failures.length > 0
|
|
2008
|
+
? record.failures.map(formatFailureLine).join("\n")
|
|
2009
|
+
: "No failures.");
|
|
2010
|
+
return 0;
|
|
2011
|
+
}
|
|
1896
2012
|
async function runAuthCommand(argv, io) {
|
|
1897
2013
|
const command = argv[0];
|
|
1898
2014
|
const rest = argv.slice(1);
|
|
@@ -2446,7 +2562,11 @@ fs.writeFileSync(resultPath, JSON.stringify({
|
|
|
2446
2562
|
async function login(argv, io) {
|
|
2447
2563
|
const parsed = parseArgs(argv);
|
|
2448
2564
|
rejectUnknownFlags(parsed, new Set(["base-url", "no-open", "json"]));
|
|
2449
|
-
const
|
|
2565
|
+
const config = await loadConfig();
|
|
2566
|
+
const baseUrl = selectWorkbenchBaseUrl({
|
|
2567
|
+
explicitBaseUrl: asOptionalString(parsed.flags["base-url"]),
|
|
2568
|
+
configBaseUrl: config.baseUrl,
|
|
2569
|
+
});
|
|
2450
2570
|
const authorization = await requestDeviceAuthorization(baseUrl);
|
|
2451
2571
|
if (parsed.flags.json === true) {
|
|
2452
2572
|
writeJson({ ok: true, status: "authorization_pending", ...authorization }, io);
|
|
@@ -2472,7 +2592,7 @@ async function logout(argv, io) {
|
|
|
2472
2592
|
const parsed = parseArgs(argv);
|
|
2473
2593
|
rejectUnknownFlags(parsed, new Set(["json"]));
|
|
2474
2594
|
const config = await loadConfig();
|
|
2475
|
-
const baseUrl =
|
|
2595
|
+
const baseUrl = selectWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
|
|
2476
2596
|
if (config.accessToken) {
|
|
2477
2597
|
await fetch(`${baseUrl}/api/oauth/revoke`, {
|
|
2478
2598
|
method: "POST",
|
|
@@ -2493,8 +2613,8 @@ async function authStatus(argv, io) {
|
|
|
2493
2613
|
const baseUrl = await effectiveBaseUrl();
|
|
2494
2614
|
const profileStatus = await readWorkbenchProfileStatus(config);
|
|
2495
2615
|
const adapterStatuses = await localWorkbenchAdapterAuthStore().listStatus();
|
|
2496
|
-
const
|
|
2497
|
-
? await
|
|
2616
|
+
const remoteAuth = profileStatus.authenticated
|
|
2617
|
+
? await readRemoteAdapterAuthStatuses().catch((error) => ({
|
|
2498
2618
|
adapters: [],
|
|
2499
2619
|
error: error instanceof Error ? error.message : String(error),
|
|
2500
2620
|
}))
|
|
@@ -2503,7 +2623,7 @@ async function authStatus(argv, io) {
|
|
|
2503
2623
|
error: "not_authenticated",
|
|
2504
2624
|
};
|
|
2505
2625
|
const dir = resolveDir(parsed);
|
|
2506
|
-
const adapterAuth = await projectAdapterAuthStatus(dir, adapterStatuses,
|
|
2626
|
+
const adapterAuth = await projectAdapterAuthStatus(dir, adapterStatuses, remoteAuth.adapters).catch(() => []);
|
|
2507
2627
|
const result = {
|
|
2508
2628
|
ok: true,
|
|
2509
2629
|
workbench: {
|
|
@@ -2512,7 +2632,7 @@ async function authStatus(argv, io) {
|
|
|
2512
2632
|
username: profileStatus.profile?.username ?? null,
|
|
2513
2633
|
},
|
|
2514
2634
|
adapterStatuses,
|
|
2515
|
-
|
|
2635
|
+
remoteAuth,
|
|
2516
2636
|
adapterAuth,
|
|
2517
2637
|
};
|
|
2518
2638
|
writeOutput(result, parsed, io, (record) => {
|
|
@@ -2525,28 +2645,28 @@ async function authStatus(argv, io) {
|
|
|
2525
2645
|
? [
|
|
2526
2646
|
"",
|
|
2527
2647
|
"Required adapter auth:",
|
|
2528
|
-
...value.adapterAuth.map((adapter) => `${adapter.adapter}${adapter.profile !== "default" ? ` profile ${adapter.profile}` : ""}: local ${adapter.local.status}${adapter.local.method ? ` (${adapter.local.method})` : ""}${adapter.local.reason ? ` (${adapter.local.reason})` : ""},
|
|
2648
|
+
...value.adapterAuth.map((adapter) => `${adapter.adapter}${adapter.profile !== "default" ? ` profile ${adapter.profile}` : ""}: local ${adapter.local.status}${adapter.local.method ? ` (${adapter.local.method})` : ""}${adapter.local.reason ? ` (${adapter.local.reason})` : ""}, remote ${adapter.remote.status}${adapter.remote.method ? ` (${adapter.remote.method})` : ""}${adapter.remote.reason ? ` (${adapter.remote.reason})` : ""}`),
|
|
2529
2649
|
]
|
|
2530
2650
|
: []),
|
|
2531
2651
|
].join("\n");
|
|
2532
2652
|
});
|
|
2533
2653
|
return 0;
|
|
2534
2654
|
}
|
|
2535
|
-
async function projectAdapterAuthStatus(dir, adapterStatuses,
|
|
2655
|
+
async function projectAdapterAuthStatus(dir, adapterStatuses, remoteAdapters) {
|
|
2536
2656
|
const spec = (await readLocalProjectSource(dir)).spec;
|
|
2537
2657
|
const adapters = await resolveWorkbenchAdaptersForProject(dir, spec);
|
|
2538
2658
|
const adapterStatusMap = new Map(adapterStatuses.map((status) => [
|
|
2539
2659
|
adapterAuthStatusKey(status.adapterId, status.slot, status.profile),
|
|
2540
2660
|
status,
|
|
2541
2661
|
]));
|
|
2542
|
-
const
|
|
2662
|
+
const remoteAdapterStatusMap = new Map(remoteAdapters.map((status) => [
|
|
2543
2663
|
adapterAuthStatusKey(status.adapterId, status.slot, status.profile),
|
|
2544
2664
|
status,
|
|
2545
2665
|
]));
|
|
2546
2666
|
const adapterById = new Map(adapters.map((adapter) => [adapter.manifest.id, adapter]));
|
|
2547
2667
|
return requiredAuthTargetsForSpec(spec, adapterById).map((target) => {
|
|
2548
2668
|
const adapterStatus = adapterStatusMap.get(adapterAuthStatusKey(target.adapter, target.slot, target.profile));
|
|
2549
|
-
const
|
|
2669
|
+
const remoteAdapterStatus = remoteAdapterStatusMap.get(adapterAuthStatusKey(target.adapter, target.slot, target.profile));
|
|
2550
2670
|
return {
|
|
2551
2671
|
...target,
|
|
2552
2672
|
local: adapterStatus
|
|
@@ -2556,17 +2676,17 @@ async function projectAdapterAuthStatus(dir, adapterStatuses, hostedAdapters) {
|
|
|
2556
2676
|
...(adapterStatus.reason ? { reason: adapterStatus.reason } : {}),
|
|
2557
2677
|
}
|
|
2558
2678
|
: { status: "disconnected" },
|
|
2559
|
-
|
|
2679
|
+
remote: remoteAdapterStatus
|
|
2560
2680
|
? {
|
|
2561
|
-
status:
|
|
2562
|
-
...(
|
|
2563
|
-
...(
|
|
2681
|
+
status: remoteAdapterStatus.status,
|
|
2682
|
+
...(remoteAdapterStatus.method ? { method: remoteAdapterStatus.method } : {}),
|
|
2683
|
+
...(remoteAdapterStatus.reason ? { reason: remoteAdapterStatus.reason } : {}),
|
|
2564
2684
|
}
|
|
2565
2685
|
: { status: "disconnected" },
|
|
2566
2686
|
};
|
|
2567
2687
|
});
|
|
2568
2688
|
}
|
|
2569
|
-
async function
|
|
2689
|
+
async function readRemoteAdapterAuthStatuses() {
|
|
2570
2690
|
const adapterResponse = await apiRequest("/api/workbench/auth/adapters");
|
|
2571
2691
|
return {
|
|
2572
2692
|
adapters: adapterResponse.adapters ?? [],
|
|
@@ -2941,7 +3061,7 @@ async function pushBenchmark(argv, io) {
|
|
|
2941
3061
|
const dir = resolveSourceDir(parsed);
|
|
2942
3062
|
const source = await readLocalProjectSource(dir);
|
|
2943
3063
|
const origin = await readWorkbenchOrigin(dir);
|
|
2944
|
-
const baseUrl = await
|
|
3064
|
+
const baseUrl = await effectiveOriginBaseUrl(origin?.baseUrl);
|
|
2945
3065
|
const visibility = readOptionalBenchmarkVisibility(parsed.flags.visibility);
|
|
2946
3066
|
const createVisibility = visibility ?? "public";
|
|
2947
3067
|
const dryRun = parsed.flags["dry-run"] === true;
|
|
@@ -2972,7 +3092,7 @@ async function pushBenchmark(argv, io) {
|
|
|
2972
3092
|
}, parsed, io, () => `Would push benchmark ${source.spec.name}.`);
|
|
2973
3093
|
return 0;
|
|
2974
3094
|
}
|
|
2975
|
-
const { project, origin: nextOrigin, result } = await
|
|
3095
|
+
const { project, origin: nextOrigin, result } = await createRemoteBenchmarkFromState({
|
|
2976
3096
|
baseUrl,
|
|
2977
3097
|
dir,
|
|
2978
3098
|
state,
|
|
@@ -3001,7 +3121,7 @@ async function pushBenchmark(argv, io) {
|
|
|
3001
3121
|
}
|
|
3002
3122
|
const projectId = origin.projectId;
|
|
3003
3123
|
if (!projectId) {
|
|
3004
|
-
throw new UsageError("Missing
|
|
3124
|
+
throw new UsageError("Missing remote benchmark. Run workbench push from a source directory.");
|
|
3005
3125
|
}
|
|
3006
3126
|
if (dryRun) {
|
|
3007
3127
|
const remoteProject = await verifyLinkedPushDryRunTarget({
|
|
@@ -3017,7 +3137,7 @@ async function pushBenchmark(argv, io) {
|
|
|
3017
3137
|
baseUrl,
|
|
3018
3138
|
benchmarkId: projectId,
|
|
3019
3139
|
remote: origin.remote,
|
|
3020
|
-
benchmark:
|
|
3140
|
+
benchmark: remoteProjectSummaryForOutput(remoteProject),
|
|
3021
3141
|
benchmarkName: source.spec.name,
|
|
3022
3142
|
visibility: visibility ?? "unchanged",
|
|
3023
3143
|
sourceFileCount: sourceFileCount(source),
|
|
@@ -3031,7 +3151,7 @@ async function pushBenchmark(argv, io) {
|
|
|
3031
3151
|
method: "PUT",
|
|
3032
3152
|
body: state,
|
|
3033
3153
|
}, baseUrl);
|
|
3034
|
-
const responseProject =
|
|
3154
|
+
const responseProject = remoteProjectSummaryFromState(response.state);
|
|
3035
3155
|
const publishedProject = await applyRequestedProjectVisibility({
|
|
3036
3156
|
baseUrl,
|
|
3037
3157
|
projectId: responseProject.id,
|
|
@@ -3079,7 +3199,7 @@ async function verifyLinkedPushDryRunTarget(args) {
|
|
|
3079
3199
|
}
|
|
3080
3200
|
return response.benchmark;
|
|
3081
3201
|
}
|
|
3082
|
-
function
|
|
3202
|
+
function remoteProjectSummaryForOutput(project) {
|
|
3083
3203
|
return {
|
|
3084
3204
|
...(project.id ? { id: project.id } : {}),
|
|
3085
3205
|
...(project.ownerUsername ? { ownerUsername: project.ownerUsername } : {}),
|
|
@@ -3089,12 +3209,12 @@ function hostedProjectSummaryForOutput(project) {
|
|
|
3089
3209
|
...(typeof project.starCount === "number" ? { starCount: project.starCount } : {}),
|
|
3090
3210
|
};
|
|
3091
3211
|
}
|
|
3092
|
-
async function
|
|
3212
|
+
async function createRemoteBenchmarkFromState(args) {
|
|
3093
3213
|
const result = await apiRequest("/api/workbench/benchmarks/state", {
|
|
3094
3214
|
method: "POST",
|
|
3095
3215
|
body: args.state,
|
|
3096
3216
|
}, args.baseUrl);
|
|
3097
|
-
const project =
|
|
3217
|
+
const project = remoteProjectSummaryFromState(result.state);
|
|
3098
3218
|
const applied = await acceptPushedProjectStateToLocal({
|
|
3099
3219
|
dir: args.dir,
|
|
3100
3220
|
baseUrl: args.baseUrl,
|
|
@@ -3168,7 +3288,7 @@ async function pullProject(argv, io) {
|
|
|
3168
3288
|
}
|
|
3169
3289
|
const dir = resolveDir(parsed);
|
|
3170
3290
|
const origin = await requireWorkbenchOrigin(dir);
|
|
3171
|
-
const baseUrl = await
|
|
3291
|
+
const baseUrl = await effectiveOriginBaseUrl(origin.baseUrl);
|
|
3172
3292
|
const remoteRef = parseOriginRemote(origin);
|
|
3173
3293
|
const state = await apiRequest(publicProjectStateApiPath(remoteRef), {}, baseUrl);
|
|
3174
3294
|
if (parsed.flags["dry-run"] === true) {
|
|
@@ -3228,7 +3348,7 @@ async function acceptPushedProjectStateToLocal(args) {
|
|
|
3228
3348
|
});
|
|
3229
3349
|
return { origin, runtime: runtime.stats };
|
|
3230
3350
|
}
|
|
3231
|
-
async function
|
|
3351
|
+
async function retryRemoteWorkflow(argv, io) {
|
|
3232
3352
|
const parsed = parseArgs(argv);
|
|
3233
3353
|
rejectUnknownFlags(parsed, new Set([
|
|
3234
3354
|
"dir",
|
|
@@ -3238,7 +3358,7 @@ async function retryHostedWorkflow(argv, io) {
|
|
|
3238
3358
|
"timeout-ms",
|
|
3239
3359
|
"json",
|
|
3240
3360
|
]));
|
|
3241
|
-
rejectUnexpectedPositionals(parsed, "workbench retry --
|
|
3361
|
+
rejectUnexpectedPositionals(parsed, "workbench retry --remote", 1);
|
|
3242
3362
|
const targetId = parsed.positionals[0];
|
|
3243
3363
|
if (!targetId) {
|
|
3244
3364
|
throw new UsageError("Missing required TARGET_ID.");
|
|
@@ -3247,8 +3367,8 @@ async function retryHostedWorkflow(argv, io) {
|
|
|
3247
3367
|
parsed.flags["timeout-ms"] !== undefined)) {
|
|
3248
3368
|
throw new UsageError("--interval-ms and --timeout-ms require --watch.");
|
|
3249
3369
|
}
|
|
3250
|
-
const target = await
|
|
3251
|
-
const retryTarget = await
|
|
3370
|
+
const target = await resolveRemoteTarget(parsed, { requireProjectIdentity: true });
|
|
3371
|
+
const retryTarget = await resolveRemoteRetryTarget(target, targetId);
|
|
3252
3372
|
const watchIntervalMs = parsed.flags.watch === true
|
|
3253
3373
|
? parsePositiveInt(parsed.flags["interval-ms"], 1000, "interval-ms")
|
|
3254
3374
|
: undefined;
|
|
@@ -3259,23 +3379,23 @@ async function retryHostedWorkflow(argv, io) {
|
|
|
3259
3379
|
method: "POST",
|
|
3260
3380
|
body: retryTarget.request,
|
|
3261
3381
|
}, target.baseUrl);
|
|
3262
|
-
const runTarget =
|
|
3382
|
+
const runTarget = remoteTargetForRunStartResponse(target, response);
|
|
3263
3383
|
const startedRun = withRunUrls(runTarget, response.run);
|
|
3264
3384
|
if (parsed.flags.watch === true) {
|
|
3265
3385
|
if (parsed.flags.json !== true) {
|
|
3266
|
-
io.stdout.write(`${
|
|
3386
|
+
io.stdout.write(`${formatRemoteRunStarted(startedRun, retryTarget.workflow).trimEnd()}\n${REMOTE_WATCH_LIFECYCLE_NOTE}\n`);
|
|
3267
3387
|
}
|
|
3268
|
-
const watched = await
|
|
3388
|
+
const watched = await watchRemoteRun({
|
|
3269
3389
|
parsed,
|
|
3270
3390
|
target: runTarget,
|
|
3271
3391
|
runId: response.run.id,
|
|
3272
3392
|
intervalMs: watchIntervalMs ?? 1000,
|
|
3273
3393
|
timeoutMs: watchTimeoutMs,
|
|
3274
3394
|
});
|
|
3275
|
-
const outputRun = withRunUrls(runTarget, await
|
|
3276
|
-
await
|
|
3395
|
+
const outputRun = withRunUrls(runTarget, await withRemoteRunFailureSummary(runTarget, watched));
|
|
3396
|
+
await tryImportTerminalRemoteProjectState({ target: runTarget, io });
|
|
3277
3397
|
const result = {
|
|
3278
|
-
ok:
|
|
3398
|
+
ok: remoteRunSucceeded(watched),
|
|
3279
3399
|
retried: {
|
|
3280
3400
|
id: retryTarget.sourceId,
|
|
3281
3401
|
kind: retryTarget.sourceKind,
|
|
@@ -3290,7 +3410,7 @@ async function retryHostedWorkflow(argv, io) {
|
|
|
3290
3410
|
...(outputRun.error ? { error: outputRun.error } : {}),
|
|
3291
3411
|
};
|
|
3292
3412
|
writeOutput(result, parsed, io, formatRetryCommandResult);
|
|
3293
|
-
return
|
|
3413
|
+
return remoteRunSucceeded(watched) ? 0 : 1;
|
|
3294
3414
|
}
|
|
3295
3415
|
const result = {
|
|
3296
3416
|
ok: true,
|
|
@@ -3308,20 +3428,20 @@ async function retryHostedWorkflow(argv, io) {
|
|
|
3308
3428
|
writeOutput(result, parsed, io, formatRetryCommandResult);
|
|
3309
3429
|
return 0;
|
|
3310
3430
|
}
|
|
3311
|
-
async function
|
|
3431
|
+
async function resolveRemoteRetryTarget(target, targetId) {
|
|
3312
3432
|
if (targetId.startsWith("eval_")) {
|
|
3313
|
-
return await
|
|
3433
|
+
return await resolveRemoteEvaluationRetryTarget(target, targetId);
|
|
3314
3434
|
}
|
|
3315
|
-
const detail = await
|
|
3435
|
+
const detail = await readRemoteRunDetail(target, targetId);
|
|
3316
3436
|
const run = detail.run;
|
|
3317
3437
|
if (run.status !== "finished") {
|
|
3318
3438
|
throw new UsageError(`Run ${run.id} is ${run.status}; wait for it to finish before retrying.`);
|
|
3319
3439
|
}
|
|
3320
|
-
if (!
|
|
3321
|
-
throw new UsageError(`Run ${run.id} did not fail; use workbench ${run.workflow ?? "eval"} --
|
|
3440
|
+
if (!remoteRunRecordFailed(run)) {
|
|
3441
|
+
throw new UsageError(`Run ${run.id} did not fail; use workbench ${run.workflow ?? "eval"} --remote to intentionally run it again.`);
|
|
3322
3442
|
}
|
|
3323
3443
|
if (run.workflow === "eval") {
|
|
3324
|
-
const candidateId =
|
|
3444
|
+
const candidateId = remoteRunEvaluationCandidateId(run, detail.jobs);
|
|
3325
3445
|
if (!candidateId) {
|
|
3326
3446
|
throw new UsageError(`Run ${run.id} has no candidate id to retry.`);
|
|
3327
3447
|
}
|
|
@@ -3330,17 +3450,18 @@ async function resolveHostedRetryTarget(target, targetId) {
|
|
|
3330
3450
|
sourceKind: "run",
|
|
3331
3451
|
workflow: "eval",
|
|
3332
3452
|
request: {
|
|
3453
|
+
schema: "workbench.remote.run.request.v1",
|
|
3333
3454
|
workflow: "eval",
|
|
3334
3455
|
samples: run.samples ?? 1,
|
|
3335
3456
|
candidateId,
|
|
3336
|
-
sourceYaml:
|
|
3457
|
+
sourceYaml: remoteRetrySourceYaml(run, run.id),
|
|
3337
3458
|
preserveActive: true,
|
|
3338
3459
|
...retrySampleSelectionFromJobs(detail.jobs),
|
|
3339
3460
|
},
|
|
3340
3461
|
};
|
|
3341
3462
|
}
|
|
3342
3463
|
if (run.workflow === "improve") {
|
|
3343
|
-
const baseCandidateId = stringValue(readRecord(run.
|
|
3464
|
+
const baseCandidateId = stringValue(readRecord(run.retry)?.baseCandidateId);
|
|
3344
3465
|
if (!baseCandidateId) {
|
|
3345
3466
|
throw new UsageError(`Run ${run.id} is missing its base candidate id.`);
|
|
3346
3467
|
}
|
|
@@ -3349,41 +3470,43 @@ async function resolveHostedRetryTarget(target, targetId) {
|
|
|
3349
3470
|
sourceKind: "run",
|
|
3350
3471
|
workflow: "improve",
|
|
3351
3472
|
request: {
|
|
3473
|
+
schema: "workbench.remote.run.request.v1",
|
|
3352
3474
|
workflow: "improve",
|
|
3353
3475
|
samples: run.samples ?? 1,
|
|
3354
3476
|
budget: run.budget ?? run.attemptsRequested ?? 1,
|
|
3355
3477
|
candidateId: baseCandidateId,
|
|
3356
|
-
sourceYaml:
|
|
3478
|
+
sourceYaml: remoteRetrySourceYaml(run, run.id),
|
|
3357
3479
|
preserveActive: true,
|
|
3358
3480
|
},
|
|
3359
3481
|
};
|
|
3360
3482
|
}
|
|
3361
3483
|
throw new UsageError(`Run ${run.id} has no retryable workflow.`);
|
|
3362
3484
|
}
|
|
3363
|
-
async function
|
|
3485
|
+
async function resolveRemoteEvaluationRetryTarget(target, evaluationId) {
|
|
3364
3486
|
const snapshot = await apiRequest(projectApiPath(target.projectId, "/workbench/snapshot"), {}, target.baseUrl);
|
|
3365
3487
|
const evaluation = snapshot.evaluations.find((entry) => entry.id === evaluationId);
|
|
3366
3488
|
if (!evaluation) {
|
|
3367
|
-
throw new UsageError(`
|
|
3489
|
+
throw new UsageError(`Remote evaluation not found: ${evaluationId}`);
|
|
3368
3490
|
}
|
|
3369
3491
|
const run = snapshot.runs.find((entry) => entry.id === evaluation.runId) ?? null;
|
|
3370
3492
|
if (!evaluationScorecardFailed(evaluation, run)) {
|
|
3371
|
-
throw new UsageError(`Evaluation ${evaluation.id} did not fail; use workbench eval --
|
|
3493
|
+
throw new UsageError(`Evaluation ${evaluation.id} did not fail; use workbench eval --remote to intentionally run it again.`);
|
|
3372
3494
|
}
|
|
3373
3495
|
if (!run) {
|
|
3374
3496
|
throw new UsageError(`Evaluation ${evaluation.id} is missing its run record.`);
|
|
3375
3497
|
}
|
|
3376
|
-
const detail = await
|
|
3498
|
+
const detail = await readRemoteRunDetail(target, run.id);
|
|
3377
3499
|
const detailedRun = detail.run;
|
|
3378
3500
|
return {
|
|
3379
3501
|
sourceId: evaluationId,
|
|
3380
3502
|
sourceKind: "evaluation",
|
|
3381
3503
|
workflow: "eval",
|
|
3382
3504
|
request: {
|
|
3505
|
+
schema: "workbench.remote.run.request.v1",
|
|
3383
3506
|
workflow: "eval",
|
|
3384
3507
|
samples: evaluation.sampleCount || detailedRun.samples || 1,
|
|
3385
3508
|
candidateId: evaluation.candidateId,
|
|
3386
|
-
sourceYaml:
|
|
3509
|
+
sourceYaml: remoteRetrySourceYaml(detailedRun, detailedRun.id),
|
|
3387
3510
|
preserveActive: true,
|
|
3388
3511
|
...retrySampleSelectionFromJobs(detail.jobs),
|
|
3389
3512
|
},
|
|
@@ -3392,7 +3515,7 @@ async function resolveHostedEvaluationRetryTarget(target, evaluationId) {
|
|
|
3392
3515
|
function retrySampleSelectionFromJobs(jobs) {
|
|
3393
3516
|
const selectedSamples = uniqueCaseSamplePairs(jobs
|
|
3394
3517
|
.filter((job) => job.status !== "succeeded" &&
|
|
3395
|
-
|
|
3518
|
+
readRunJobPurpose(job) === "attempt")
|
|
3396
3519
|
.map(caseSamplePairFromJob)
|
|
3397
3520
|
.filter((pair) => pair !== null));
|
|
3398
3521
|
return selectedSamples.length > 0
|
|
@@ -3407,10 +3530,10 @@ function uniqueCaseSamplePairs(pairs) {
|
|
|
3407
3530
|
return [...byKey.values()].sort((left, right) => left.caseId.localeCompare(right.caseId) ||
|
|
3408
3531
|
left.sampleIndex - right.sampleIndex);
|
|
3409
3532
|
}
|
|
3410
|
-
async function
|
|
3533
|
+
async function readRemoteRunDetail(target, runId) {
|
|
3411
3534
|
return await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(runId)}`), {}, target.baseUrl);
|
|
3412
3535
|
}
|
|
3413
|
-
async function
|
|
3536
|
+
async function tryImportTerminalRemoteProjectState(args) {
|
|
3414
3537
|
const origin = args.target.origin;
|
|
3415
3538
|
if (!origin || origin.projectId !== args.target.projectId) {
|
|
3416
3539
|
return;
|
|
@@ -3426,23 +3549,23 @@ async function tryImportTerminalHostedProjectState(args) {
|
|
|
3426
3549
|
});
|
|
3427
3550
|
}
|
|
3428
3551
|
catch (error) {
|
|
3429
|
-
args.io.stderr.write(`
|
|
3552
|
+
args.io.stderr.write(`Remote run finished, but local project state was not updated: ${errorMessage(error)}\n`);
|
|
3430
3553
|
}
|
|
3431
3554
|
}
|
|
3432
|
-
function
|
|
3433
|
-
const sourceYaml = stringValue(readRecord(run.
|
|
3555
|
+
function remoteRetrySourceYaml(run, runId) {
|
|
3556
|
+
const sourceYaml = stringValue(readRecord(run.retry)?.sourceYaml);
|
|
3434
3557
|
if (!sourceYaml) {
|
|
3435
3558
|
throw new UsageError(`Run ${runId} is missing its recorded source configuration.`);
|
|
3436
3559
|
}
|
|
3437
3560
|
return sourceYaml;
|
|
3438
3561
|
}
|
|
3439
|
-
function
|
|
3562
|
+
function remoteRunRecordFailed(run) {
|
|
3440
3563
|
return run.outcome === "error" ||
|
|
3441
3564
|
run.outcome === "cancelled" ||
|
|
3442
3565
|
(run.failedJobCount ?? 0) > 0 ||
|
|
3443
3566
|
Boolean(run.error);
|
|
3444
3567
|
}
|
|
3445
|
-
async function
|
|
3568
|
+
async function startRemoteWorkflow(workflow, argv, io) {
|
|
3446
3569
|
const parsed = parseArgs(argv);
|
|
3447
3570
|
const allowedFlags = new Set([
|
|
3448
3571
|
"dir",
|
|
@@ -3465,7 +3588,7 @@ async function startHostedWorkflow(workflow, argv, io) {
|
|
|
3465
3588
|
}
|
|
3466
3589
|
rejectUnknownFlags(parsed, allowedFlags);
|
|
3467
3590
|
if (parsed.positionals.length > 1) {
|
|
3468
|
-
throw new UsageError(`workbench ${workflow} --
|
|
3591
|
+
throw new UsageError(`workbench ${workflow} --remote accepts at most one source file or directory argument.`);
|
|
3469
3592
|
}
|
|
3470
3593
|
const sourceArg = resolveSourceDir(parsed);
|
|
3471
3594
|
const samples = parsePositiveInt(parsed.flags.samples, 1, "samples");
|
|
@@ -3480,13 +3603,13 @@ async function startHostedWorkflow(workflow, argv, io) {
|
|
|
3480
3603
|
const defaultProjectSource = await readLocalProjectSource(path.resolve(sourceArg));
|
|
3481
3604
|
const selectedRunIds = workflow === "eval"
|
|
3482
3605
|
? resolveCandidateRunSelection(defaultProjectSource, runsFlag)
|
|
3483
|
-
: [singleRequestedRunId(runsFlag, `workbench ${workflow} --
|
|
3606
|
+
: [singleRequestedRunId(runsFlag, `workbench ${workflow} --remote`) ?? defaultProjectSource.candidateRunId];
|
|
3484
3607
|
if (workflow === "eval" && selectedRunIds.length > 1) {
|
|
3485
3608
|
let failed = 0;
|
|
3486
3609
|
const results = [];
|
|
3487
3610
|
for (const runId of selectedRunIds) {
|
|
3488
3611
|
const captured = createCapturingIo(io);
|
|
3489
|
-
const code = await
|
|
3612
|
+
const code = await startRemoteWorkflow(workflow, remoteWorkflowArgsForRun({
|
|
3490
3613
|
parsed,
|
|
3491
3614
|
sourceDir: defaultProjectSource.dir,
|
|
3492
3615
|
runId,
|
|
@@ -3501,7 +3624,7 @@ async function startHostedWorkflow(workflow, argv, io) {
|
|
|
3501
3624
|
candidateRunIds: selectedRunIds,
|
|
3502
3625
|
failedRunCount: failed,
|
|
3503
3626
|
results,
|
|
3504
|
-
}, parsed, io, () => `Processed ${selectedRunIds.length}
|
|
3627
|
+
}, parsed, io, () => `Processed ${selectedRunIds.length} remote candidate run(s); ${failed} failed.`);
|
|
3505
3628
|
return failed === 0 ? 0 : 1;
|
|
3506
3629
|
}
|
|
3507
3630
|
const selectedCandidateId = workflow === "eval"
|
|
@@ -3509,12 +3632,14 @@ async function startHostedWorkflow(workflow, argv, io) {
|
|
|
3509
3632
|
: asOptionalString(parsed.flags.base);
|
|
3510
3633
|
const request = workflow === "improve"
|
|
3511
3634
|
? {
|
|
3635
|
+
schema: "workbench.remote.run.request.v1",
|
|
3512
3636
|
workflow,
|
|
3513
3637
|
budget,
|
|
3514
3638
|
samples,
|
|
3515
3639
|
...(selectedCandidateId ? { candidateId: selectedCandidateId } : {}),
|
|
3516
3640
|
}
|
|
3517
3641
|
: {
|
|
3642
|
+
schema: "workbench.remote.run.request.v1",
|
|
3518
3643
|
workflow,
|
|
3519
3644
|
samples,
|
|
3520
3645
|
...(selectedCandidateId ? { candidateId: selectedCandidateId } : {}),
|
|
@@ -3538,7 +3663,7 @@ async function startHostedWorkflow(workflow, argv, io) {
|
|
|
3538
3663
|
: undefined;
|
|
3539
3664
|
const dryRun = parsed.flags["dry-run"] === true;
|
|
3540
3665
|
if (dryRun) {
|
|
3541
|
-
const target = await
|
|
3666
|
+
const target = await resolveRemoteDryRunTarget(parsed, { sourceDir: projectSource.dir });
|
|
3542
3667
|
writeOutput({
|
|
3543
3668
|
ok: true,
|
|
3544
3669
|
dryRun: true,
|
|
@@ -3547,20 +3672,21 @@ async function startHostedWorkflow(workflow, argv, io) {
|
|
|
3547
3672
|
dir: target.dir,
|
|
3548
3673
|
baseUrl: target.baseUrl,
|
|
3549
3674
|
request,
|
|
3550
|
-
}, parsed, io, () => `Would start
|
|
3675
|
+
}, parsed, io, () => `Would start remote ${workflow} for ${target.projectRef}.`);
|
|
3551
3676
|
return 0;
|
|
3552
3677
|
}
|
|
3553
|
-
const target = await
|
|
3678
|
+
const target = await resolveRemoteTarget(parsed, {
|
|
3554
3679
|
requireProjectIdentity: true,
|
|
3555
3680
|
sourceDir: projectSource.dir,
|
|
3556
3681
|
});
|
|
3557
3682
|
if (workflow === "improve") {
|
|
3558
|
-
request.candidateId = await
|
|
3683
|
+
request.candidateId = await ensureRemoteImproveBaseCandidate({
|
|
3559
3684
|
parsed,
|
|
3560
3685
|
target,
|
|
3561
3686
|
samples: request.samples,
|
|
3562
3687
|
candidateId: selectedCandidateId,
|
|
3563
3688
|
sourceYaml: projectSource.specSource,
|
|
3689
|
+
candidateFiles: projectSource.candidateFiles,
|
|
3564
3690
|
adapterFiles: projectSource.adapterFiles,
|
|
3565
3691
|
intervalMs: watchIntervalMs ?? 1000,
|
|
3566
3692
|
timeoutMs: watchTimeoutMs,
|
|
@@ -3571,53 +3697,53 @@ async function startHostedWorkflow(workflow, argv, io) {
|
|
|
3571
3697
|
method: "POST",
|
|
3572
3698
|
body: request,
|
|
3573
3699
|
}, target.baseUrl);
|
|
3574
|
-
const runTarget =
|
|
3700
|
+
const runTarget = remoteTargetForRunStartResponse(target, response);
|
|
3575
3701
|
const startedRun = withRunUrls(runTarget, response.run);
|
|
3576
3702
|
const startedRunOutput = response.reused === true
|
|
3577
3703
|
? { ...startedRun, reused: true }
|
|
3578
3704
|
: startedRun;
|
|
3579
3705
|
if (response.reused === true && response.run.status === "finished") {
|
|
3580
|
-
await
|
|
3706
|
+
await tryImportTerminalRemoteProjectState({ target: runTarget, io });
|
|
3581
3707
|
writeOutput({
|
|
3582
|
-
ok:
|
|
3708
|
+
ok: remoteRunSucceeded(response.run),
|
|
3583
3709
|
reused: true,
|
|
3584
3710
|
workflow,
|
|
3585
3711
|
runId: startedRun.id,
|
|
3586
3712
|
...startedRun,
|
|
3587
|
-
}, parsed, io, () => `Reused
|
|
3588
|
-
return
|
|
3713
|
+
}, parsed, io, () => `Reused remote ${workflow} ${startedRun.id}. Use --rerun to intentionally run it again.`);
|
|
3714
|
+
return remoteRunSucceeded(response.run) ? 0 : 1;
|
|
3589
3715
|
}
|
|
3590
3716
|
if (parsed.flags.watch === true) {
|
|
3591
3717
|
if (parsed.flags.json !== true) {
|
|
3592
|
-
io.stdout.write(`${
|
|
3718
|
+
io.stdout.write(`${formatRemoteRunStarted(startedRun, workflow).trimEnd()}\n${REMOTE_WATCH_LIFECYCLE_NOTE}\n`);
|
|
3593
3719
|
}
|
|
3594
|
-
const watched = await
|
|
3720
|
+
const watched = await watchRemoteRun({
|
|
3595
3721
|
parsed,
|
|
3596
3722
|
target: runTarget,
|
|
3597
3723
|
runId: response.run.id,
|
|
3598
3724
|
intervalMs: watchIntervalMs ?? 1000,
|
|
3599
3725
|
timeoutMs: watchTimeoutMs,
|
|
3600
3726
|
});
|
|
3601
|
-
const outputRun = await
|
|
3602
|
-
await
|
|
3603
|
-
writeOutput(withRunUrls(runTarget, outputRun), parsed, io,
|
|
3604
|
-
return
|
|
3727
|
+
const outputRun = await withRemoteRunFailureSummary(runTarget, watched);
|
|
3728
|
+
await tryImportTerminalRemoteProjectState({ target: runTarget, io });
|
|
3729
|
+
writeOutput(withRunUrls(runTarget, outputRun), parsed, io, formatRemoteRunResult);
|
|
3730
|
+
return remoteRunSucceeded(watched) ? 0 : 1;
|
|
3605
3731
|
}
|
|
3606
|
-
writeOutput(startedRunOutput, parsed, io, (run) =>
|
|
3732
|
+
writeOutput(startedRunOutput, parsed, io, (run) => formatRemoteRunStarted(run, workflow).trimEnd());
|
|
3607
3733
|
return 0;
|
|
3608
3734
|
}
|
|
3609
|
-
async function
|
|
3735
|
+
async function ensureRemoteImproveBaseCandidate(args) {
|
|
3610
3736
|
if (args.candidateId) {
|
|
3611
|
-
const candidate = await
|
|
3737
|
+
const candidate = await readRemoteCandidateSummary(args.target, args.candidateId);
|
|
3612
3738
|
if (!candidate) {
|
|
3613
3739
|
throw new UsageError(`Base candidate ${args.candidateId} was not found for the current benchmark.`);
|
|
3614
3740
|
}
|
|
3615
|
-
if (
|
|
3741
|
+
if (remoteCandidateIsEvaluated(candidate)) {
|
|
3616
3742
|
return args.candidateId;
|
|
3617
3743
|
}
|
|
3618
3744
|
}
|
|
3619
3745
|
else {
|
|
3620
|
-
const activeCandidate = await
|
|
3746
|
+
const activeCandidate = await readEvaluatedActiveRemoteCandidate(args.target);
|
|
3621
3747
|
if (activeCandidate) {
|
|
3622
3748
|
return activeCandidate.id;
|
|
3623
3749
|
}
|
|
@@ -3625,31 +3751,33 @@ async function ensureHostedImproveBaseCandidate(args) {
|
|
|
3625
3751
|
const response = await apiRequest(projectApiPath(args.target.projectId, "/runs"), {
|
|
3626
3752
|
method: "POST",
|
|
3627
3753
|
body: {
|
|
3754
|
+
schema: "workbench.remote.run.request.v1",
|
|
3628
3755
|
workflow: "eval",
|
|
3629
3756
|
samples: args.samples,
|
|
3630
3757
|
...(args.candidateId ? { candidateId: args.candidateId } : {}),
|
|
3631
3758
|
sourceYaml: args.sourceYaml,
|
|
3759
|
+
...(args.candidateId ? {} : { candidateFiles: args.candidateFiles }),
|
|
3632
3760
|
...(args.adapterFiles.length > 0 ? { adapterFiles: args.adapterFiles } : {}),
|
|
3633
3761
|
},
|
|
3634
3762
|
}, args.target.baseUrl);
|
|
3635
|
-
const runTarget =
|
|
3636
|
-
const watched = await
|
|
3763
|
+
const runTarget = remoteTargetForRunStartResponse(args.target, response);
|
|
3764
|
+
const watched = await watchRemoteRun({
|
|
3637
3765
|
parsed: args.parsed,
|
|
3638
3766
|
target: runTarget,
|
|
3639
3767
|
runId: response.run.id,
|
|
3640
3768
|
intervalMs: args.intervalMs,
|
|
3641
3769
|
timeoutMs: args.timeoutMs,
|
|
3642
3770
|
});
|
|
3643
|
-
if (!
|
|
3771
|
+
if (!remoteRunSucceeded(watched)) {
|
|
3644
3772
|
throw new UsageError(`Parent candidate eval ${watched.id} failed; improve was not started.`);
|
|
3645
3773
|
}
|
|
3646
3774
|
if (!watched.candidateId) {
|
|
3647
3775
|
throw new UsageError(`Parent candidate eval ${watched.id} did not produce a candidate.`);
|
|
3648
3776
|
}
|
|
3649
|
-
await
|
|
3777
|
+
await tryImportTerminalRemoteProjectState({ target: runTarget, io: args.io });
|
|
3650
3778
|
return watched.candidateId;
|
|
3651
3779
|
}
|
|
3652
|
-
function
|
|
3780
|
+
function remoteWorkflowArgsForRun(args) {
|
|
3653
3781
|
const next = ["--dir", args.sourceDir, "--runs", args.runId, "--json"];
|
|
3654
3782
|
appendStringFlag(next, "benchmark", asOptionalString(args.parsed.flags.benchmark));
|
|
3655
3783
|
appendStringFlag(next, "candidate", asOptionalString(args.parsed.flags.candidate));
|
|
@@ -3672,27 +3800,27 @@ function appendStringFlag(args, name, value) {
|
|
|
3672
3800
|
args.push(`--${name}`, value);
|
|
3673
3801
|
}
|
|
3674
3802
|
}
|
|
3675
|
-
async function
|
|
3803
|
+
async function readRemoteCandidateSummary(target, candidateId) {
|
|
3676
3804
|
const response = await apiRequest(projectApiPath(target.projectId, "/candidates"), {}, target.baseUrl);
|
|
3677
3805
|
return response.candidates.find((entry) => entry.id === candidateId) ?? null;
|
|
3678
3806
|
}
|
|
3679
|
-
async function
|
|
3807
|
+
async function readEvaluatedActiveRemoteCandidate(target) {
|
|
3680
3808
|
const response = await apiRequest(projectApiPath(target.projectId), {}, target.baseUrl);
|
|
3681
3809
|
const activeCandidateId = response.benchmark.activeCandidateId;
|
|
3682
3810
|
if (!activeCandidateId) {
|
|
3683
3811
|
return null;
|
|
3684
3812
|
}
|
|
3685
|
-
const candidate = await
|
|
3686
|
-
return candidate &&
|
|
3813
|
+
const candidate = await readRemoteCandidateSummary(target, activeCandidateId);
|
|
3814
|
+
return candidate && remoteCandidateIsEvaluated(candidate) ? candidate : null;
|
|
3687
3815
|
}
|
|
3688
|
-
function
|
|
3816
|
+
function remoteCandidateIsEvaluated(candidate) {
|
|
3689
3817
|
return candidate.status === "evaluated" || candidate.eval != null;
|
|
3690
3818
|
}
|
|
3691
3819
|
async function openWorkbench(argv, io) {
|
|
3692
3820
|
const parsed = parseArgs(argv);
|
|
3693
3821
|
rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "no-open", "json"]));
|
|
3694
3822
|
if (parsed.positionals.length > 1) {
|
|
3695
|
-
throw new UsageError(`Unexpected argument for workbench open --
|
|
3823
|
+
throw new UsageError(`Unexpected argument for workbench open --remote: ${parsed.positionals.slice(1).join(" ")}`);
|
|
3696
3824
|
}
|
|
3697
3825
|
const target = await resolveOpenTarget(parsed);
|
|
3698
3826
|
const ref = target.openRef;
|
|
@@ -3722,7 +3850,7 @@ function buildWorkbenchWebUrl(target, ref) {
|
|
|
3722
3850
|
}
|
|
3723
3851
|
return buildWorkbenchResourceUrls(target, { candidateId: ref }).candidateEvaluation;
|
|
3724
3852
|
}
|
|
3725
|
-
async function
|
|
3853
|
+
async function resolveRemoteTarget(parsed, options = {}) {
|
|
3726
3854
|
if (options.sourceArg !== undefined && parsed.flags.dir !== undefined) {
|
|
3727
3855
|
throw new UsageError("Use either --dir or SOURCE, not both.");
|
|
3728
3856
|
}
|
|
@@ -3731,7 +3859,7 @@ async function resolveHostedTarget(parsed, options = {}) {
|
|
|
3731
3859
|
: resolveDir(parsed, options.sourceArg);
|
|
3732
3860
|
const origin = await readWorkbenchOrigin(dir);
|
|
3733
3861
|
const explicitProject = asOptionalString(parsed.flags.benchmark);
|
|
3734
|
-
const baseUrl = await
|
|
3862
|
+
const baseUrl = await effectiveOriginBaseUrl(origin?.baseUrl);
|
|
3735
3863
|
if (explicitProject && (!isRemoteProjectId(explicitProject) || options.requireProjectIdentity === true)) {
|
|
3736
3864
|
const project = await resolveRemoteProject(explicitProject, baseUrl);
|
|
3737
3865
|
return {
|
|
@@ -3745,7 +3873,7 @@ async function resolveHostedTarget(parsed, options = {}) {
|
|
|
3745
3873
|
}
|
|
3746
3874
|
const projectId = explicitProject ?? origin?.projectId;
|
|
3747
3875
|
if (!projectId) {
|
|
3748
|
-
throw new UsageError("Missing
|
|
3876
|
+
throw new UsageError("Missing remote benchmark. Run workbench push, workbench clone, or pass --benchmark OWNER/BENCHMARK.");
|
|
3749
3877
|
}
|
|
3750
3878
|
const originRemote = origin ? parseOriginRemote(origin) : null;
|
|
3751
3879
|
return {
|
|
@@ -3759,7 +3887,7 @@ async function resolveHostedTarget(parsed, options = {}) {
|
|
|
3759
3887
|
origin,
|
|
3760
3888
|
};
|
|
3761
3889
|
}
|
|
3762
|
-
async function
|
|
3890
|
+
async function resolveRemoteDryRunTarget(parsed, options = {}) {
|
|
3763
3891
|
if (options.sourceArg !== undefined && parsed.flags.dir !== undefined) {
|
|
3764
3892
|
throw new UsageError("Use either --dir or SOURCE, not both.");
|
|
3765
3893
|
}
|
|
@@ -3768,7 +3896,7 @@ async function resolveHostedDryRunTarget(parsed, options = {}) {
|
|
|
3768
3896
|
: resolveDir(parsed, options.sourceArg);
|
|
3769
3897
|
const origin = await readWorkbenchOrigin(dir);
|
|
3770
3898
|
const explicitProject = asOptionalString(parsed.flags.benchmark);
|
|
3771
|
-
const baseUrl = await
|
|
3899
|
+
const baseUrl = await effectiveOriginBaseUrl(origin?.baseUrl);
|
|
3772
3900
|
if (explicitProject) {
|
|
3773
3901
|
if (isRemoteProjectId(explicitProject)) {
|
|
3774
3902
|
return {
|
|
@@ -3801,7 +3929,7 @@ async function resolveHostedDryRunTarget(parsed, options = {}) {
|
|
|
3801
3929
|
origin,
|
|
3802
3930
|
};
|
|
3803
3931
|
}
|
|
3804
|
-
throw new UsageError("Missing
|
|
3932
|
+
throw new UsageError("Missing remote benchmark. Run workbench push, workbench clone, or pass --benchmark OWNER/BENCHMARK.");
|
|
3805
3933
|
}
|
|
3806
3934
|
async function resolveOpenTarget(parsed) {
|
|
3807
3935
|
const ref = parsed.positionals[0];
|
|
@@ -3830,7 +3958,7 @@ async function resolveOpenTarget(parsed) {
|
|
|
3830
3958
|
};
|
|
3831
3959
|
}
|
|
3832
3960
|
return {
|
|
3833
|
-
...(await
|
|
3961
|
+
...(await resolveRemoteTarget(parsed, { requireProjectIdentity: true })),
|
|
3834
3962
|
...(ref ? { openRef: ref } : {}),
|
|
3835
3963
|
};
|
|
3836
3964
|
}
|
|
@@ -3901,7 +4029,7 @@ function withRunUrls(target, run) {
|
|
|
3901
4029
|
}),
|
|
3902
4030
|
};
|
|
3903
4031
|
}
|
|
3904
|
-
function
|
|
4032
|
+
function remoteTargetForRunStartResponse(target, response) {
|
|
3905
4033
|
const projectId = response.benchmark?.id ?? response.run.projectId ?? target.projectId;
|
|
3906
4034
|
if (projectId === target.projectId && !response.benchmark) {
|
|
3907
4035
|
return target;
|
|
@@ -3926,7 +4054,7 @@ function hostedTargetForRunStartResponse(target, response) {
|
|
|
3926
4054
|
}
|
|
3927
4055
|
return next;
|
|
3928
4056
|
}
|
|
3929
|
-
function
|
|
4057
|
+
function remoteRunEvaluationCandidateId(run, jobs = []) {
|
|
3930
4058
|
if (run.outputCandidateId) {
|
|
3931
4059
|
return run.outputCandidateId;
|
|
3932
4060
|
}
|
|
@@ -3986,7 +4114,7 @@ function runtimeBundleForProjectVisibility(runtime, visibility) {
|
|
|
3986
4114
|
};
|
|
3987
4115
|
}
|
|
3988
4116
|
function localProjectStateSource(source) {
|
|
3989
|
-
const request =
|
|
4117
|
+
const request = remoteProjectSourceRequest(source);
|
|
3990
4118
|
const stateSource = {
|
|
3991
4119
|
source: request.source,
|
|
3992
4120
|
files: source.sourceFiles.map((file) => ({ ...file })),
|
|
@@ -4014,7 +4142,7 @@ function toSurfaceSnapshotFile(file) {
|
|
|
4014
4142
|
executable: file.executable === true,
|
|
4015
4143
|
};
|
|
4016
4144
|
}
|
|
4017
|
-
function
|
|
4145
|
+
function remoteProjectSummaryFromState(state) {
|
|
4018
4146
|
return {
|
|
4019
4147
|
id: state.project.id,
|
|
4020
4148
|
ownerUsername: state.project.ownerUsername,
|
|
@@ -4025,12 +4153,12 @@ function hostedProjectSummaryFromState(state) {
|
|
|
4025
4153
|
function sourceFileCount(source) {
|
|
4026
4154
|
return source.sourceFiles.length;
|
|
4027
4155
|
}
|
|
4028
|
-
function
|
|
4029
|
-
const { network, resources } =
|
|
4156
|
+
function remoteProjectSourceRequest(source) {
|
|
4157
|
+
const { network, resources } = remoteEnvironmentOptions(source);
|
|
4030
4158
|
return {
|
|
4031
4159
|
source: source.specSource,
|
|
4032
4160
|
candidateFiles: source.candidateFiles,
|
|
4033
|
-
engineResolveFiles:
|
|
4161
|
+
engineResolveFiles: remoteEngineResolveFiles(source),
|
|
4034
4162
|
engineResolveBinding: engineResolveBindingForSpec(source.spec),
|
|
4035
4163
|
adapterFiles: source.adapterFiles,
|
|
4036
4164
|
dockerfile: source.dockerfile,
|
|
@@ -4043,7 +4171,7 @@ function hostedProjectSourceRequest(source) {
|
|
|
4043
4171
|
function isRemoteProjectId(value) {
|
|
4044
4172
|
return /^wb_[a-f0-9]{12}$/u.test(value);
|
|
4045
4173
|
}
|
|
4046
|
-
function
|
|
4174
|
+
function remoteEnvironmentOptions(source) {
|
|
4047
4175
|
return {
|
|
4048
4176
|
network: source.spec.environment.network?.egress === "open"
|
|
4049
4177
|
? "on"
|
|
@@ -4051,7 +4179,7 @@ function hostedEnvironmentOptions(source) {
|
|
|
4051
4179
|
resources: runtimeResources(source.spec.environment),
|
|
4052
4180
|
};
|
|
4053
4181
|
}
|
|
4054
|
-
async function
|
|
4182
|
+
async function watchRemoteRun(args) {
|
|
4055
4183
|
const deadline = args.timeoutMs === undefined ? undefined : Date.now() + args.timeoutMs;
|
|
4056
4184
|
let lastRun = null;
|
|
4057
4185
|
while (true) {
|
|
@@ -4079,7 +4207,7 @@ async function watchHostedRun(args) {
|
|
|
4079
4207
|
await sleep(args.intervalMs);
|
|
4080
4208
|
}
|
|
4081
4209
|
}
|
|
4082
|
-
function
|
|
4210
|
+
function formatRemoteRunResult(run) {
|
|
4083
4211
|
const candidateId = run.outputCandidateId ?? run.candidateId;
|
|
4084
4212
|
const activeDetail = run.activeCandidateId && candidateId && run.activeCandidateId !== candidateId
|
|
4085
4213
|
? `; active ${run.activeCandidateId}`
|
|
@@ -4097,7 +4225,7 @@ function formatRetryCommandResult(result) {
|
|
|
4097
4225
|
const runId = run?.id ?? result.runId ?? "unknown";
|
|
4098
4226
|
const scope = `${result.retried.kind} ${result.retried.id}`;
|
|
4099
4227
|
const verb = run
|
|
4100
|
-
? run.status === "finished" ? "finished as
|
|
4228
|
+
? run.status === "finished" ? "finished as remote run" : "started as remote run"
|
|
4101
4229
|
: "finished as local run";
|
|
4102
4230
|
return [
|
|
4103
4231
|
`Retry of ${scope} ${verb} ${runId}.`,
|
|
@@ -4113,7 +4241,7 @@ function formatRetryCommandResult(result) {
|
|
|
4113
4241
|
: result.urls?.benchmark ? [`Open benchmark: ${result.urls.benchmark}`] : []),
|
|
4114
4242
|
].join("\n");
|
|
4115
4243
|
}
|
|
4116
|
-
function
|
|
4244
|
+
function formatRemoteRunStarted(run, fallbackWorkflow) {
|
|
4117
4245
|
const candidateId = run.outputCandidateId ?? run.candidateId;
|
|
4118
4246
|
return [
|
|
4119
4247
|
`Started ${run.workflow ?? fallbackWorkflow} run ${run.id}; ${candidateId ? `candidate ${candidateId}` : `${run.jobCount ?? 0} jobs queued`}.`,
|
|
@@ -4124,10 +4252,7 @@ function formatHostedRunStarted(run, fallbackWorkflow) {
|
|
|
4124
4252
|
].join("\n");
|
|
4125
4253
|
}
|
|
4126
4254
|
function readRunJobPurpose(job) {
|
|
4127
|
-
|
|
4128
|
-
const execution = readRecord(input?.execution);
|
|
4129
|
-
const purpose = execution?.purpose;
|
|
4130
|
-
return typeof purpose === "string" && purpose ? purpose : null;
|
|
4255
|
+
return job.purpose && job.purpose.trim() ? job.purpose : null;
|
|
4131
4256
|
}
|
|
4132
4257
|
function readRecord(value) {
|
|
4133
4258
|
return value && typeof value === "object" && !Array.isArray(value)
|
|
@@ -4146,24 +4271,24 @@ function integerValue(value) {
|
|
|
4146
4271
|
function readFiniteNumber(value) {
|
|
4147
4272
|
return typeof value === "number" && Number.isFinite(value) ? value : null;
|
|
4148
4273
|
}
|
|
4149
|
-
async function
|
|
4150
|
-
if (
|
|
4274
|
+
async function withRemoteRunFailureSummary(target, run) {
|
|
4275
|
+
if (remoteRunSucceeded(run) || run.error || (run.failedJobCount ?? 0) <= 0) {
|
|
4151
4276
|
return run;
|
|
4152
4277
|
}
|
|
4153
|
-
const error = await
|
|
4278
|
+
const error = await readRemoteRunFailureSummary(target, run.id);
|
|
4154
4279
|
return error ? { ...run, error } : run;
|
|
4155
4280
|
}
|
|
4156
|
-
async function
|
|
4281
|
+
async function readRemoteRunFailureSummary(target, runId) {
|
|
4157
4282
|
try {
|
|
4158
|
-
const
|
|
4159
|
-
const failed =
|
|
4283
|
+
const detail = await readRemoteRunDetail(target, runId);
|
|
4284
|
+
const failed = detail.jobs.find((job) => job.status === "failed" && job.error);
|
|
4160
4285
|
return failed?.error ? `First failed job ${failed.id}: ${failed.error}` : null;
|
|
4161
4286
|
}
|
|
4162
4287
|
catch {
|
|
4163
4288
|
return null;
|
|
4164
4289
|
}
|
|
4165
4290
|
}
|
|
4166
|
-
function
|
|
4291
|
+
function remoteRunSucceeded(run) {
|
|
4167
4292
|
if (run.status !== "finished") {
|
|
4168
4293
|
return false;
|
|
4169
4294
|
}
|
|
@@ -4179,16 +4304,6 @@ async function readWorkbenchOrigin(dir) {
|
|
|
4179
4304
|
throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
|
|
4180
4305
|
}
|
|
4181
4306
|
const originRecord = parsed;
|
|
4182
|
-
const keys = Object.keys(originRecord).sort();
|
|
4183
|
-
const expectedKeys = [
|
|
4184
|
-
"baseUrl",
|
|
4185
|
-
"linkedAt",
|
|
4186
|
-
"projectId",
|
|
4187
|
-
"remote",
|
|
4188
|
-
"runtimeFingerprint",
|
|
4189
|
-
"sourceFingerprint",
|
|
4190
|
-
"sourceRevisionId",
|
|
4191
|
-
];
|
|
4192
4307
|
if (typeof originRecord.projectId !== "string" ||
|
|
4193
4308
|
typeof originRecord.baseUrl !== "string" ||
|
|
4194
4309
|
typeof originRecord.remote !== "string" ||
|
|
@@ -4202,9 +4317,6 @@ async function readWorkbenchOrigin(dir) {
|
|
|
4202
4317
|
originRecord.runtimeFingerprint.length === 0) {
|
|
4203
4318
|
throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
|
|
4204
4319
|
}
|
|
4205
|
-
if (JSON.stringify(keys) !== JSON.stringify(expectedKeys)) {
|
|
4206
|
-
throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
|
|
4207
|
-
}
|
|
4208
4320
|
return {
|
|
4209
4321
|
baseUrl: normalizeBaseUrl(originRecord.baseUrl),
|
|
4210
4322
|
remote: normalizeOriginRemote(originRecord.remote),
|
|
@@ -4254,7 +4366,7 @@ async function writeWorkbenchOriginFromState(dir, args) {
|
|
|
4254
4366
|
const runtimeFingerprint = args.state.base.runtimeFingerprint ??
|
|
4255
4367
|
workbenchRuntimeBundleFingerprint(args.state.runtime);
|
|
4256
4368
|
if (!sourceRevisionId || !sourceFingerprint || !runtimeFingerprint) {
|
|
4257
|
-
throw new UsageError("
|
|
4369
|
+
throw new UsageError("Remote project state is missing required origin metadata.");
|
|
4258
4370
|
}
|
|
4259
4371
|
return await writeWorkbenchOrigin(dir, {
|
|
4260
4372
|
baseUrl: args.baseUrl,
|
|
@@ -4290,18 +4402,29 @@ function originRemoteUrlParts(origin) {
|
|
|
4290
4402
|
/**
 * Compute the location of the origin record inside a project directory.
 *
 * @param {string} dir - Project directory.
 * @returns {string} `<dir>/.workbench/origin.json`, joined with `path.join`.
 */
function workbenchOriginPath(dir) {
    const originSegments = [".workbench", "origin.json"];
    return path.join(dir, ...originSegments);
}
|
|
4293
|
-
async function effectiveBaseUrl(
|
|
4405
|
+
/**
 * Resolve the base URL to use when no origin-specific URL is involved.
 *
 * Loads the stored config and defers to `selectWorkbenchBaseUrl`, supplying
 * only the config's `baseUrl`.
 *
 * @returns The value produced by `selectWorkbenchBaseUrl`.
 */
async function effectiveBaseUrl() {
    const { baseUrl: configBaseUrl } = await loadConfig();
    return selectWorkbenchBaseUrl({ configBaseUrl });
}
|
|
4409
|
+
/**
 * Resolve the base URL for a project that has a recorded origin.
 *
 * Loads the stored config and defers to `selectWorkbenchBaseUrl`, supplying
 * both the origin's base URL and the config's `baseUrl`.
 *
 * @param originBaseUrl - Base URL taken from the project's origin record.
 * @returns The value produced by `selectWorkbenchBaseUrl`.
 */
async function effectiveOriginBaseUrl(originBaseUrl) {
    const { baseUrl: configBaseUrl } = await loadConfig();
    const selection = { originBaseUrl, configBaseUrl };
    return selectWorkbenchBaseUrl(selection);
}
|
|
4416
|
+
/**
 * Pick the Workbench base URL from the available sources and normalize it.
 *
 * The first source that is neither `null` nor `undefined` wins, in this
 * order:
 *   1. `input.explicitBaseUrl`
 *   2. `input.originBaseUrl`
 *   3. the `WORKBENCH_API_URL` environment variable
 *   4. `input.configBaseUrl`
 *   5. `DEFAULT_BASE_URL`
 *
 * @param {object} [input] - Optional bag of candidate URLs.
 * @returns The chosen URL after being passed through `normalizeBaseUrl`.
 */
function selectWorkbenchBaseUrl(input = {}) {
    const candidates = [
        input.explicitBaseUrl,
        input.originBaseUrl,
        process.env.WORKBENCH_API_URL,
        input.configBaseUrl,
    ];
    // Only null/undefined count as "absent"; other falsy values such as an
    // empty string are still selected, matching nullish-coalescing semantics.
    const firstPresent = candidates.find((candidate) => candidate !== null && candidate !== undefined);
    const chosen = firstPresent === undefined ? DEFAULT_BASE_URL : firstPresent;
    return normalizeBaseUrl(chosen);
}
|
|
4300
4423
|
async function readWorkbenchProfileStatus(config) {
|
|
4301
4424
|
if (!config.accessToken) {
|
|
4302
4425
|
return { authenticated: false, profile: null };
|
|
4303
4426
|
}
|
|
4304
|
-
const baseUrl =
|
|
4427
|
+
const baseUrl = selectWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
|
|
4305
4428
|
try {
|
|
4306
4429
|
const response = await fetch(`${baseUrl}/api/workbench/profile`, {
|
|
4307
4430
|
headers: {
|
|
@@ -4327,10 +4450,9 @@ async function readWorkbenchProfileStatus(config) {
|
|
|
4327
4450
|
}
|
|
4328
4451
|
async function apiRequest(apiPath, options = {}, baseUrlOverride) {
|
|
4329
4452
|
const config = await loadConfig();
|
|
4330
|
-
const baseUrl =
|
|
4331
|
-
|
|
4332
|
-
config.baseUrl
|
|
4333
|
-
DEFAULT_BASE_URL);
|
|
4453
|
+
const baseUrl = baseUrlOverride !== undefined
|
|
4454
|
+
? normalizeBaseUrl(baseUrlOverride)
|
|
4455
|
+
: selectWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
|
|
4334
4456
|
const method = options.method ?? "GET";
|
|
4335
4457
|
const canRetry = method === "GET";
|
|
4336
4458
|
let lastError = null;
|
|
@@ -4868,6 +4990,24 @@ function formatMetricValue(value) {
|
|
|
4868
4990
|
}
|
|
4869
4991
|
return value.toFixed(2);
|
|
4870
4992
|
}
|
|
4993
|
+
/**
 * Format a metric that may be missing.
 *
 * @param {unknown} value - Metric value, possibly absent or non-numeric.
 * @returns {string} The result of `formatMetricValue(value)` when `value` is
 *   a finite number; the literal "n/a" otherwise.
 */
function formatNullableMetric(value) {
    if (typeof value !== "number" || !Number.isFinite(value)) {
        return "n/a";
    }
    return formatMetricValue(value);
}
|
|
4998
|
+
/**
 * Render a failure record as one tab-separated line.
 *
 * Fields appear in a fixed order: kind, id, status (defaulting to "failed"
 * when nullish), then `run=`, `candidate=`, `job=`, `case=` and `sample=`
 * labels, and finally the error text. Any field that ends up falsy is left
 * out entirely, so the number of columns varies between records.
 *
 * @param {object} failure - Failure record; the properties listed above are
 *   read from it.
 * @returns {string} The populated fields joined with tab characters.
 */
function formatFailureLine(failure) {
    // Labelled field that disappears when its value is falsy.
    const labelled = (label, value) => (value ? `${label}=${value}` : null);
    const fields = [
        failure.kind,
        failure.id,
        failure.status ?? "failed",
        labelled("run", failure.runId),
        labelled("candidate", failure.candidateId),
        labelled("job", failure.jobId),
        labelled("case", failure.caseId),
        // A sample index of 0 is meaningful, so test the type, not truthiness.
        typeof failure.sampleIndex === "number" ? `sample=${failure.sampleIndex}` : null,
        failure.error ?? null,
    ];
    const present = [];
    for (const field of fields) {
        if (field) {
            present.push(field);
        }
    }
    return present.join("\t");
}
|
|
4871
5011
|
function resolveDir(parsed, positionalDir) {
|
|
4872
5012
|
const resolved = path.resolve(asOptionalString(parsed.flags.dir) ?? positionalDir ?? process.cwd());
|
|
4873
5013
|
return isWorkbenchSourceYamlPath(resolved) ? path.dirname(resolved) : resolved;
|