@workbench-ai/workbench 0.0.64 → 0.0.66
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark-fingerprint.js +2 -2
- package/dist/command-model.d.ts +1 -1
- package/dist/command-model.d.ts.map +1 -1
- package/dist/command-model.js +106 -35
- package/dist/dev-open/client.js +109 -109
- package/dist/dev-open-server.d.ts +2 -37
- package/dist/dev-open-server.d.ts.map +1 -1
- package/dist/dev-open-server.js +39 -322
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +364 -263
- package/dist/local-archive.d.ts +4 -4
- package/dist/local-archive.d.ts.map +1 -1
- package/dist/local-archive.js +4 -90
- package/dist/local-inspection.d.ts +9 -0
- package/dist/local-inspection.d.ts.map +1 -0
- package/dist/local-inspection.js +317 -0
- package/dist/project-source.d.ts +6 -6
- package/dist/project-source.js +6 -6
- package/package.json +9 -4
package/dist/index.js
CHANGED
|
@@ -5,17 +5,18 @@ import { createRequire } from "node:module";
|
|
|
5
5
|
import os from "node:os";
|
|
6
6
|
import path from "node:path";
|
|
7
7
|
import { Writable } from "node:stream";
|
|
8
|
-
import {
|
|
8
|
+
import { createBaselineCandidateJob as createRuntimeBaselineCandidateJob, evaluationScorecardId, evaluationMeanMetrics, executeWorkbenchExecutionJob, engineResolveBindingForSpec, filterOptimizerTraceJobsForCaseIds, filterCandidateSourceFiles, formatWorkbenchCaseSelector, formatWorkbenchSelectionPolicy, workbenchCaseSelectorUsesAllCases, workbenchExecutionPurpose, workbenchRunExecutionFingerprint, createWorkbenchAdapterAuthBundle, createOptimizerTraceInputFiles, DOCKER_SANDBOX_BACKEND, localWorkbenchAdapterAuthStore, materializeWorkbenchRunResult, normalizeSurfaceFiles, isSurfaceSnapshotFile, jsonRecord, planWorkbenchExecutionJobsForPurpose, runWorkbenchExecutionDag, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, runtimeResources, validateWorkbenchRunEnvelope, parseWorkbenchAdapterAuthTarget, workbenchEngineCaseIdsForImproveEvaluation, workbenchEngineCaseIdsForSelector, workbenchImproveOptimizeSelector, workbenchImproveSelectionPolicy, workbenchProjectSourceFingerprint, workbenchRuntimeBundleFingerprint, workbenchRuntimeExplicitActiveId, } from "@workbench-ai/workbench-core";
|
|
9
9
|
import { assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterAuthRequirements, normalizeWorkbenchAdapterOperationRequest, readWorkbenchAdapterOperationResult, workbenchAdapterOperationCommand, workbenchAdapterOperationResultPath, withDefaultWorkbenchAdapterAuthProfiles as applyDefaultWorkbenchAdapterAuthProfiles, } from "@workbench-ai/workbench-protocol";
|
|
10
10
|
import { builtinLocalTraceAdapter, builtinLocalTraceAdapters, sortLocalTraceRefs, } from "@workbench-ai/workbench-built-in-adapters/local-traces";
|
|
11
|
-
import { commandUsage,
|
|
11
|
+
import { commandUsage, REMOTE_WATCH_LIFECYCLE_NOTE, LOCAL_DEV_OPEN_LIFECYCLE_NOTE, rootUsage, } from "./command-model.js";
|
|
12
12
|
import { startLocalWorkbenchDevServer } from "./dev-open-server.js";
|
|
13
|
+
import { createLocalWorkbenchInspection } from "./local-inspection.js";
|
|
13
14
|
import { createWorkbenchInitScaffold, } from "./init-scaffold.js";
|
|
14
15
|
import { defaultAdapterManifests, composeRuntimeDockerfileWithAdapters, resolveDefaultWorkbenchAdapter, resolveProjectAdapterSource, resolveWorkbenchAdaptersForProject, WORKBENCH_ADAPTER_MANIFEST_FILE, } from "./adapter-project.js";
|
|
15
16
|
import { createAdapterCommandEnv } from "./adapter-command-env.js";
|
|
16
17
|
import { loadLocalArchive, loadLocalArchiveIndex, exportLocalRuntimeBundle, importLocalRuntimeBundle, runtimeBundleStats, materializeCandidateRoot, readLocalCandidate, readLocalCandidateFiles, readLocalJobs, saveLocalArchive, saveLocalJobs, setLocalActive, upsertLocalRun, upsertLocalCandidate, upsertLocalEvaluation, } from "./local-archive.js";
|
|
17
18
|
import { WorkspaceSnapshotError, } from "./workspace-snapshot.js";
|
|
18
|
-
import {
|
|
19
|
+
import { remoteEngineResolveFiles, readLocalProjectSource, WORKBENCH_BENCHMARK_FILE, } from "./project-source.js";
|
|
19
20
|
import { localBenchmarkFingerprint, localCandidateFingerprint, projectStateBenchmarkFingerprint, } from "./benchmark-fingerprint.js";
|
|
20
21
|
const require = createRequire(import.meta.url);
|
|
21
22
|
function getCliVersion() {
|
|
@@ -34,6 +35,30 @@ class WorkbenchApiRequestError extends Error {
|
|
|
34
35
|
}
|
|
35
36
|
const API_REQUEST_MAX_ATTEMPTS = 3;
|
|
36
37
|
const DEFAULT_BASE_URL = "https://v2.workbench.ai";
|
|
38
|
+
// Subcommand dispatch tables: each key is the word that follows the group name
// on the command line, each value is the handler invoked for it.

/** Handlers for `workbench auth <subcommand>`. */
const AUTH_COMMAND_HANDLERS = {
    connect: authConnect,
    disconnect: authDisconnect,
};

/** Handlers for `workbench adapters <subcommand>`. */
const ADAPTERS_COMMAND_HANDLERS = {
    create: adaptersCreate,
    inspect: adaptersInspect,
    list: adaptersList,
    test: adaptersTest,
};

/** Handlers for `workbench traces <subcommand>`. */
const TRACES_COMMAND_HANDLERS = {
    collect: localTraceCollect,
    list: localTraceList,
    show: localTraceShow,
};

/**
 * Command groups whose help topic is addressed by two words ("runs show").
 * Each group maps to the subcommands that have their own help entry; an empty
 * list means any second word is accepted as part of the help path.
 */
const TWO_SEGMENT_HELP_COMMANDS = {
    adapters: ["create", "list", "inspect", "test"],
    auth: [],
    candidates: ["list", "show", "files", "preview"],
    evaluations: ["list", "show"],
    executions: ["trace"],
    runs: ["list", "show"],
    traces: ["collect", "list", "show"],
};
|
|
37
62
|
export async function runCli(argv, io = {
|
|
38
63
|
stdin: process.stdin,
|
|
39
64
|
stdout: process.stdout,
|
|
@@ -82,31 +107,31 @@ export async function runCli(argv, io = {
|
|
|
82
107
|
return await pushBenchmark(argv.slice(1), io);
|
|
83
108
|
}
|
|
84
109
|
if (argv[0] === "eval") {
|
|
85
|
-
const
|
|
86
|
-
return
|
|
87
|
-
? await
|
|
88
|
-
: await localEvaluateCandidate(
|
|
110
|
+
const remote = extractRemoteFlag(argv.slice(1));
|
|
111
|
+
return remote.enabled
|
|
112
|
+
? await startRemoteWorkflow("eval", remote.argv, io)
|
|
113
|
+
: await localEvaluateCandidate(remote.argv, io, runtimeOptions);
|
|
89
114
|
}
|
|
90
115
|
if (argv[0] === "retry") {
|
|
91
|
-
const
|
|
92
|
-
return
|
|
93
|
-
? await
|
|
94
|
-
: await localRetry(
|
|
116
|
+
const remote = extractRemoteFlag(argv.slice(1));
|
|
117
|
+
return remote.enabled
|
|
118
|
+
? await retryRemoteWorkflow(remote.argv, io)
|
|
119
|
+
: await localRetry(remote.argv, io, runtimeOptions);
|
|
95
120
|
}
|
|
96
121
|
if (argv[0] === "improve") {
|
|
97
|
-
const
|
|
98
|
-
return
|
|
99
|
-
? await
|
|
100
|
-
: await localRun(
|
|
122
|
+
const remote = extractRemoteFlag(argv.slice(1));
|
|
123
|
+
return remote.enabled
|
|
124
|
+
? await startRemoteWorkflow("improve", remote.argv, io)
|
|
125
|
+
: await localRun(remote.argv, io, runtimeOptions);
|
|
101
126
|
}
|
|
102
127
|
if (argv[0] === "restore") {
|
|
103
128
|
return await localRestore(argv.slice(1), io);
|
|
104
129
|
}
|
|
105
130
|
if (argv[0] === "open") {
|
|
106
|
-
const
|
|
107
|
-
return
|
|
108
|
-
? await openWorkbench(
|
|
109
|
-
: await localDevOpen(
|
|
131
|
+
const remote = extractRemoteFlag(argv.slice(1));
|
|
132
|
+
return remote.enabled
|
|
133
|
+
? await openWorkbench(remote.argv, io)
|
|
134
|
+
: await localDevOpen(remote.argv, io);
|
|
110
135
|
}
|
|
111
136
|
if (argv[0] === "auth") {
|
|
112
137
|
return await runAuthCommand(argv.slice(1), io);
|
|
@@ -117,6 +142,9 @@ export async function runCli(argv, io = {
|
|
|
117
142
|
if (argv[0] === "traces") {
|
|
118
143
|
return await runTracesCommand(argv.slice(1), io);
|
|
119
144
|
}
|
|
145
|
+
if (argv[0] === "diagnose") {
|
|
146
|
+
return await localDiagnose(argv.slice(1), io);
|
|
147
|
+
}
|
|
120
148
|
const commandPath = argv.slice(0, 2).join(" ");
|
|
121
149
|
const rest = argv.slice(2);
|
|
122
150
|
switch (commandPath) {
|
|
@@ -124,6 +152,12 @@ export async function runCli(argv, io = {
|
|
|
124
152
|
return await localRunList(rest, io);
|
|
125
153
|
case "runs show":
|
|
126
154
|
return await localRunShow(rest, io);
|
|
155
|
+
case "evaluations list":
|
|
156
|
+
return await localEvaluationList(rest, io);
|
|
157
|
+
case "evaluations show":
|
|
158
|
+
return await localEvaluationShow(rest, io);
|
|
159
|
+
case "executions trace":
|
|
160
|
+
return await localExecutionTrace(rest, io);
|
|
127
161
|
case "candidates list":
|
|
128
162
|
return await localCandidateList(rest, io);
|
|
129
163
|
case "candidates show":
|
|
@@ -152,32 +186,18 @@ export async function runCli(argv, io = {
|
|
|
152
186
|
}
|
|
153
187
|
/**
 * Derives the command path used to pick a help topic from raw CLI arguments.
 *
 * Arguments equal to "--help" or "-h", and anything starting with "--", are
 * ignored. When the first remaining positional is a group listed in
 * TWO_SEGMENT_HELP_COMMANDS and the second positional is one of that group's
 * subcommands (or the group's list is empty, meaning any subcommand is
 * accepted), the first two positionals are joined with a single space.
 * Otherwise only the first positional is returned.
 *
 * @param {string[]} argv - CLI arguments.
 * @returns {string} "group subcommand", "command", or "" when there is no positional.
 */
function commandPathForHelp(argv) {
    const positionals = argv.filter((arg) => arg !== "--help" && arg !== "-h" && !arg.startsWith("--"));
    const command = positionals[0] ?? "";
    // The command name is user input, so only own keys of the table may match.
    // A plain `TABLE[command]` lookup would resolve names such as "constructor",
    // "hasOwnProperty" or "__proto__" through Object.prototype and then crash on
    // `.includes`, or (for "toString", whose length is 0) be treated as a group.
    const subcommands = Object.prototype.hasOwnProperty.call(TWO_SEGMENT_HELP_COMMANDS, command)
        ? TWO_SEGMENT_HELP_COMMANDS[command]
        : undefined;
    if (subcommands && (subcommands.length === 0 || subcommands.includes(positionals[1] ?? ""))) {
        return positionals.slice(0, 2).join(" ");
    }
    return command;
}
|
|
176
|
-
function
|
|
196
|
+
function extractRemoteFlag(argv) {
|
|
177
197
|
let enabled = false;
|
|
178
198
|
const next = [];
|
|
179
199
|
for (const arg of argv) {
|
|
180
|
-
if (arg === "--
|
|
200
|
+
if (arg === "--remote") {
|
|
181
201
|
enabled = true;
|
|
182
202
|
}
|
|
183
203
|
else {
|
|
@@ -1498,6 +1518,9 @@ function latestCompletedAttemptJobsByPair(jobs, desiredKeys) {
|
|
|
1498
1518
|
return byPair;
|
|
1499
1519
|
}
|
|
1500
1520
|
function caseSamplePairFromJob(job) {
|
|
1521
|
+
if (job.caseId && Number.isSafeInteger(job.sampleIndex) && job.sampleIndex >= 0) {
|
|
1522
|
+
return { caseId: job.caseId, sampleIndex: job.sampleIndex };
|
|
1523
|
+
}
|
|
1501
1524
|
const input = readRecord(job.input);
|
|
1502
1525
|
const execution = readRecord(input?.execution);
|
|
1503
1526
|
const metadata = readRecord(execution?.metadata);
|
|
@@ -1593,7 +1616,7 @@ function resolveProjectPath(root, filePath) {
|
|
|
1593
1616
|
}
|
|
1594
1617
|
/**
 * Runs one execution job for local development.
 *
 * Forwards `args` unchanged to executeWorkbenchExecutionJob, selecting the
 * Docker sandbox backend and enabling loading of local adapter auth profiles.
 *
 * @param {object} args - Job arguments, passed through as-is.
 * @returns {Promise<unknown>} The result of executeWorkbenchExecutionJob.
 */
async function executeLocalDevelopmentJob(args) {
    const localExecutionOptions = {
        sandboxBackend: DOCKER_SANDBOX_BACKEND,
        loadLocalAdapterAuthProfiles: true,
    };
    const outcome = await executeWorkbenchExecutionJob(args, localExecutionOptions);
    return outcome;
}
|
|
@@ -1604,7 +1627,7 @@ async function executeLocalDevelopmentDag(args) {
|
|
|
1604
1627
|
const result = await runWorkbenchExecutionDag({
|
|
1605
1628
|
jobs: args.jobs,
|
|
1606
1629
|
capacity: args.capacity,
|
|
1607
|
-
|
|
1630
|
+
sandboxBackend: DOCKER_SANDBOX_BACKEND,
|
|
1608
1631
|
executeJob: async (job) => {
|
|
1609
1632
|
return await executeLocalDevelopmentJob({
|
|
1610
1633
|
job,
|
|
@@ -1802,11 +1825,16 @@ async function localRestore(argv, io) {
|
|
|
1802
1825
|
writeOutput({ ok: true, activeCandidateId: candidateId, changedPaths }, parsed, io, () => `Restored ${candidateId} to ${candidateRoot}.`);
|
|
1803
1826
|
return 0;
|
|
1804
1827
|
}
|
|
1828
|
+
/**
 * Builds a local inspection handle for the workspace selected by the parsed
 * CLI arguments (the directory is resolved by resolveDir).
 *
 * @param {object} parsed - Result of parseArgs.
 * @returns {object} Whatever createLocalWorkbenchInspection returns.
 */
function localInspectionFromParsed(parsed) {
    const workspace = resolveDir(parsed);
    return createLocalWorkbenchInspection({ workspace });
}
|
|
1805
1831
|
async function localCandidateList(argv, io) {
|
|
1806
1832
|
const parsed = parseArgs(argv);
|
|
1807
1833
|
rejectUnknownFlags(parsed, new Set(["dir", "json"]));
|
|
1808
|
-
const
|
|
1809
|
-
|
|
1834
|
+
const inspection = localInspectionFromParsed(parsed);
|
|
1835
|
+
const snapshot = await inspection.snapshot();
|
|
1836
|
+
const candidates = await Promise.all(snapshot.summaries.map((candidate) => inspection.candidate({ id: candidate.id })));
|
|
1837
|
+
writeOutput(candidates, parsed, io, (candidates) => candidates
|
|
1810
1838
|
.map((candidate) => `${candidate.id}\t${candidate.status}\tevaluation ${formatCandidateEvaluationScore(candidate)}${snapshot.activeId === candidate.id ? "\tactive" : ""}`)
|
|
1811
1839
|
.join("\n") || "No candidates.");
|
|
1812
1840
|
return 0;
|
|
@@ -1814,13 +1842,14 @@ async function localCandidateList(argv, io) {
|
|
|
1814
1842
|
async function localCandidateShow(argv, io) {
|
|
1815
1843
|
const parsed = parseArgs(argv);
|
|
1816
1844
|
rejectUnknownFlags(parsed, new Set(["dir", "candidate", "json"]));
|
|
1817
|
-
const
|
|
1845
|
+
const inspection = localInspectionFromParsed(parsed);
|
|
1846
|
+
const snapshot = await inspection.snapshot();
|
|
1818
1847
|
const candidateId = readCandidateIdFlag(parsed, snapshot);
|
|
1819
|
-
const candidate =
|
|
1848
|
+
const candidate = await inspection.candidate({ id: candidateId });
|
|
1820
1849
|
writeOutput(candidate, parsed, io, (record) => [
|
|
1821
1850
|
`${record.id}\t${record.status}`,
|
|
1822
1851
|
`benchmark\t${record.benchmarkFingerprint}`,
|
|
1823
|
-
`candidate\t${record.candidateFingerprint
|
|
1852
|
+
`candidate\t${record.candidateFingerprint}`,
|
|
1824
1853
|
`evaluation\t${formatCandidateEvaluationSummary(record)}`,
|
|
1825
1854
|
...(record.baseId ? [`base\t${record.baseId}`] : []),
|
|
1826
1855
|
].join("\n"));
|
|
@@ -1829,10 +1858,10 @@ async function localCandidateShow(argv, io) {
|
|
|
1829
1858
|
async function localCandidateFiles(argv, io) {
|
|
1830
1859
|
const parsed = parseArgs(argv);
|
|
1831
1860
|
rejectUnknownFlags(parsed, new Set(["dir", "candidate", "json"]));
|
|
1832
|
-
const
|
|
1861
|
+
const inspection = localInspectionFromParsed(parsed);
|
|
1862
|
+
const snapshot = await inspection.snapshot();
|
|
1833
1863
|
const candidateId = readCandidateIdFlag(parsed, snapshot);
|
|
1834
|
-
const
|
|
1835
|
-
const files = summarizeCandidateFiles(readLocalCandidateFiles(snapshot, candidateId), candidate.fileChanges);
|
|
1864
|
+
const files = await inspection.candidateFiles({ id: candidateId });
|
|
1836
1865
|
writeOutput(files, parsed, io, (records) => records
|
|
1837
1866
|
.map((file) => `${file.path}\t${file.status}\t${file.preview_kind}`)
|
|
1838
1867
|
.join("\n") || "No files.");
|
|
@@ -1841,10 +1870,11 @@ async function localCandidateFiles(argv, io) {
|
|
|
1841
1870
|
async function localCandidatePreview(argv, io) {
|
|
1842
1871
|
const parsed = parseArgs(argv);
|
|
1843
1872
|
rejectUnknownFlags(parsed, new Set(["dir", "candidate", "path", "output", "view", "json"]));
|
|
1844
|
-
const
|
|
1873
|
+
const inspection = localInspectionFromParsed(parsed);
|
|
1874
|
+
const snapshot = await inspection.snapshot();
|
|
1845
1875
|
const candidateId = readCandidateIdFlag(parsed, snapshot);
|
|
1846
|
-
const preview =
|
|
1847
|
-
|
|
1876
|
+
const preview = await inspection.candidatePreview({
|
|
1877
|
+
id: candidateId,
|
|
1848
1878
|
path: requireFlag(parsed, "path"),
|
|
1849
1879
|
view: readPreviewMode(parsed),
|
|
1850
1880
|
});
|
|
@@ -1865,7 +1895,7 @@ async function localCandidatePreview(argv, io) {
|
|
|
1865
1895
|
async function localRunList(argv, io) {
|
|
1866
1896
|
const parsed = parseArgs(argv);
|
|
1867
1897
|
rejectUnknownFlags(parsed, new Set(["dir", "json"]));
|
|
1868
|
-
const snapshot = await
|
|
1898
|
+
const snapshot = await localInspectionFromParsed(parsed).snapshot();
|
|
1869
1899
|
writeOutput(snapshot.runs, parsed, io, (runs) => runs
|
|
1870
1900
|
.map((run) => `${run.id}\t${run.workflow}\t${run.status}\t${run.outcome ?? "pending"}\t${run.attemptsExecuted ?? 0}/${run.attemptsRequested ?? 0}`)
|
|
1871
1901
|
.join("\n") || "No runs.");
|
|
@@ -1873,67 +1903,130 @@ async function localRunList(argv, io) {
|
|
|
1873
1903
|
}
|
|
1874
1904
|
/**
 * `workbench runs show RUN_ID [--jobs] [--failures] [--dir] [--json]`.
 *
 * Prints a summary of one local run. `--jobs` (or `--failures`) asks the
 * inspection for the run's jobs; `--failures` additionally runs a diagnosis
 * for the run and attaches it to the output under `diagnosis`.
 *
 * @param {string[]} argv - Arguments after `runs show`.
 * @param {object} io - I/O handle forwarded to writeOutput.
 * @returns {Promise<number>} Always 0 on success.
 * @throws {UsageError} When RUN_ID is missing.
 */
async function localRunShow(argv, io) {
    const parsed = parseArgs(argv);
    rejectUnknownFlags(parsed, new Set(["dir", "jobs", "failures", "json"]));
    const [runId] = parsed.positionals;
    if (!runId) {
        throw new UsageError("workbench runs show requires RUN_ID.");
    }
    const wantsFailures = parsed.flags.failures === true;
    const wantsJobs = parsed.flags.jobs === true || wantsFailures;
    const inspection = localInspectionFromParsed(parsed);
    const detail = await inspection.run({ id: runId, includeJobs: wantsJobs });
    let diagnosis = null;
    if (wantsFailures) {
        diagnosis = await inspection.diagnose({ targetId: runId });
    }
    // Only attach the diagnosis key when it was requested, so the JSON shape
    // without --failures is exactly the inspection's run detail.
    const payload = wantsFailures ? { ...detail, diagnosis } : detail;
    const renderText = (record) => {
        const run = record.run;
        let jobs = [];
        if ("jobs" in record && Array.isArray(record.jobs)) {
            jobs = record.jobs;
        }
        let failures = [];
        if ("diagnosis" in record && record.diagnosis) {
            failures = record.diagnosis.failures;
        }
        const lines = [
            `${run.id}\t${run.workflow}\t${run.status}`,
            `outcome\t${run.outcome ?? "pending"}`,
            `started\t${run.startedAt}`,
        ];
        if (run.finishedAt) {
            lines.push(`finished\t${run.finishedAt}`);
        }
        lines.push(`attempts\t${run.attemptsExecuted ?? 0}/${run.attemptsRequested ?? 0}`);
        lines.push(`samples\t${run.samples ?? 0}`);
        if (jobs.length > 0) {
            lines.push("jobs", ...jobs.map((job) => `${job.id}\t${job.kind}\t${job.status}${job.error ? `\t${job.error}` : ""}`));
        }
        if (failures.length > 0) {
            lines.push("failures", ...failures.map(formatFailureLine));
        }
        return lines.join("\n");
    };
    writeOutput(payload, parsed, io, renderText);
    return 0;
}
|
|
1952
|
+
/**
 * `workbench evaluations list [--dir] [--json]`.
 *
 * Prints one tab-separated line per evaluation row reported by the local
 * inspection, or "No evaluations." when there are none.
 *
 * @param {string[]} argv - Arguments after `evaluations list`.
 * @param {object} io - I/O handle forwarded to writeOutput.
 * @returns {Promise<number>} Always 0 on success.
 */
async function localEvaluationList(argv, io) {
    const parsed = parseArgs(argv);
    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
    const inspection = localInspectionFromParsed(parsed);
    const comparison = await inspection.evaluations();
    const renderText = (record) => {
        const lines = record.rows.map((row) => `${row.evaluationId}\t${row.status}\t${formatNullableMetric(row.score)}\t${row.candidateLabel}\t${row.configurationLabel}\t${row.runId}`);
        const text = lines.join("\n");
        return text || "No evaluations.";
    };
    writeOutput(comparison, parsed, io, renderText);
    return 0;
}
|
|
1961
|
+
/**
 * `workbench evaluations show EVALUATION_ID [--dir] [--json]`.
 *
 * Prints the status, candidate, run, sample counts, error count and mean score
 * of one evaluation, followed by a per-case table when the evaluation carries
 * cases.
 *
 * @param {string[]} argv - Arguments after `evaluations show`.
 * @param {object} io - I/O handle forwarded to writeOutput.
 * @returns {Promise<number>} Always 0 on success.
 * @throws {UsageError} When EVALUATION_ID is missing.
 */
async function localEvaluationShow(argv, io) {
    const parsed = parseArgs(argv);
    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
    const [evaluationId] = parsed.positionals;
    if (!evaluationId) {
        throw new UsageError("workbench evaluations show requires EVALUATION_ID.");
    }
    const inspection = localInspectionFromParsed(parsed);
    const evaluation = await inspection.evaluation({ id: evaluationId });
    const renderText = (record) => {
        const lines = [
            `${record.id}\t${record.status}`,
            `candidate\t${record.candidateName ?? record.candidateId}`,
            `run\t${record.runId}`,
            `samples\t${record.completedSampleCount}/${record.sampleCount}`,
            `errors\t${record.errorSampleCount}`,
            `score\t${formatNullableMetric(record.metrics?.score?.mean ?? null)}`,
        ];
        if (record.error) {
            lines.push(`error\t${record.error}`);
        }
        if (record.evaluation.cases?.length) {
            lines.push("cases");
            for (const entry of record.evaluation.cases) {
                lines.push(`${entry.id}\t${entry.status ?? "unknown"}\t${formatNullableMetric(entry.metrics?.score?.mean ?? null)}`);
            }
        }
        return lines.join("\n");
    };
    writeOutput(evaluation, parsed, io, renderText);
    return 0;
}
|
|
1986
|
+
/**
 * `workbench executions trace --run RUN_ID --job JOB_ID [--dir] [--json]`.
 *
 * Prints, for each execution returned by the local inspection, its id, kind
 * and status plus counts of sessions, spans, events and summaries. Executions
 * are separated by a blank line; "No execution trace." is printed when the
 * list is empty.
 *
 * @param {string[]} argv - Arguments after `executions trace`.
 * @param {object} io - I/O handle forwarded to writeOutput.
 * @returns {Promise<number>} Always 0 on success.
 */
async function localExecutionTrace(argv, io) {
    const parsed = parseArgs(argv);
    rejectUnknownFlags(parsed, new Set(["dir", "run", "job", "json"]));
    const runId = requireFlag(parsed, "run");
    const jobId = requireFlag(parsed, "job");
    const inspection = localInspectionFromParsed(parsed);
    const detail = await inspection.executionTrace({ runId, jobId });
    const describeExecution = (execution) => {
        const lines = [];
        lines.push(`${execution.id}\t${execution.kind}\t${execution.status}`);
        lines.push(`jobs\t${execution.jobIds.join(",")}`);
        lines.push(`sessions\t${execution.sessions.length}`);
        lines.push(`spans\t${execution.trace.spans.length}`);
        lines.push(`events\t${execution.trace.events.length}`);
        lines.push(`summaries\t${execution.trace.summaries.length}`);
        return lines.join("\n");
    };
    const renderText = (record) => {
        const sections = record.executions.map((execution) => describeExecution(execution));
        const text = sections.join("\n\n");
        return text || "No execution trace.";
    };
    writeOutput(detail, parsed, io, renderText);
    return 0;
}
|
|
2004
|
+
/**
 * `workbench diagnose [TARGET_ID] [--dir] [--json]`.
 *
 * Asks the local inspection for a diagnosis (of TARGET_ID when given, otherwise
 * with a null target) and prints one line per failure, or "No failures.".
 *
 * @param {string[]} argv - Arguments after `diagnose`; at most one positional.
 * @param {object} io - I/O handle forwarded to writeOutput.
 * @returns {Promise<number>} Always 0 on success.
 */
async function localDiagnose(argv, io) {
    const parsed = parseArgs(argv);
    rejectUnknownFlags(parsed, new Set(["dir", "json"]));
    rejectUnexpectedPositionals(parsed, "workbench diagnose", 1);
    const inspection = localInspectionFromParsed(parsed);
    const targetId = parsed.positionals[0] ?? null;
    const diagnosis = await inspection.diagnose({ targetId });
    const renderText = (record) => {
        if (record.failures.length > 0) {
            return record.failures.map(formatFailureLine).join("\n");
        }
        return "No failures.";
    };
    writeOutput(diagnosis, parsed, io, renderText);
    return 0;
}
|
|
1896
2014
|
/**
 * Entry point for `workbench auth <subcommand>`; dispatches through
 * AUTH_COMMAND_HANDLERS.
 *
 * @param {string[]} argv - Arguments after `auth`.
 * @param {object} io - I/O handle forwarded to the handler.
 * @returns {Promise<unknown>} The selected handler's result.
 */
async function runAuthCommand(argv, io) {
    const result = await runSubCommand("auth", AUTH_COMMAND_HANDLERS, argv, io);
    return result;
}
|
|
1908
2017
|
/**
 * Entry point for `workbench adapters <subcommand>`; dispatches through
 * ADAPTERS_COMMAND_HANDLERS.
 *
 * @param {string[]} argv - Arguments after `adapters`.
 * @param {object} io - I/O handle forwarded to the handler.
 * @returns {Promise<unknown>} The selected handler's result.
 */
async function runAdaptersCommand(argv, io) {
    const result = await runSubCommand("adapters", ADAPTERS_COMMAND_HANDLERS, argv, io);
    return result;
}
|
|
1924
2020
|
/**
 * Entry point for `workbench traces <subcommand>`; dispatches through
 * TRACES_COMMAND_HANDLERS.
 *
 * @param {string[]} argv - Arguments after `traces`.
 * @param {object} io - I/O handle forwarded to the handler.
 * @returns {Promise<unknown>} The selected handler's result.
 */
async function runTracesCommand(argv, io) {
    const result = await runSubCommand("traces", TRACES_COMMAND_HANDLERS, argv, io);
    return result;
}
|
|
2023
|
+
/**
 * Dispatches a grouped CLI command (for example `auth connect`) to its handler.
 *
 * @param {string} group - Group name; used only in the error message.
 * @param {Record<string, Function>} handlers - Handlers keyed by subcommand name.
 * @param {string[]} argv - Arguments after the group name; argv[0] is the subcommand.
 * @param {object} io - I/O handle passed to the handler untouched.
 * @returns {Promise<unknown>} Whatever the selected handler resolves to.
 * @throws {UsageError} When argv[0] is missing or is not an own key of `handlers`.
 */
async function runSubCommand(group, handlers, argv, io) {
    const command = argv[0] ?? "";
    // The subcommand is user input: accept only own keys of the table. A bare
    // `handlers[command]` would resolve "constructor", "toString", etc. through
    // Object.prototype and call that built-in as if it were a command handler
    // instead of reporting an unknown command.
    const handler = Object.prototype.hasOwnProperty.call(handlers, command)
        ? handlers[command]
        : undefined;
    if (!handler) {
        throw new UsageError(`Unknown command: ${group} ${argv.join(" ")}`);
    }
    return await handler(argv.slice(1), io);
}
|
|
1938
2031
|
const DEFAULT_LOCAL_TRACE_LIMIT = 3;
|
|
1939
2032
|
const LOCAL_TRACE_WINDOW_FLAGS = new Set(["providers", "since", "workspace", "limit", "json"]);
|
|
@@ -2446,7 +2539,11 @@ fs.writeFileSync(resultPath, JSON.stringify({
|
|
|
2446
2539
|
async function login(argv, io) {
|
|
2447
2540
|
const parsed = parseArgs(argv);
|
|
2448
2541
|
rejectUnknownFlags(parsed, new Set(["base-url", "no-open", "json"]));
|
|
2449
|
-
const
|
|
2542
|
+
const config = await loadConfig();
|
|
2543
|
+
const baseUrl = selectWorkbenchBaseUrl({
|
|
2544
|
+
explicitBaseUrl: asOptionalString(parsed.flags["base-url"]),
|
|
2545
|
+
configBaseUrl: config.baseUrl,
|
|
2546
|
+
});
|
|
2450
2547
|
const authorization = await requestDeviceAuthorization(baseUrl);
|
|
2451
2548
|
if (parsed.flags.json === true) {
|
|
2452
2549
|
writeJson({ ok: true, status: "authorization_pending", ...authorization }, io);
|
|
@@ -2472,7 +2569,7 @@ async function logout(argv, io) {
|
|
|
2472
2569
|
const parsed = parseArgs(argv);
|
|
2473
2570
|
rejectUnknownFlags(parsed, new Set(["json"]));
|
|
2474
2571
|
const config = await loadConfig();
|
|
2475
|
-
const baseUrl =
|
|
2572
|
+
const baseUrl = selectWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
|
|
2476
2573
|
if (config.accessToken) {
|
|
2477
2574
|
await fetch(`${baseUrl}/api/oauth/revoke`, {
|
|
2478
2575
|
method: "POST",
|
|
@@ -2493,8 +2590,8 @@ async function authStatus(argv, io) {
|
|
|
2493
2590
|
const baseUrl = await effectiveBaseUrl();
|
|
2494
2591
|
const profileStatus = await readWorkbenchProfileStatus(config);
|
|
2495
2592
|
const adapterStatuses = await localWorkbenchAdapterAuthStore().listStatus();
|
|
2496
|
-
const
|
|
2497
|
-
? await
|
|
2593
|
+
const remoteAuth = profileStatus.authenticated
|
|
2594
|
+
? await readRemoteAdapterAuthStatuses().catch((error) => ({
|
|
2498
2595
|
adapters: [],
|
|
2499
2596
|
error: error instanceof Error ? error.message : String(error),
|
|
2500
2597
|
}))
|
|
@@ -2503,7 +2600,7 @@ async function authStatus(argv, io) {
|
|
|
2503
2600
|
error: "not_authenticated",
|
|
2504
2601
|
};
|
|
2505
2602
|
const dir = resolveDir(parsed);
|
|
2506
|
-
const adapterAuth = await projectAdapterAuthStatus(dir, adapterStatuses,
|
|
2603
|
+
const adapterAuth = await projectAdapterAuthStatus(dir, adapterStatuses, remoteAuth.adapters).catch(() => []);
|
|
2507
2604
|
const result = {
|
|
2508
2605
|
ok: true,
|
|
2509
2606
|
workbench: {
|
|
@@ -2512,7 +2609,7 @@ async function authStatus(argv, io) {
|
|
|
2512
2609
|
username: profileStatus.profile?.username ?? null,
|
|
2513
2610
|
},
|
|
2514
2611
|
adapterStatuses,
|
|
2515
|
-
|
|
2612
|
+
remoteAuth,
|
|
2516
2613
|
adapterAuth,
|
|
2517
2614
|
};
|
|
2518
2615
|
writeOutput(result, parsed, io, (record) => {
|
|
@@ -2525,28 +2622,28 @@ async function authStatus(argv, io) {
|
|
|
2525
2622
|
? [
|
|
2526
2623
|
"",
|
|
2527
2624
|
"Required adapter auth:",
|
|
2528
|
-
...value.adapterAuth.map((adapter) => `${adapter.adapter}${adapter.profile !== "default" ? ` profile ${adapter.profile}` : ""}: local ${adapter.local.status}${adapter.local.method ? ` (${adapter.local.method})` : ""}${adapter.local.reason ? ` (${adapter.local.reason})` : ""},
|
|
2625
|
+
...value.adapterAuth.map((adapter) => `${adapter.adapter}${adapter.profile !== "default" ? ` profile ${adapter.profile}` : ""}: local ${adapter.local.status}${adapter.local.method ? ` (${adapter.local.method})` : ""}${adapter.local.reason ? ` (${adapter.local.reason})` : ""}, remote ${adapter.remote.status}${adapter.remote.method ? ` (${adapter.remote.method})` : ""}${adapter.remote.reason ? ` (${adapter.remote.reason})` : ""}`),
|
|
2529
2626
|
]
|
|
2530
2627
|
: []),
|
|
2531
2628
|
].join("\n");
|
|
2532
2629
|
});
|
|
2533
2630
|
return 0;
|
|
2534
2631
|
}
|
|
2535
|
-
async function projectAdapterAuthStatus(dir, adapterStatuses,
|
|
2632
|
+
async function projectAdapterAuthStatus(dir, adapterStatuses, remoteAdapters) {
|
|
2536
2633
|
const spec = (await readLocalProjectSource(dir)).spec;
|
|
2537
2634
|
const adapters = await resolveWorkbenchAdaptersForProject(dir, spec);
|
|
2538
2635
|
const adapterStatusMap = new Map(adapterStatuses.map((status) => [
|
|
2539
2636
|
adapterAuthStatusKey(status.adapterId, status.slot, status.profile),
|
|
2540
2637
|
status,
|
|
2541
2638
|
]));
|
|
2542
|
-
const
|
|
2639
|
+
const remoteAdapterStatusMap = new Map(remoteAdapters.map((status) => [
|
|
2543
2640
|
adapterAuthStatusKey(status.adapterId, status.slot, status.profile),
|
|
2544
2641
|
status,
|
|
2545
2642
|
]));
|
|
2546
2643
|
const adapterById = new Map(adapters.map((adapter) => [adapter.manifest.id, adapter]));
|
|
2547
2644
|
return requiredAuthTargetsForSpec(spec, adapterById).map((target) => {
|
|
2548
2645
|
const adapterStatus = adapterStatusMap.get(adapterAuthStatusKey(target.adapter, target.slot, target.profile));
|
|
2549
|
-
const
|
|
2646
|
+
const remoteAdapterStatus = remoteAdapterStatusMap.get(adapterAuthStatusKey(target.adapter, target.slot, target.profile));
|
|
2550
2647
|
return {
|
|
2551
2648
|
...target,
|
|
2552
2649
|
local: adapterStatus
|
|
@@ -2556,17 +2653,17 @@ async function projectAdapterAuthStatus(dir, adapterStatuses, hostedAdapters) {
|
|
|
2556
2653
|
...(adapterStatus.reason ? { reason: adapterStatus.reason } : {}),
|
|
2557
2654
|
}
|
|
2558
2655
|
: { status: "disconnected" },
|
|
2559
|
-
|
|
2656
|
+
remote: remoteAdapterStatus
|
|
2560
2657
|
? {
|
|
2561
|
-
status:
|
|
2562
|
-
...(
|
|
2563
|
-
...(
|
|
2658
|
+
status: remoteAdapterStatus.status,
|
|
2659
|
+
...(remoteAdapterStatus.method ? { method: remoteAdapterStatus.method } : {}),
|
|
2660
|
+
...(remoteAdapterStatus.reason ? { reason: remoteAdapterStatus.reason } : {}),
|
|
2564
2661
|
}
|
|
2565
2662
|
: { status: "disconnected" },
|
|
2566
2663
|
};
|
|
2567
2664
|
});
|
|
2568
2665
|
}
|
|
2569
|
-
async function
|
|
2666
|
+
async function readRemoteAdapterAuthStatuses() {
|
|
2570
2667
|
const adapterResponse = await apiRequest("/api/workbench/auth/adapters");
|
|
2571
2668
|
return {
|
|
2572
2669
|
adapters: adapterResponse.adapters ?? [],
|
|
@@ -2941,7 +3038,7 @@ async function pushBenchmark(argv, io) {
|
|
|
2941
3038
|
const dir = resolveSourceDir(parsed);
|
|
2942
3039
|
const source = await readLocalProjectSource(dir);
|
|
2943
3040
|
const origin = await readWorkbenchOrigin(dir);
|
|
2944
|
-
const baseUrl = await
|
|
3041
|
+
const baseUrl = await effectiveOriginBaseUrl(origin?.baseUrl);
|
|
2945
3042
|
const visibility = readOptionalBenchmarkVisibility(parsed.flags.visibility);
|
|
2946
3043
|
const createVisibility = visibility ?? "public";
|
|
2947
3044
|
const dryRun = parsed.flags["dry-run"] === true;
|
|
@@ -2972,7 +3069,7 @@ async function pushBenchmark(argv, io) {
|
|
|
2972
3069
|
}, parsed, io, () => `Would push benchmark ${source.spec.name}.`);
|
|
2973
3070
|
return 0;
|
|
2974
3071
|
}
|
|
2975
|
-
const { project, origin: nextOrigin, result } = await
|
|
3072
|
+
const { project, origin: nextOrigin, result } = await createRemoteBenchmarkFromState({
|
|
2976
3073
|
baseUrl,
|
|
2977
3074
|
dir,
|
|
2978
3075
|
state,
|
|
@@ -3001,7 +3098,7 @@ async function pushBenchmark(argv, io) {
|
|
|
3001
3098
|
}
|
|
3002
3099
|
const projectId = origin.projectId;
|
|
3003
3100
|
if (!projectId) {
|
|
3004
|
-
throw new UsageError("Missing
|
|
3101
|
+
throw new UsageError("Missing remote benchmark. Run workbench push from a source directory.");
|
|
3005
3102
|
}
|
|
3006
3103
|
if (dryRun) {
|
|
3007
3104
|
const remoteProject = await verifyLinkedPushDryRunTarget({
|
|
@@ -3017,7 +3114,7 @@ async function pushBenchmark(argv, io) {
|
|
|
3017
3114
|
baseUrl,
|
|
3018
3115
|
benchmarkId: projectId,
|
|
3019
3116
|
remote: origin.remote,
|
|
3020
|
-
benchmark:
|
|
3117
|
+
benchmark: remoteProjectSummaryForOutput(remoteProject),
|
|
3021
3118
|
benchmarkName: source.spec.name,
|
|
3022
3119
|
visibility: visibility ?? "unchanged",
|
|
3023
3120
|
sourceFileCount: sourceFileCount(source),
|
|
@@ -3031,7 +3128,7 @@ async function pushBenchmark(argv, io) {
|
|
|
3031
3128
|
method: "PUT",
|
|
3032
3129
|
body: state,
|
|
3033
3130
|
}, baseUrl);
|
|
3034
|
-
const responseProject =
|
|
3131
|
+
const responseProject = remoteProjectSummaryFromState(response.state);
|
|
3035
3132
|
const publishedProject = await applyRequestedProjectVisibility({
|
|
3036
3133
|
baseUrl,
|
|
3037
3134
|
projectId: responseProject.id,
|
|
@@ -3079,7 +3176,7 @@ async function verifyLinkedPushDryRunTarget(args) {
|
|
|
3079
3176
|
}
|
|
3080
3177
|
return response.benchmark;
|
|
3081
3178
|
}
|
|
3082
|
-
function
|
|
3179
|
+
function remoteProjectSummaryForOutput(project) {
|
|
3083
3180
|
return {
|
|
3084
3181
|
...(project.id ? { id: project.id } : {}),
|
|
3085
3182
|
...(project.ownerUsername ? { ownerUsername: project.ownerUsername } : {}),
|
|
@@ -3089,12 +3186,12 @@ function hostedProjectSummaryForOutput(project) {
|
|
|
3089
3186
|
...(typeof project.starCount === "number" ? { starCount: project.starCount } : {}),
|
|
3090
3187
|
};
|
|
3091
3188
|
}
|
|
3092
|
-
async function
|
|
3189
|
+
async function createRemoteBenchmarkFromState(args) {
|
|
3093
3190
|
const result = await apiRequest("/api/workbench/benchmarks/state", {
|
|
3094
3191
|
method: "POST",
|
|
3095
3192
|
body: args.state,
|
|
3096
3193
|
}, args.baseUrl);
|
|
3097
|
-
const project =
|
|
3194
|
+
const project = remoteProjectSummaryFromState(result.state);
|
|
3098
3195
|
const applied = await acceptPushedProjectStateToLocal({
|
|
3099
3196
|
dir: args.dir,
|
|
3100
3197
|
baseUrl: args.baseUrl,
|
|
@@ -3168,7 +3265,7 @@ async function pullProject(argv, io) {
|
|
|
3168
3265
|
}
|
|
3169
3266
|
const dir = resolveDir(parsed);
|
|
3170
3267
|
const origin = await requireWorkbenchOrigin(dir);
|
|
3171
|
-
const baseUrl = await
|
|
3268
|
+
const baseUrl = await effectiveOriginBaseUrl(origin.baseUrl);
|
|
3172
3269
|
const remoteRef = parseOriginRemote(origin);
|
|
3173
3270
|
const state = await apiRequest(publicProjectStateApiPath(remoteRef), {}, baseUrl);
|
|
3174
3271
|
if (parsed.flags["dry-run"] === true) {
|
|
@@ -3228,7 +3325,7 @@ async function acceptPushedProjectStateToLocal(args) {
|
|
|
3228
3325
|
});
|
|
3229
3326
|
return { origin, runtime: runtime.stats };
|
|
3230
3327
|
}
|
|
3231
|
-
async function
|
|
3328
|
+
async function retryRemoteWorkflow(argv, io) {
|
|
3232
3329
|
const parsed = parseArgs(argv);
|
|
3233
3330
|
rejectUnknownFlags(parsed, new Set([
|
|
3234
3331
|
"dir",
|
|
@@ -3238,7 +3335,7 @@ async function retryHostedWorkflow(argv, io) {
|
|
|
3238
3335
|
"timeout-ms",
|
|
3239
3336
|
"json",
|
|
3240
3337
|
]));
|
|
3241
|
-
rejectUnexpectedPositionals(parsed, "workbench retry --
|
|
3338
|
+
rejectUnexpectedPositionals(parsed, "workbench retry --remote", 1);
|
|
3242
3339
|
const targetId = parsed.positionals[0];
|
|
3243
3340
|
if (!targetId) {
|
|
3244
3341
|
throw new UsageError("Missing required TARGET_ID.");
|
|
@@ -3247,8 +3344,8 @@ async function retryHostedWorkflow(argv, io) {
|
|
|
3247
3344
|
parsed.flags["timeout-ms"] !== undefined)) {
|
|
3248
3345
|
throw new UsageError("--interval-ms and --timeout-ms require --watch.");
|
|
3249
3346
|
}
|
|
3250
|
-
const target = await
|
|
3251
|
-
const retryTarget = await
|
|
3347
|
+
const target = await resolveRemoteTarget(parsed, { requireProjectIdentity: true });
|
|
3348
|
+
const retryTarget = await resolveRemoteRetryTarget(target, targetId);
|
|
3252
3349
|
const watchIntervalMs = parsed.flags.watch === true
|
|
3253
3350
|
? parsePositiveInt(parsed.flags["interval-ms"], 1000, "interval-ms")
|
|
3254
3351
|
: undefined;
|
|
@@ -3259,23 +3356,23 @@ async function retryHostedWorkflow(argv, io) {
|
|
|
3259
3356
|
method: "POST",
|
|
3260
3357
|
body: retryTarget.request,
|
|
3261
3358
|
}, target.baseUrl);
|
|
3262
|
-
const runTarget =
|
|
3359
|
+
const runTarget = remoteTargetForRunStartResponse(target, response);
|
|
3263
3360
|
const startedRun = withRunUrls(runTarget, response.run);
|
|
3264
3361
|
if (parsed.flags.watch === true) {
|
|
3265
3362
|
if (parsed.flags.json !== true) {
|
|
3266
|
-
io.stdout.write(`${
|
|
3363
|
+
io.stdout.write(`${formatRemoteRunStarted(startedRun, retryTarget.workflow).trimEnd()}\n${REMOTE_WATCH_LIFECYCLE_NOTE}\n`);
|
|
3267
3364
|
}
|
|
3268
|
-
const watched = await
|
|
3365
|
+
const watched = await watchRemoteRun({
|
|
3269
3366
|
parsed,
|
|
3270
3367
|
target: runTarget,
|
|
3271
3368
|
runId: response.run.id,
|
|
3272
3369
|
intervalMs: watchIntervalMs ?? 1000,
|
|
3273
3370
|
timeoutMs: watchTimeoutMs,
|
|
3274
3371
|
});
|
|
3275
|
-
const outputRun = withRunUrls(runTarget, await
|
|
3276
|
-
await
|
|
3372
|
+
const outputRun = withRunUrls(runTarget, await withRemoteRunFailureSummary(runTarget, watched));
|
|
3373
|
+
await tryImportTerminalRemoteProjectState({ target: runTarget, io });
|
|
3277
3374
|
const result = {
|
|
3278
|
-
ok:
|
|
3375
|
+
ok: remoteRunSucceeded(watched),
|
|
3279
3376
|
retried: {
|
|
3280
3377
|
id: retryTarget.sourceId,
|
|
3281
3378
|
kind: retryTarget.sourceKind,
|
|
@@ -3290,7 +3387,7 @@ async function retryHostedWorkflow(argv, io) {
|
|
|
3290
3387
|
...(outputRun.error ? { error: outputRun.error } : {}),
|
|
3291
3388
|
};
|
|
3292
3389
|
writeOutput(result, parsed, io, formatRetryCommandResult);
|
|
3293
|
-
return
|
|
3390
|
+
return remoteRunSucceeded(watched) ? 0 : 1;
|
|
3294
3391
|
}
|
|
3295
3392
|
const result = {
|
|
3296
3393
|
ok: true,
|
|
@@ -3308,20 +3405,20 @@ async function retryHostedWorkflow(argv, io) {
|
|
|
3308
3405
|
writeOutput(result, parsed, io, formatRetryCommandResult);
|
|
3309
3406
|
return 0;
|
|
3310
3407
|
}
|
|
3311
|
-
async function
|
|
3408
|
+
async function resolveRemoteRetryTarget(target, targetId) {
|
|
3312
3409
|
if (targetId.startsWith("eval_")) {
|
|
3313
|
-
return await
|
|
3410
|
+
return await resolveRemoteEvaluationRetryTarget(target, targetId);
|
|
3314
3411
|
}
|
|
3315
|
-
const detail = await
|
|
3412
|
+
const detail = await readRemoteRunDetail(target, targetId);
|
|
3316
3413
|
const run = detail.run;
|
|
3317
3414
|
if (run.status !== "finished") {
|
|
3318
3415
|
throw new UsageError(`Run ${run.id} is ${run.status}; wait for it to finish before retrying.`);
|
|
3319
3416
|
}
|
|
3320
|
-
if (!
|
|
3321
|
-
throw new UsageError(`Run ${run.id} did not fail; use workbench ${run.workflow ?? "eval"} --
|
|
3417
|
+
if (!remoteRunRecordFailed(run)) {
|
|
3418
|
+
throw new UsageError(`Run ${run.id} did not fail; use workbench ${run.workflow ?? "eval"} --remote to intentionally run it again.`);
|
|
3322
3419
|
}
|
|
3323
3420
|
if (run.workflow === "eval") {
|
|
3324
|
-
const candidateId =
|
|
3421
|
+
const candidateId = remoteRunEvaluationCandidateId(run, detail.jobs);
|
|
3325
3422
|
if (!candidateId) {
|
|
3326
3423
|
throw new UsageError(`Run ${run.id} has no candidate id to retry.`);
|
|
3327
3424
|
}
|
|
@@ -3330,17 +3427,18 @@ async function resolveHostedRetryTarget(target, targetId) {
|
|
|
3330
3427
|
sourceKind: "run",
|
|
3331
3428
|
workflow: "eval",
|
|
3332
3429
|
request: {
|
|
3430
|
+
schema: "workbench.remote.run.request.v1",
|
|
3333
3431
|
workflow: "eval",
|
|
3334
3432
|
samples: run.samples ?? 1,
|
|
3335
3433
|
candidateId,
|
|
3336
|
-
sourceYaml:
|
|
3434
|
+
sourceYaml: remoteRetrySourceYaml(run, run.id),
|
|
3337
3435
|
preserveActive: true,
|
|
3338
3436
|
...retrySampleSelectionFromJobs(detail.jobs),
|
|
3339
3437
|
},
|
|
3340
3438
|
};
|
|
3341
3439
|
}
|
|
3342
3440
|
if (run.workflow === "improve") {
|
|
3343
|
-
const baseCandidateId = stringValue(readRecord(run.
|
|
3441
|
+
const baseCandidateId = stringValue(readRecord(run.retry)?.baseCandidateId);
|
|
3344
3442
|
if (!baseCandidateId) {
|
|
3345
3443
|
throw new UsageError(`Run ${run.id} is missing its base candidate id.`);
|
|
3346
3444
|
}
|
|
@@ -3349,41 +3447,43 @@ async function resolveHostedRetryTarget(target, targetId) {
|
|
|
3349
3447
|
sourceKind: "run",
|
|
3350
3448
|
workflow: "improve",
|
|
3351
3449
|
request: {
|
|
3450
|
+
schema: "workbench.remote.run.request.v1",
|
|
3352
3451
|
workflow: "improve",
|
|
3353
3452
|
samples: run.samples ?? 1,
|
|
3354
3453
|
budget: run.budget ?? run.attemptsRequested ?? 1,
|
|
3355
3454
|
candidateId: baseCandidateId,
|
|
3356
|
-
sourceYaml:
|
|
3455
|
+
sourceYaml: remoteRetrySourceYaml(run, run.id),
|
|
3357
3456
|
preserveActive: true,
|
|
3358
3457
|
},
|
|
3359
3458
|
};
|
|
3360
3459
|
}
|
|
3361
3460
|
throw new UsageError(`Run ${run.id} has no retryable workflow.`);
|
|
3362
3461
|
}
|
|
3363
|
-
async function
|
|
3462
|
+
async function resolveRemoteEvaluationRetryTarget(target, evaluationId) {
|
|
3364
3463
|
const snapshot = await apiRequest(projectApiPath(target.projectId, "/workbench/snapshot"), {}, target.baseUrl);
|
|
3365
3464
|
const evaluation = snapshot.evaluations.find((entry) => entry.id === evaluationId);
|
|
3366
3465
|
if (!evaluation) {
|
|
3367
|
-
throw new UsageError(`
|
|
3466
|
+
throw new UsageError(`Remote evaluation not found: ${evaluationId}`);
|
|
3368
3467
|
}
|
|
3369
3468
|
const run = snapshot.runs.find((entry) => entry.id === evaluation.runId) ?? null;
|
|
3370
3469
|
if (!evaluationScorecardFailed(evaluation, run)) {
|
|
3371
|
-
throw new UsageError(`Evaluation ${evaluation.id} did not fail; use workbench eval --
|
|
3470
|
+
throw new UsageError(`Evaluation ${evaluation.id} did not fail; use workbench eval --remote to intentionally run it again.`);
|
|
3372
3471
|
}
|
|
3373
3472
|
if (!run) {
|
|
3374
3473
|
throw new UsageError(`Evaluation ${evaluation.id} is missing its run record.`);
|
|
3375
3474
|
}
|
|
3376
|
-
const detail = await
|
|
3475
|
+
const detail = await readRemoteRunDetail(target, run.id);
|
|
3377
3476
|
const detailedRun = detail.run;
|
|
3378
3477
|
return {
|
|
3379
3478
|
sourceId: evaluationId,
|
|
3380
3479
|
sourceKind: "evaluation",
|
|
3381
3480
|
workflow: "eval",
|
|
3382
3481
|
request: {
|
|
3482
|
+
schema: "workbench.remote.run.request.v1",
|
|
3383
3483
|
workflow: "eval",
|
|
3384
3484
|
samples: evaluation.sampleCount || detailedRun.samples || 1,
|
|
3385
3485
|
candidateId: evaluation.candidateId,
|
|
3386
|
-
sourceYaml:
|
|
3486
|
+
sourceYaml: remoteRetrySourceYaml(detailedRun, detailedRun.id),
|
|
3387
3487
|
preserveActive: true,
|
|
3388
3488
|
...retrySampleSelectionFromJobs(detail.jobs),
|
|
3389
3489
|
},
|
|
@@ -3392,7 +3492,7 @@ async function resolveHostedEvaluationRetryTarget(target, evaluationId) {
|
|
|
3392
3492
|
function retrySampleSelectionFromJobs(jobs) {
|
|
3393
3493
|
const selectedSamples = uniqueCaseSamplePairs(jobs
|
|
3394
3494
|
.filter((job) => job.status !== "succeeded" &&
|
|
3395
|
-
|
|
3495
|
+
readRunJobPurpose(job) === "attempt")
|
|
3396
3496
|
.map(caseSamplePairFromJob)
|
|
3397
3497
|
.filter((pair) => pair !== null));
|
|
3398
3498
|
return selectedSamples.length > 0
|
|
@@ -3407,10 +3507,10 @@ function uniqueCaseSamplePairs(pairs) {
|
|
|
3407
3507
|
return [...byKey.values()].sort((left, right) => left.caseId.localeCompare(right.caseId) ||
|
|
3408
3508
|
left.sampleIndex - right.sampleIndex);
|
|
3409
3509
|
}
|
|
3410
|
-
async function
|
|
3510
|
+
async function readRemoteRunDetail(target, runId) {
|
|
3411
3511
|
return await apiRequest(projectApiPath(target.projectId, `/runs/${encodeURIComponent(runId)}`), {}, target.baseUrl);
|
|
3412
3512
|
}
|
|
3413
|
-
async function
|
|
3513
|
+
async function tryImportTerminalRemoteProjectState(args) {
|
|
3414
3514
|
const origin = args.target.origin;
|
|
3415
3515
|
if (!origin || origin.projectId !== args.target.projectId) {
|
|
3416
3516
|
return;
|
|
@@ -3426,23 +3526,23 @@ async function tryImportTerminalHostedProjectState(args) {
|
|
|
3426
3526
|
});
|
|
3427
3527
|
}
|
|
3428
3528
|
catch (error) {
|
|
3429
|
-
args.io.stderr.write(`
|
|
3529
|
+
args.io.stderr.write(`Remote run finished, but local project state was not updated: ${errorMessage(error)}\n`);
|
|
3430
3530
|
}
|
|
3431
3531
|
}
|
|
3432
|
-
function
|
|
3433
|
-
const sourceYaml = stringValue(readRecord(run.
|
|
3532
|
+
function remoteRetrySourceYaml(run, runId) {
|
|
3533
|
+
const sourceYaml = stringValue(readRecord(run.retry)?.sourceYaml);
|
|
3434
3534
|
if (!sourceYaml) {
|
|
3435
3535
|
throw new UsageError(`Run ${runId} is missing its recorded source configuration.`);
|
|
3436
3536
|
}
|
|
3437
3537
|
return sourceYaml;
|
|
3438
3538
|
}
|
|
3439
|
-
function
|
|
3539
|
+
function remoteRunRecordFailed(run) {
|
|
3440
3540
|
return run.outcome === "error" ||
|
|
3441
3541
|
run.outcome === "cancelled" ||
|
|
3442
3542
|
(run.failedJobCount ?? 0) > 0 ||
|
|
3443
3543
|
Boolean(run.error);
|
|
3444
3544
|
}
|
|
3445
|
-
async function
|
|
3545
|
+
async function startRemoteWorkflow(workflow, argv, io) {
|
|
3446
3546
|
const parsed = parseArgs(argv);
|
|
3447
3547
|
const allowedFlags = new Set([
|
|
3448
3548
|
"dir",
|
|
@@ -3465,7 +3565,7 @@ async function startHostedWorkflow(workflow, argv, io) {
|
|
|
3465
3565
|
}
|
|
3466
3566
|
rejectUnknownFlags(parsed, allowedFlags);
|
|
3467
3567
|
if (parsed.positionals.length > 1) {
|
|
3468
|
-
throw new UsageError(`workbench ${workflow} --
|
|
3568
|
+
throw new UsageError(`workbench ${workflow} --remote accepts at most one source file or directory argument.`);
|
|
3469
3569
|
}
|
|
3470
3570
|
const sourceArg = resolveSourceDir(parsed);
|
|
3471
3571
|
const samples = parsePositiveInt(parsed.flags.samples, 1, "samples");
|
|
@@ -3480,13 +3580,13 @@ async function startHostedWorkflow(workflow, argv, io) {
|
|
|
3480
3580
|
const defaultProjectSource = await readLocalProjectSource(path.resolve(sourceArg));
|
|
3481
3581
|
const selectedRunIds = workflow === "eval"
|
|
3482
3582
|
? resolveCandidateRunSelection(defaultProjectSource, runsFlag)
|
|
3483
|
-
: [singleRequestedRunId(runsFlag, `workbench ${workflow} --
|
|
3583
|
+
: [singleRequestedRunId(runsFlag, `workbench ${workflow} --remote`) ?? defaultProjectSource.candidateRunId];
|
|
3484
3584
|
if (workflow === "eval" && selectedRunIds.length > 1) {
|
|
3485
3585
|
let failed = 0;
|
|
3486
3586
|
const results = [];
|
|
3487
3587
|
for (const runId of selectedRunIds) {
|
|
3488
3588
|
const captured = createCapturingIo(io);
|
|
3489
|
-
const code = await
|
|
3589
|
+
const code = await startRemoteWorkflow(workflow, remoteWorkflowArgsForRun({
|
|
3490
3590
|
parsed,
|
|
3491
3591
|
sourceDir: defaultProjectSource.dir,
|
|
3492
3592
|
runId,
|
|
@@ -3501,7 +3601,7 @@ async function startHostedWorkflow(workflow, argv, io) {
|
|
|
3501
3601
|
candidateRunIds: selectedRunIds,
|
|
3502
3602
|
failedRunCount: failed,
|
|
3503
3603
|
results,
|
|
3504
|
-
}, parsed, io, () => `Processed ${selectedRunIds.length}
|
|
3604
|
+
}, parsed, io, () => `Processed ${selectedRunIds.length} remote candidate run(s); ${failed} failed.`);
|
|
3505
3605
|
return failed === 0 ? 0 : 1;
|
|
3506
3606
|
}
|
|
3507
3607
|
const selectedCandidateId = workflow === "eval"
|
|
@@ -3509,12 +3609,14 @@ async function startHostedWorkflow(workflow, argv, io) {
|
|
|
3509
3609
|
: asOptionalString(parsed.flags.base);
|
|
3510
3610
|
const request = workflow === "improve"
|
|
3511
3611
|
? {
|
|
3612
|
+
schema: "workbench.remote.run.request.v1",
|
|
3512
3613
|
workflow,
|
|
3513
3614
|
budget,
|
|
3514
3615
|
samples,
|
|
3515
3616
|
...(selectedCandidateId ? { candidateId: selectedCandidateId } : {}),
|
|
3516
3617
|
}
|
|
3517
3618
|
: {
|
|
3619
|
+
schema: "workbench.remote.run.request.v1",
|
|
3518
3620
|
workflow,
|
|
3519
3621
|
samples,
|
|
3520
3622
|
...(selectedCandidateId ? { candidateId: selectedCandidateId } : {}),
|
|
@@ -3538,7 +3640,7 @@ async function startHostedWorkflow(workflow, argv, io) {
|
|
|
3538
3640
|
: undefined;
|
|
3539
3641
|
const dryRun = parsed.flags["dry-run"] === true;
|
|
3540
3642
|
if (dryRun) {
|
|
3541
|
-
const target = await
|
|
3643
|
+
const target = await resolveRemoteDryRunTarget(parsed, { sourceDir: projectSource.dir });
|
|
3542
3644
|
writeOutput({
|
|
3543
3645
|
ok: true,
|
|
3544
3646
|
dryRun: true,
|
|
@@ -3547,20 +3649,21 @@ async function startHostedWorkflow(workflow, argv, io) {
|
|
|
3547
3649
|
dir: target.dir,
|
|
3548
3650
|
baseUrl: target.baseUrl,
|
|
3549
3651
|
request,
|
|
3550
|
-
}, parsed, io, () => `Would start
|
|
3652
|
+
}, parsed, io, () => `Would start remote ${workflow} for ${target.projectRef}.`);
|
|
3551
3653
|
return 0;
|
|
3552
3654
|
}
|
|
3553
|
-
const target = await
|
|
3655
|
+
const target = await resolveRemoteTarget(parsed, {
|
|
3554
3656
|
requireProjectIdentity: true,
|
|
3555
3657
|
sourceDir: projectSource.dir,
|
|
3556
3658
|
});
|
|
3557
3659
|
if (workflow === "improve") {
|
|
3558
|
-
request.candidateId = await
|
|
3660
|
+
request.candidateId = await ensureRemoteImproveBaseCandidate({
|
|
3559
3661
|
parsed,
|
|
3560
3662
|
target,
|
|
3561
3663
|
samples: request.samples,
|
|
3562
3664
|
candidateId: selectedCandidateId,
|
|
3563
3665
|
sourceYaml: projectSource.specSource,
|
|
3666
|
+
candidateFiles: projectSource.candidateFiles,
|
|
3564
3667
|
adapterFiles: projectSource.adapterFiles,
|
|
3565
3668
|
intervalMs: watchIntervalMs ?? 1000,
|
|
3566
3669
|
timeoutMs: watchTimeoutMs,
|
|
@@ -3571,53 +3674,53 @@ async function startHostedWorkflow(workflow, argv, io) {
|
|
|
3571
3674
|
method: "POST",
|
|
3572
3675
|
body: request,
|
|
3573
3676
|
}, target.baseUrl);
|
|
3574
|
-
const runTarget =
|
|
3677
|
+
const runTarget = remoteTargetForRunStartResponse(target, response);
|
|
3575
3678
|
const startedRun = withRunUrls(runTarget, response.run);
|
|
3576
3679
|
const startedRunOutput = response.reused === true
|
|
3577
3680
|
? { ...startedRun, reused: true }
|
|
3578
3681
|
: startedRun;
|
|
3579
3682
|
if (response.reused === true && response.run.status === "finished") {
|
|
3580
|
-
await
|
|
3683
|
+
await tryImportTerminalRemoteProjectState({ target: runTarget, io });
|
|
3581
3684
|
writeOutput({
|
|
3582
|
-
ok:
|
|
3685
|
+
ok: remoteRunSucceeded(response.run),
|
|
3583
3686
|
reused: true,
|
|
3584
3687
|
workflow,
|
|
3585
3688
|
runId: startedRun.id,
|
|
3586
3689
|
...startedRun,
|
|
3587
|
-
}, parsed, io, () => `Reused
|
|
3588
|
-
return
|
|
3690
|
+
}, parsed, io, () => `Reused remote ${workflow} ${startedRun.id}. Use --rerun to intentionally run it again.`);
|
|
3691
|
+
return remoteRunSucceeded(response.run) ? 0 : 1;
|
|
3589
3692
|
}
|
|
3590
3693
|
if (parsed.flags.watch === true) {
|
|
3591
3694
|
if (parsed.flags.json !== true) {
|
|
3592
|
-
io.stdout.write(`${
|
|
3695
|
+
io.stdout.write(`${formatRemoteRunStarted(startedRun, workflow).trimEnd()}\n${REMOTE_WATCH_LIFECYCLE_NOTE}\n`);
|
|
3593
3696
|
}
|
|
3594
|
-
const watched = await
|
|
3697
|
+
const watched = await watchRemoteRun({
|
|
3595
3698
|
parsed,
|
|
3596
3699
|
target: runTarget,
|
|
3597
3700
|
runId: response.run.id,
|
|
3598
3701
|
intervalMs: watchIntervalMs ?? 1000,
|
|
3599
3702
|
timeoutMs: watchTimeoutMs,
|
|
3600
3703
|
});
|
|
3601
|
-
const outputRun = await
|
|
3602
|
-
await
|
|
3603
|
-
writeOutput(withRunUrls(runTarget, outputRun), parsed, io,
|
|
3604
|
-
return
|
|
3704
|
+
const outputRun = await withRemoteRunFailureSummary(runTarget, watched);
|
|
3705
|
+
await tryImportTerminalRemoteProjectState({ target: runTarget, io });
|
|
3706
|
+
writeOutput(withRunUrls(runTarget, outputRun), parsed, io, formatRemoteRunResult);
|
|
3707
|
+
return remoteRunSucceeded(watched) ? 0 : 1;
|
|
3605
3708
|
}
|
|
3606
|
-
writeOutput(startedRunOutput, parsed, io, (run) =>
|
|
3709
|
+
writeOutput(startedRunOutput, parsed, io, (run) => formatRemoteRunStarted(run, workflow).trimEnd());
|
|
3607
3710
|
return 0;
|
|
3608
3711
|
}
|
|
3609
|
-
async function
|
|
3712
|
+
async function ensureRemoteImproveBaseCandidate(args) {
|
|
3610
3713
|
if (args.candidateId) {
|
|
3611
|
-
const candidate = await
|
|
3714
|
+
const candidate = await readRemoteCandidateSummary(args.target, args.candidateId);
|
|
3612
3715
|
if (!candidate) {
|
|
3613
3716
|
throw new UsageError(`Base candidate ${args.candidateId} was not found for the current benchmark.`);
|
|
3614
3717
|
}
|
|
3615
|
-
if (
|
|
3718
|
+
if (remoteCandidateIsEvaluated(candidate)) {
|
|
3616
3719
|
return args.candidateId;
|
|
3617
3720
|
}
|
|
3618
3721
|
}
|
|
3619
3722
|
else {
|
|
3620
|
-
const activeCandidate = await
|
|
3723
|
+
const activeCandidate = await readEvaluatedActiveRemoteCandidate(args.target);
|
|
3621
3724
|
if (activeCandidate) {
|
|
3622
3725
|
return activeCandidate.id;
|
|
3623
3726
|
}
|
|
@@ -3625,31 +3728,33 @@ async function ensureHostedImproveBaseCandidate(args) {
|
|
|
3625
3728
|
const response = await apiRequest(projectApiPath(args.target.projectId, "/runs"), {
|
|
3626
3729
|
method: "POST",
|
|
3627
3730
|
body: {
|
|
3731
|
+
schema: "workbench.remote.run.request.v1",
|
|
3628
3732
|
workflow: "eval",
|
|
3629
3733
|
samples: args.samples,
|
|
3630
3734
|
...(args.candidateId ? { candidateId: args.candidateId } : {}),
|
|
3631
3735
|
sourceYaml: args.sourceYaml,
|
|
3736
|
+
...(args.candidateId ? {} : { candidateFiles: args.candidateFiles }),
|
|
3632
3737
|
...(args.adapterFiles.length > 0 ? { adapterFiles: args.adapterFiles } : {}),
|
|
3633
3738
|
},
|
|
3634
3739
|
}, args.target.baseUrl);
|
|
3635
|
-
const runTarget =
|
|
3636
|
-
const watched = await
|
|
3740
|
+
const runTarget = remoteTargetForRunStartResponse(args.target, response);
|
|
3741
|
+
const watched = await watchRemoteRun({
|
|
3637
3742
|
parsed: args.parsed,
|
|
3638
3743
|
target: runTarget,
|
|
3639
3744
|
runId: response.run.id,
|
|
3640
3745
|
intervalMs: args.intervalMs,
|
|
3641
3746
|
timeoutMs: args.timeoutMs,
|
|
3642
3747
|
});
|
|
3643
|
-
if (!
|
|
3748
|
+
if (!remoteRunSucceeded(watched)) {
|
|
3644
3749
|
throw new UsageError(`Parent candidate eval ${watched.id} failed; improve was not started.`);
|
|
3645
3750
|
}
|
|
3646
3751
|
if (!watched.candidateId) {
|
|
3647
3752
|
throw new UsageError(`Parent candidate eval ${watched.id} did not produce a candidate.`);
|
|
3648
3753
|
}
|
|
3649
|
-
await
|
|
3754
|
+
await tryImportTerminalRemoteProjectState({ target: runTarget, io: args.io });
|
|
3650
3755
|
return watched.candidateId;
|
|
3651
3756
|
}
|
|
3652
|
-
function
|
|
3757
|
+
function remoteWorkflowArgsForRun(args) {
|
|
3653
3758
|
const next = ["--dir", args.sourceDir, "--runs", args.runId, "--json"];
|
|
3654
3759
|
appendStringFlag(next, "benchmark", asOptionalString(args.parsed.flags.benchmark));
|
|
3655
3760
|
appendStringFlag(next, "candidate", asOptionalString(args.parsed.flags.candidate));
|
|
@@ -3672,27 +3777,27 @@ function appendStringFlag(args, name, value) {
|
|
|
3672
3777
|
args.push(`--${name}`, value);
|
|
3673
3778
|
}
|
|
3674
3779
|
}
|
|
3675
|
-
async function
|
|
3780
|
+
async function readRemoteCandidateSummary(target, candidateId) {
|
|
3676
3781
|
const response = await apiRequest(projectApiPath(target.projectId, "/candidates"), {}, target.baseUrl);
|
|
3677
3782
|
return response.candidates.find((entry) => entry.id === candidateId) ?? null;
|
|
3678
3783
|
}
|
|
3679
|
-
async function
|
|
3784
|
+
async function readEvaluatedActiveRemoteCandidate(target) {
|
|
3680
3785
|
const response = await apiRequest(projectApiPath(target.projectId), {}, target.baseUrl);
|
|
3681
3786
|
const activeCandidateId = response.benchmark.activeCandidateId;
|
|
3682
3787
|
if (!activeCandidateId) {
|
|
3683
3788
|
return null;
|
|
3684
3789
|
}
|
|
3685
|
-
const candidate = await
|
|
3686
|
-
return candidate &&
|
|
3790
|
+
const candidate = await readRemoteCandidateSummary(target, activeCandidateId);
|
|
3791
|
+
return candidate && remoteCandidateIsEvaluated(candidate) ? candidate : null;
|
|
3687
3792
|
}
|
|
3688
|
-
function
|
|
3793
|
+
function remoteCandidateIsEvaluated(candidate) {
|
|
3689
3794
|
return candidate.status === "evaluated" || candidate.eval != null;
|
|
3690
3795
|
}
|
|
3691
3796
|
async function openWorkbench(argv, io) {
|
|
3692
3797
|
const parsed = parseArgs(argv);
|
|
3693
3798
|
rejectUnknownFlags(parsed, new Set(["dir", "benchmark", "no-open", "json"]));
|
|
3694
3799
|
if (parsed.positionals.length > 1) {
|
|
3695
|
-
throw new UsageError(`Unexpected argument for workbench open --
|
|
3800
|
+
throw new UsageError(`Unexpected argument for workbench open --remote: ${parsed.positionals.slice(1).join(" ")}`);
|
|
3696
3801
|
}
|
|
3697
3802
|
const target = await resolveOpenTarget(parsed);
|
|
3698
3803
|
const ref = target.openRef;
|
|
@@ -3722,7 +3827,7 @@ function buildWorkbenchWebUrl(target, ref) {
|
|
|
3722
3827
|
}
|
|
3723
3828
|
return buildWorkbenchResourceUrls(target, { candidateId: ref }).candidateEvaluation;
|
|
3724
3829
|
}
|
|
3725
|
-
async function
|
|
3830
|
+
async function resolveRemoteTarget(parsed, options = {}) {
|
|
3726
3831
|
if (options.sourceArg !== undefined && parsed.flags.dir !== undefined) {
|
|
3727
3832
|
throw new UsageError("Use either --dir or SOURCE, not both.");
|
|
3728
3833
|
}
|
|
@@ -3731,7 +3836,7 @@ async function resolveHostedTarget(parsed, options = {}) {
|
|
|
3731
3836
|
: resolveDir(parsed, options.sourceArg);
|
|
3732
3837
|
const origin = await readWorkbenchOrigin(dir);
|
|
3733
3838
|
const explicitProject = asOptionalString(parsed.flags.benchmark);
|
|
3734
|
-
const baseUrl = await
|
|
3839
|
+
const baseUrl = await effectiveOriginBaseUrl(origin?.baseUrl);
|
|
3735
3840
|
if (explicitProject && (!isRemoteProjectId(explicitProject) || options.requireProjectIdentity === true)) {
|
|
3736
3841
|
const project = await resolveRemoteProject(explicitProject, baseUrl);
|
|
3737
3842
|
return {
|
|
@@ -3745,7 +3850,7 @@ async function resolveHostedTarget(parsed, options = {}) {
|
|
|
3745
3850
|
}
|
|
3746
3851
|
const projectId = explicitProject ?? origin?.projectId;
|
|
3747
3852
|
if (!projectId) {
|
|
3748
|
-
throw new UsageError("Missing
|
|
3853
|
+
throw new UsageError("Missing remote benchmark. Run workbench push, workbench clone, or pass --benchmark OWNER/BENCHMARK.");
|
|
3749
3854
|
}
|
|
3750
3855
|
const originRemote = origin ? parseOriginRemote(origin) : null;
|
|
3751
3856
|
return {
|
|
@@ -3759,7 +3864,7 @@ async function resolveHostedTarget(parsed, options = {}) {
|
|
|
3759
3864
|
origin,
|
|
3760
3865
|
};
|
|
3761
3866
|
}
|
|
3762
|
-
async function
|
|
3867
|
+
async function resolveRemoteDryRunTarget(parsed, options = {}) {
|
|
3763
3868
|
if (options.sourceArg !== undefined && parsed.flags.dir !== undefined) {
|
|
3764
3869
|
throw new UsageError("Use either --dir or SOURCE, not both.");
|
|
3765
3870
|
}
|
|
@@ -3768,7 +3873,7 @@ async function resolveHostedDryRunTarget(parsed, options = {}) {
|
|
|
3768
3873
|
: resolveDir(parsed, options.sourceArg);
|
|
3769
3874
|
const origin = await readWorkbenchOrigin(dir);
|
|
3770
3875
|
const explicitProject = asOptionalString(parsed.flags.benchmark);
|
|
3771
|
-
const baseUrl = await
|
|
3876
|
+
const baseUrl = await effectiveOriginBaseUrl(origin?.baseUrl);
|
|
3772
3877
|
if (explicitProject) {
|
|
3773
3878
|
if (isRemoteProjectId(explicitProject)) {
|
|
3774
3879
|
return {
|
|
@@ -3801,7 +3906,7 @@ async function resolveHostedDryRunTarget(parsed, options = {}) {
|
|
|
3801
3906
|
origin,
|
|
3802
3907
|
};
|
|
3803
3908
|
}
|
|
3804
|
-
throw new UsageError("Missing
|
|
3909
|
+
throw new UsageError("Missing remote benchmark. Run workbench push, workbench clone, or pass --benchmark OWNER/BENCHMARK.");
|
|
3805
3910
|
}
|
|
3806
3911
|
async function resolveOpenTarget(parsed) {
|
|
3807
3912
|
const ref = parsed.positionals[0];
|
|
@@ -3830,7 +3935,7 @@ async function resolveOpenTarget(parsed) {
|
|
|
3830
3935
|
};
|
|
3831
3936
|
}
|
|
3832
3937
|
return {
|
|
3833
|
-
...(await
|
|
3938
|
+
...(await resolveRemoteTarget(parsed, { requireProjectIdentity: true })),
|
|
3834
3939
|
...(ref ? { openRef: ref } : {}),
|
|
3835
3940
|
};
|
|
3836
3941
|
}
|
|
@@ -3901,7 +4006,7 @@ function withRunUrls(target, run) {
|
|
|
3901
4006
|
}),
|
|
3902
4007
|
};
|
|
3903
4008
|
}
|
|
3904
|
-
function
|
|
4009
|
+
function remoteTargetForRunStartResponse(target, response) {
|
|
3905
4010
|
const projectId = response.benchmark?.id ?? response.run.projectId ?? target.projectId;
|
|
3906
4011
|
if (projectId === target.projectId && !response.benchmark) {
|
|
3907
4012
|
return target;
|
|
@@ -3926,7 +4031,7 @@ function hostedTargetForRunStartResponse(target, response) {
|
|
|
3926
4031
|
}
|
|
3927
4032
|
return next;
|
|
3928
4033
|
}
|
|
3929
|
-
function
|
|
4034
|
+
function remoteRunEvaluationCandidateId(run, jobs = []) {
|
|
3930
4035
|
if (run.outputCandidateId) {
|
|
3931
4036
|
return run.outputCandidateId;
|
|
3932
4037
|
}
|
|
@@ -3986,7 +4091,7 @@ function runtimeBundleForProjectVisibility(runtime, visibility) {
|
|
|
3986
4091
|
};
|
|
3987
4092
|
}
|
|
3988
4093
|
function localProjectStateSource(source) {
|
|
3989
|
-
const request =
|
|
4094
|
+
const request = remoteProjectSourceRequest(source);
|
|
3990
4095
|
const stateSource = {
|
|
3991
4096
|
source: request.source,
|
|
3992
4097
|
files: source.sourceFiles.map((file) => ({ ...file })),
|
|
@@ -4014,7 +4119,7 @@ function toSurfaceSnapshotFile(file) {
|
|
|
4014
4119
|
executable: file.executable === true,
|
|
4015
4120
|
};
|
|
4016
4121
|
}
|
|
4017
|
-
function
|
|
4122
|
+
function remoteProjectSummaryFromState(state) {
|
|
4018
4123
|
return {
|
|
4019
4124
|
id: state.project.id,
|
|
4020
4125
|
ownerUsername: state.project.ownerUsername,
|
|
@@ -4025,12 +4130,12 @@ function hostedProjectSummaryFromState(state) {
|
|
|
4025
4130
|
function sourceFileCount(source) {
|
|
4026
4131
|
return source.sourceFiles.length;
|
|
4027
4132
|
}
|
|
4028
|
-
function
|
|
4029
|
-
const { network, resources } =
|
|
4133
|
+
function remoteProjectSourceRequest(source) {
|
|
4134
|
+
const { network, resources } = remoteEnvironmentOptions(source);
|
|
4030
4135
|
return {
|
|
4031
4136
|
source: source.specSource,
|
|
4032
4137
|
candidateFiles: source.candidateFiles,
|
|
4033
|
-
engineResolveFiles:
|
|
4138
|
+
engineResolveFiles: remoteEngineResolveFiles(source),
|
|
4034
4139
|
engineResolveBinding: engineResolveBindingForSpec(source.spec),
|
|
4035
4140
|
adapterFiles: source.adapterFiles,
|
|
4036
4141
|
dockerfile: source.dockerfile,
|
|
@@ -4043,7 +4148,7 @@ function hostedProjectSourceRequest(source) {
|
|
|
4043
4148
|
function isRemoteProjectId(value) {
|
|
4044
4149
|
return /^wb_[a-f0-9]{12}$/u.test(value);
|
|
4045
4150
|
}
|
|
4046
|
-
function
|
|
4151
|
+
function remoteEnvironmentOptions(source) {
|
|
4047
4152
|
return {
|
|
4048
4153
|
network: source.spec.environment.network?.egress === "open"
|
|
4049
4154
|
? "on"
|
|
@@ -4051,7 +4156,7 @@ function hostedEnvironmentOptions(source) {
|
|
|
4051
4156
|
resources: runtimeResources(source.spec.environment),
|
|
4052
4157
|
};
|
|
4053
4158
|
}
|
|
4054
|
-
async function
|
|
4159
|
+
async function watchRemoteRun(args) {
|
|
4055
4160
|
const deadline = args.timeoutMs === undefined ? undefined : Date.now() + args.timeoutMs;
|
|
4056
4161
|
let lastRun = null;
|
|
4057
4162
|
while (true) {
|
|
@@ -4079,7 +4184,7 @@ async function watchHostedRun(args) {
|
|
|
4079
4184
|
await sleep(args.intervalMs);
|
|
4080
4185
|
}
|
|
4081
4186
|
}
|
|
4082
|
-
function
|
|
4187
|
+
function formatRemoteRunResult(run) {
|
|
4083
4188
|
const candidateId = run.outputCandidateId ?? run.candidateId;
|
|
4084
4189
|
const activeDetail = run.activeCandidateId && candidateId && run.activeCandidateId !== candidateId
|
|
4085
4190
|
? `; active ${run.activeCandidateId}`
|
|
@@ -4097,7 +4202,7 @@ function formatRetryCommandResult(result) {
|
|
|
4097
4202
|
const runId = run?.id ?? result.runId ?? "unknown";
|
|
4098
4203
|
const scope = `${result.retried.kind} ${result.retried.id}`;
|
|
4099
4204
|
const verb = run
|
|
4100
|
-
? run.status === "finished" ? "finished as
|
|
4205
|
+
? run.status === "finished" ? "finished as remote run" : "started as remote run"
|
|
4101
4206
|
: "finished as local run";
|
|
4102
4207
|
return [
|
|
4103
4208
|
`Retry of ${scope} ${verb} ${runId}.`,
|
|
@@ -4113,7 +4218,7 @@ function formatRetryCommandResult(result) {
|
|
|
4113
4218
|
: result.urls?.benchmark ? [`Open benchmark: ${result.urls.benchmark}`] : []),
|
|
4114
4219
|
].join("\n");
|
|
4115
4220
|
}
|
|
4116
|
-
function
|
|
4221
|
+
function formatRemoteRunStarted(run, fallbackWorkflow) {
|
|
4117
4222
|
const candidateId = run.outputCandidateId ?? run.candidateId;
|
|
4118
4223
|
return [
|
|
4119
4224
|
`Started ${run.workflow ?? fallbackWorkflow} run ${run.id}; ${candidateId ? `candidate ${candidateId}` : `${run.jobCount ?? 0} jobs queued`}.`,
|
|
@@ -4124,10 +4229,7 @@ function formatHostedRunStarted(run, fallbackWorkflow) {
|
|
|
4124
4229
|
].join("\n");
|
|
4125
4230
|
}
|
|
4126
4231
|
function readRunJobPurpose(job) {
|
|
4127
|
-
|
|
4128
|
-
const execution = readRecord(input?.execution);
|
|
4129
|
-
const purpose = execution?.purpose;
|
|
4130
|
-
return typeof purpose === "string" && purpose ? purpose : null;
|
|
4232
|
+
return job.purpose && job.purpose.trim() ? job.purpose : null;
|
|
4131
4233
|
}
|
|
4132
4234
|
function readRecord(value) {
|
|
4133
4235
|
return value && typeof value === "object" && !Array.isArray(value)
|
|
@@ -4146,24 +4248,24 @@ function integerValue(value) {
|
|
|
4146
4248
|
function readFiniteNumber(value) {
|
|
4147
4249
|
return typeof value === "number" && Number.isFinite(value) ? value : null;
|
|
4148
4250
|
}
|
|
4149
|
-
async function
|
|
4150
|
-
if (
|
|
4251
|
+
async function withRemoteRunFailureSummary(target, run) {
|
|
4252
|
+
if (remoteRunSucceeded(run) || run.error || (run.failedJobCount ?? 0) <= 0) {
|
|
4151
4253
|
return run;
|
|
4152
4254
|
}
|
|
4153
|
-
const error = await
|
|
4255
|
+
const error = await readRemoteRunFailureSummary(target, run.id);
|
|
4154
4256
|
return error ? { ...run, error } : run;
|
|
4155
4257
|
}
|
|
4156
|
-
async function
|
|
4258
|
+
async function readRemoteRunFailureSummary(target, runId) {
|
|
4157
4259
|
try {
|
|
4158
|
-
const
|
|
4159
|
-
const failed =
|
|
4260
|
+
const detail = await readRemoteRunDetail(target, runId);
|
|
4261
|
+
const failed = detail.jobs.find((job) => job.status === "failed" && job.error);
|
|
4160
4262
|
return failed?.error ? `First failed job ${failed.id}: ${failed.error}` : null;
|
|
4161
4263
|
}
|
|
4162
4264
|
catch {
|
|
4163
4265
|
return null;
|
|
4164
4266
|
}
|
|
4165
4267
|
}
|
|
4166
|
-
function
|
|
4268
|
+
function remoteRunSucceeded(run) {
|
|
4167
4269
|
if (run.status !== "finished") {
|
|
4168
4270
|
return false;
|
|
4169
4271
|
}
|
|
@@ -4179,16 +4281,6 @@ async function readWorkbenchOrigin(dir) {
|
|
|
4179
4281
|
throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
|
|
4180
4282
|
}
|
|
4181
4283
|
const originRecord = parsed;
|
|
4182
|
-
const keys = Object.keys(originRecord).sort();
|
|
4183
|
-
const expectedKeys = [
|
|
4184
|
-
"baseUrl",
|
|
4185
|
-
"linkedAt",
|
|
4186
|
-
"projectId",
|
|
4187
|
-
"remote",
|
|
4188
|
-
"runtimeFingerprint",
|
|
4189
|
-
"sourceFingerprint",
|
|
4190
|
-
"sourceRevisionId",
|
|
4191
|
-
];
|
|
4192
4284
|
if (typeof originRecord.projectId !== "string" ||
|
|
4193
4285
|
typeof originRecord.baseUrl !== "string" ||
|
|
4194
4286
|
typeof originRecord.remote !== "string" ||
|
|
@@ -4202,9 +4294,6 @@ async function readWorkbenchOrigin(dir) {
|
|
|
4202
4294
|
originRecord.runtimeFingerprint.length === 0) {
|
|
4203
4295
|
throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
|
|
4204
4296
|
}
|
|
4205
|
-
if (JSON.stringify(keys) !== JSON.stringify(expectedKeys)) {
|
|
4206
|
-
throw new UsageError(`Workbench origin is malformed: ${workbenchOriginPath(dir)}`);
|
|
4207
|
-
}
|
|
4208
4297
|
return {
|
|
4209
4298
|
baseUrl: normalizeBaseUrl(originRecord.baseUrl),
|
|
4210
4299
|
remote: normalizeOriginRemote(originRecord.remote),
|
|
@@ -4254,7 +4343,7 @@ async function writeWorkbenchOriginFromState(dir, args) {
|
|
|
4254
4343
|
const runtimeFingerprint = args.state.base.runtimeFingerprint ??
|
|
4255
4344
|
workbenchRuntimeBundleFingerprint(args.state.runtime);
|
|
4256
4345
|
if (!sourceRevisionId || !sourceFingerprint || !runtimeFingerprint) {
|
|
4257
|
-
throw new UsageError("
|
|
4346
|
+
throw new UsageError("Remote project state is missing required origin metadata.");
|
|
4258
4347
|
}
|
|
4259
4348
|
return await writeWorkbenchOrigin(dir, {
|
|
4260
4349
|
baseUrl: args.baseUrl,
|
|
@@ -4290,18 +4379,29 @@ function originRemoteUrlParts(origin) {
|
|
|
4290
4379
|
function workbenchOriginPath(dir) {
|
|
4291
4380
|
return path.join(dir, ".workbench", "origin.json");
|
|
4292
4381
|
}
|
|
4293
|
-
async function effectiveBaseUrl(
|
|
4382
|
+
async function effectiveBaseUrl() {
|
|
4383
|
+
const config = await loadConfig();
|
|
4384
|
+
return selectWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
|
|
4385
|
+
}
|
|
4386
|
+
async function effectiveOriginBaseUrl(originBaseUrl) {
|
|
4294
4387
|
const config = await loadConfig();
|
|
4295
|
-
return
|
|
4296
|
-
|
|
4297
|
-
config.baseUrl
|
|
4388
|
+
return selectWorkbenchBaseUrl({
|
|
4389
|
+
originBaseUrl,
|
|
4390
|
+
configBaseUrl: config.baseUrl,
|
|
4391
|
+
});
|
|
4392
|
+
}
|
|
4393
|
+
function selectWorkbenchBaseUrl(input = {}) {
|
|
4394
|
+
return normalizeBaseUrl(input.explicitBaseUrl ??
|
|
4395
|
+
input.originBaseUrl ??
|
|
4396
|
+
process.env.WORKBENCH_API_URL ??
|
|
4397
|
+
input.configBaseUrl ??
|
|
4298
4398
|
DEFAULT_BASE_URL);
|
|
4299
4399
|
}
|
|
4300
4400
|
async function readWorkbenchProfileStatus(config) {
|
|
4301
4401
|
if (!config.accessToken) {
|
|
4302
4402
|
return { authenticated: false, profile: null };
|
|
4303
4403
|
}
|
|
4304
|
-
const baseUrl =
|
|
4404
|
+
const baseUrl = selectWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
|
|
4305
4405
|
try {
|
|
4306
4406
|
const response = await fetch(`${baseUrl}/api/workbench/profile`, {
|
|
4307
4407
|
headers: {
|
|
@@ -4327,10 +4427,9 @@ async function readWorkbenchProfileStatus(config) {
|
|
|
4327
4427
|
}
|
|
4328
4428
|
async function apiRequest(apiPath, options = {}, baseUrlOverride) {
|
|
4329
4429
|
const config = await loadConfig();
|
|
4330
|
-
const baseUrl =
|
|
4331
|
-
|
|
4332
|
-
config.baseUrl
|
|
4333
|
-
DEFAULT_BASE_URL);
|
|
4430
|
+
const baseUrl = baseUrlOverride !== undefined
|
|
4431
|
+
? normalizeBaseUrl(baseUrlOverride)
|
|
4432
|
+
: selectWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
|
|
4334
4433
|
const method = options.method ?? "GET";
|
|
4335
4434
|
const canRetry = method === "GET";
|
|
4336
4435
|
let lastError = null;
|
|
@@ -4868,6 +4967,24 @@ function formatMetricValue(value) {
|
|
|
4868
4967
|
}
|
|
4869
4968
|
return value.toFixed(2);
|
|
4870
4969
|
}
|
|
4970
|
+
function formatNullableMetric(value) {
|
|
4971
|
+
return typeof value === "number" && Number.isFinite(value)
|
|
4972
|
+
? formatMetricValue(value)
|
|
4973
|
+
: "n/a";
|
|
4974
|
+
}
|
|
4975
|
+
function formatFailureLine(failure) {
|
|
4976
|
+
return [
|
|
4977
|
+
failure.kind,
|
|
4978
|
+
failure.id,
|
|
4979
|
+
failure.status ?? "failed",
|
|
4980
|
+
failure.runId ? `run=${failure.runId}` : null,
|
|
4981
|
+
failure.candidateId ? `candidate=${failure.candidateId}` : null,
|
|
4982
|
+
failure.jobId ? `job=${failure.jobId}` : null,
|
|
4983
|
+
failure.caseId ? `case=${failure.caseId}` : null,
|
|
4984
|
+
typeof failure.sampleIndex === "number" ? `sample=${failure.sampleIndex}` : null,
|
|
4985
|
+
failure.error ?? null,
|
|
4986
|
+
].filter(Boolean).join("\t");
|
|
4987
|
+
}
|
|
4871
4988
|
function resolveDir(parsed, positionalDir) {
|
|
4872
4989
|
const resolved = path.resolve(asOptionalString(parsed.flags.dir) ?? positionalDir ?? process.cwd());
|
|
4873
4990
|
return isWorkbenchSourceYamlPath(resolved) ? path.dirname(resolved) : resolved;
|
|
@@ -4928,28 +5045,12 @@ async function resolveLocalProjectForExecution(workspace, source) {
|
|
|
4928
5045
|
};
|
|
4929
5046
|
}
|
|
4930
5047
|
function completedJobOutputFiles(job) {
|
|
4931
|
-
const output =
|
|
5048
|
+
const output = jsonRecord(job.output);
|
|
4932
5049
|
const files = Array.isArray(output.files)
|
|
4933
5050
|
? output.files.filter(isSurfaceSnapshotFile)
|
|
4934
5051
|
: [];
|
|
4935
5052
|
return normalizeSurfaceFiles(files);
|
|
4936
5053
|
}
|
|
4937
|
-
function asJsonRecord(value) {
|
|
4938
|
-
return value && typeof value === "object" && !Array.isArray(value)
|
|
4939
|
-
? value
|
|
4940
|
-
: {};
|
|
4941
|
-
}
|
|
4942
|
-
function isSurfaceSnapshotFile(value) {
|
|
4943
|
-
const record = asJsonRecord(value);
|
|
4944
|
-
return (typeof record.path === "string" &&
|
|
4945
|
-
typeof record.content === "string" &&
|
|
4946
|
-
(record.kind === undefined ||
|
|
4947
|
-
record.kind === "text" ||
|
|
4948
|
-
record.kind === "binary") &&
|
|
4949
|
-
(record.encoding === undefined ||
|
|
4950
|
-
record.encoding === "utf8" ||
|
|
4951
|
-
record.encoding === "base64"));
|
|
4952
|
-
}
|
|
4953
5054
|
function createLocalEvent(type, at, event) {
|
|
4954
5055
|
return {
|
|
4955
5056
|
id: `evt_${Math.random().toString(36).slice(2, 10)}_${Date.now().toString(36)}`,
|