@workbench-ai/workbench 0.0.73 → 0.0.75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dev-open/client.css +0 -6
- package/dist/dev-open/client.js +113 -113
- package/dist/fanout.d.ts +13 -0
- package/dist/fanout.d.ts.map +1 -0
- package/dist/fanout.js +223 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +306 -229
- package/dist/install-targets.d.ts +54 -21
- package/dist/install-targets.d.ts.map +1 -1
- package/dist/install-targets.js +333 -118
- package/package.json +7 -6
package/dist/index.js
CHANGED
|
@@ -4,10 +4,11 @@ import { createRequire } from "node:module";
|
|
|
4
4
|
import os from "node:os";
|
|
5
5
|
import path from "node:path";
|
|
6
6
|
import { gzipSync } from "node:zlib";
|
|
7
|
-
import {
|
|
7
|
+
import { addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchInspectionSnapshot, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, prepareWorkbenchCloudEvalRequest, prepareWorkbenchCloudImproveRequest, publishWorkbenchVersion, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
|
|
8
8
|
import { normalizeWorkbenchSkillName } from "@workbench-ai/workbench-contract";
|
|
9
9
|
import { emitError, emitResult } from "./output.js";
|
|
10
|
-
import {
|
|
10
|
+
import { fanOutSkill, manualFanOutCommand } from "./fanout.js";
|
|
11
|
+
import { installedInventoryToJson, installSnapshotToStore, normalizeInstallSnapshotPath, readInstalledSkillsInventory, } from "./install-targets.js";
|
|
11
12
|
import { startWorkbenchOpenServer } from "./open-server.js";
|
|
12
13
|
const require = createRequire(import.meta.url);
|
|
13
14
|
const HELP = [
|
|
@@ -19,11 +20,11 @@ const HELP = [
|
|
|
19
20
|
"",
|
|
20
21
|
"Taught commands:",
|
|
21
22
|
" workbench new [DIR] [--json]",
|
|
22
|
-
" workbench eval [
|
|
23
|
-
" workbench improve [
|
|
23
|
+
" workbench eval [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
24
|
+
" workbench improve [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
24
25
|
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
25
26
|
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
|
|
26
|
-
" workbench install HANDLE_OR_URL
|
|
27
|
+
" workbench install [HANDLE_OR_URL] [--yes] [--dry-run] [--json]",
|
|
27
28
|
"",
|
|
28
29
|
"More:",
|
|
29
30
|
" workbench help --all",
|
|
@@ -32,11 +33,11 @@ const HELP_ALL = [
|
|
|
32
33
|
"Usage:",
|
|
33
34
|
" workbench # = workbench status",
|
|
34
35
|
" workbench new [DIR] [--json]",
|
|
35
|
-
" workbench eval [
|
|
36
|
+
" workbench eval [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
36
37
|
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
37
|
-
" workbench improve [
|
|
38
|
+
" workbench improve [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
38
39
|
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
|
|
39
|
-
" workbench install HANDLE_OR_URL
|
|
40
|
+
" workbench install [HANDLE_OR_URL] [--yes] [--dry-run] [--json]",
|
|
40
41
|
"",
|
|
41
42
|
"Inspect:",
|
|
42
43
|
" workbench status [--dir DIR] [--json]",
|
|
@@ -47,7 +48,6 @@ const HELP_ALL = [
|
|
|
47
48
|
" workbench open [--host HOST] [--port PORT] [--no-open]",
|
|
48
49
|
"",
|
|
49
50
|
"Configure:",
|
|
50
|
-
" workbench case add RUN_ID [--json]",
|
|
51
51
|
" workbench agent add NAME --adapter X [--model M] [--with k=v]... | list | rm NAME [--json]",
|
|
52
52
|
"",
|
|
53
53
|
"Share and auth:",
|
|
@@ -71,7 +71,7 @@ const COMMAND_HELP = {
|
|
|
71
71
|
].join("\n"),
|
|
72
72
|
eval: [
|
|
73
73
|
"Usage:",
|
|
74
|
-
" workbench eval [
|
|
74
|
+
" workbench eval [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
75
75
|
"",
|
|
76
76
|
"Runs eval jobs for the selected version, measured skills, and agents. Omitted selectors use manifest defaults.",
|
|
77
77
|
"",
|
|
@@ -80,7 +80,7 @@ const COMMAND_HELP = {
|
|
|
80
80
|
].join("\n"),
|
|
81
81
|
improve: [
|
|
82
82
|
"Usage:",
|
|
83
|
-
" workbench improve [
|
|
83
|
+
" workbench improve [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
84
84
|
"",
|
|
85
85
|
"Creates one improved child version from evidence. The selected skills and agents must resolve to exactly one entry each.",
|
|
86
86
|
"",
|
|
@@ -98,12 +98,12 @@ const COMMAND_HELP = {
|
|
|
98
98
|
].join("\n"),
|
|
99
99
|
install: [
|
|
100
100
|
"Usage:",
|
|
101
|
-
" workbench install HANDLE_OR_URL
|
|
101
|
+
" workbench install [HANDLE_OR_URL] [--yes] [--dry-run] [--json]",
|
|
102
102
|
"",
|
|
103
|
-
"Installs published
|
|
103
|
+
"Installs published source into the canonical machine skill store, or lists installed skills when no source is given.",
|
|
104
104
|
"",
|
|
105
105
|
"Example:",
|
|
106
|
-
" workbench install acme/earnings-prep
|
|
106
|
+
" workbench install acme/earnings-prep",
|
|
107
107
|
].join("\n"),
|
|
108
108
|
status: [
|
|
109
109
|
"Usage:",
|
|
@@ -169,15 +169,6 @@ const COMMAND_HELP = {
|
|
|
169
169
|
"Example:",
|
|
170
170
|
" workbench open --no-open",
|
|
171
171
|
].join("\n"),
|
|
172
|
-
case: [
|
|
173
|
-
"Usage:",
|
|
174
|
-
" workbench case add RUN_ID [--json]",
|
|
175
|
-
"",
|
|
176
|
-
"Captures a regression case from a recorded run.",
|
|
177
|
-
"",
|
|
178
|
-
"Example:",
|
|
179
|
-
" workbench case add run_abc12345",
|
|
180
|
-
].join("\n"),
|
|
181
172
|
agent: [
|
|
182
173
|
"Usage:",
|
|
183
174
|
" workbench agent list [--json]",
|
|
@@ -256,7 +247,7 @@ const COMMAND_FLAGS = {
|
|
|
256
247
|
samples: "positive-integer",
|
|
257
248
|
skills: "string",
|
|
258
249
|
},
|
|
259
|
-
install: { ...COMMON_FLAGS, ...HELP_FLAG, "dry-run": "boolean",
|
|
250
|
+
install: { ...COMMON_FLAGS, ...HELP_FLAG, "dry-run": "boolean", yes: "boolean" },
|
|
260
251
|
log: { ...PROJECT_FLAGS, ...HELP_FLAG, runs: "boolean", versions: "boolean" },
|
|
261
252
|
login: {
|
|
262
253
|
...COMMON_FLAGS,
|
|
@@ -290,11 +281,6 @@ const COMMAND_FLAGS = {
|
|
|
290
281
|
version: { ...COMMON_FLAGS, ...VERSION_FLAG },
|
|
291
282
|
};
|
|
292
283
|
const SUBCOMMAND_FLAGS = {
|
|
293
|
-
case: {
|
|
294
|
-
flags: {
|
|
295
|
-
add: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
296
|
-
},
|
|
297
|
-
},
|
|
298
284
|
agent: {
|
|
299
285
|
flags: {
|
|
300
286
|
list: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
@@ -345,12 +331,16 @@ export async function runCli(argv, io = {
|
|
|
345
331
|
return await handleStatus(parsed, io);
|
|
346
332
|
}
|
|
347
333
|
if (command === "eval") {
|
|
334
|
+
rejectExtraInput(parsed, {
|
|
335
|
+
maxPositionals: 1,
|
|
336
|
+
message: "workbench eval does not accept a VERSION argument.",
|
|
337
|
+
remediation: "Run workbench eval for current source, or use workbench show VERSION to inspect old source.",
|
|
338
|
+
});
|
|
348
339
|
if (parsed.flags.cloud === true) {
|
|
349
340
|
return await handleCloudEval(parsed, io);
|
|
350
341
|
}
|
|
351
342
|
const runs = await evalWorkbenchSkill({
|
|
352
343
|
...core,
|
|
353
|
-
version: optionalPositional(parsed, 1),
|
|
354
344
|
skill: stringFlag(parsed, "skills"),
|
|
355
345
|
agent: stringFlag(parsed, "agents"),
|
|
356
346
|
samples: intFlag(parsed, "samples"),
|
|
@@ -377,16 +367,18 @@ export async function runCli(argv, io = {
|
|
|
377
367
|
].filter(Boolean).join("\n"));
|
|
378
368
|
}
|
|
379
369
|
if (command === "improve") {
|
|
370
|
+
rejectExtraInput(parsed, {
|
|
371
|
+
maxPositionals: 1,
|
|
372
|
+
message: "workbench improve does not accept a VERSION argument.",
|
|
373
|
+
remediation: "Run workbench improve for current source after recording failed or reviewed eval evidence.",
|
|
374
|
+
});
|
|
380
375
|
if (parsed.flags.cloud === true) {
|
|
381
376
|
return await handleCloudImprove(parsed, io);
|
|
382
377
|
}
|
|
383
|
-
const improverAgent = await resolveLocalImproverAgent(parsed, core);
|
|
384
378
|
const result = await improveWorkbenchSkill({
|
|
385
379
|
...core,
|
|
386
|
-
version: optionalPositional(parsed, 1),
|
|
387
380
|
skill: stringFlag(parsed, "skills"),
|
|
388
381
|
agent: stringFlag(parsed, "agents"),
|
|
389
|
-
...(improverAgent ? { improverAgent } : {}),
|
|
390
382
|
budget: intFlag(parsed, "budget"),
|
|
391
383
|
samples: intFlag(parsed, "samples"),
|
|
392
384
|
});
|
|
@@ -423,9 +415,6 @@ export async function runCli(argv, io = {
|
|
|
423
415
|
if (command === "agent") {
|
|
424
416
|
return await handleAgent(parsed, io);
|
|
425
417
|
}
|
|
426
|
-
if (command === "case") {
|
|
427
|
-
return await handleCase(parsed, io);
|
|
428
|
-
}
|
|
429
418
|
if (command === "sync") {
|
|
430
419
|
const result = await syncWorkbenchRemote({
|
|
431
420
|
...core,
|
|
@@ -505,15 +494,17 @@ async function handleStatus(parsed, io) {
|
|
|
505
494
|
const core = await coreOptions(parsed);
|
|
506
495
|
const status = await workbenchStatusSnapshot(core);
|
|
507
496
|
const auth = await workbenchCliAuthStatus();
|
|
508
|
-
const
|
|
497
|
+
const machine = await workbenchMachineStatus(auth);
|
|
498
|
+
const cliStatus = await statusWithCausalNext(status, auth, core, machine);
|
|
509
499
|
return emitResult("workbench.status.v1", {
|
|
510
500
|
project: cliStatus.project,
|
|
511
501
|
worktree: cliStatus.worktree,
|
|
512
502
|
runs: cliStatus.runs,
|
|
513
503
|
remotes: cliStatus.remotes,
|
|
514
504
|
auth: auth,
|
|
505
|
+
machine: machine,
|
|
515
506
|
next: cliStatus.next,
|
|
516
|
-
}, parsed, io, () => formatStatusSnapshot({ ...cliStatus, auth }));
|
|
507
|
+
}, parsed, io, () => formatStatusSnapshot({ ...cliStatus, auth, machine }));
|
|
517
508
|
}
|
|
518
509
|
async function handleLog(parsed, io) {
|
|
519
510
|
if (parsed.flags.runs === true && parsed.flags.versions === true) {
|
|
@@ -584,7 +575,8 @@ async function handleShow(parsed, io) {
|
|
|
584
575
|
}
|
|
585
576
|
const trace = snapshotObjectByRef(snapshot.traces, objectRef, "trace");
|
|
586
577
|
if (trace) {
|
|
587
|
-
|
|
578
|
+
const files = trace.files.filter(isUserFacingTraceEvidenceFile);
|
|
579
|
+
return output(fileListing("trace", trace.id, files), parsed, io, () => formatFileListing("trace", trace.id, files));
|
|
588
580
|
}
|
|
589
581
|
const artifact = snapshotObjectByRef(snapshot.artifacts, objectRef, "artifact");
|
|
590
582
|
if (artifact) {
|
|
@@ -628,21 +620,6 @@ async function handleAgent(parsed, io) {
|
|
|
628
620
|
}
|
|
629
621
|
throw new WorkbenchUserError(`Unsupported agent command: ${subcommand}`);
|
|
630
622
|
}
|
|
631
|
-
async function handleCase(parsed, io) {
|
|
632
|
-
const subcommand = requiredPositional(parsed, 1, "workbench case requires add.");
|
|
633
|
-
if (subcommand === "add") {
|
|
634
|
-
const core = await coreOptions(parsed);
|
|
635
|
-
const sourceRef = requiredPositional(parsed, 2, "workbench case add requires RUN_ID.");
|
|
636
|
-
rejectExtraInput(parsed, {
|
|
637
|
-
maxPositionals: 3,
|
|
638
|
-
message: "workbench case add accepts one RUN_ID argument.",
|
|
639
|
-
remediation: "Run workbench case add RUN_ID.",
|
|
640
|
-
});
|
|
641
|
-
const record = await addWorkbenchCase({ ...core, fromTraceId: await traceIdForCaseSource(core, sourceRef) });
|
|
642
|
-
return output(record, parsed, io, () => `Added draft case ${record.id}. Edit .workbench/cases/${record.path}/case.yaml before using it as score evidence.`);
|
|
643
|
-
}
|
|
644
|
-
throw new WorkbenchUserError(`Unknown command: workbench case ${subcommand}`);
|
|
645
|
-
}
|
|
646
623
|
async function handleAdapterLogin(provider, parsed, io) {
|
|
647
624
|
const target = parseAuthTarget(provider, authProfileFlag(parsed));
|
|
648
625
|
const method = authMethod(parsed, target.adapterId);
|
|
@@ -862,67 +839,87 @@ async function handleLogout(parsed, io) {
|
|
|
862
839
|
if (tokenPresent) {
|
|
863
840
|
await writeConfig({ schema: CONFIG_SCHEMA, ...(baseUrl ? { baseUrl } : {}) });
|
|
864
841
|
}
|
|
865
|
-
const adapterStatuses = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).listStatus().catch(() => []);
|
|
866
|
-
const adapterAuthRetained = adapterStatuses.length > 0;
|
|
867
842
|
return emitResult("workbench.cli.logout.v1", {
|
|
868
843
|
...(baseUrl ? { baseUrl } : {}),
|
|
869
844
|
tokenPresent,
|
|
870
845
|
revoke,
|
|
871
846
|
configRemoved,
|
|
872
|
-
|
|
847
|
+
adapterAuth: "unchanged",
|
|
873
848
|
}, parsed, io, () => [
|
|
874
849
|
`Logged out of Workbench${baseUrl ? ` (${baseUrl})` : ""}.`,
|
|
875
850
|
`Token: ${tokenPresent ? "present" : "absent"}; revoke ${revoke}; config ${configRemoved ? "removed" : "unchanged"}.`,
|
|
876
|
-
|
|
877
|
-
? "Local adapter auth records were retained; run workbench logout PROVIDER to remove them."
|
|
878
|
-
: "No local adapter auth records remain.",
|
|
851
|
+
"Local adapter auth unchanged; run workbench logout PROVIDER to remove provider credentials.",
|
|
879
852
|
].join("\n"));
|
|
880
853
|
}
|
|
881
854
|
/**
 * Handles `workbench install [HANDLE_OR_URL] [--yes] [--dry-run] [--json]`.
 *
 * Without a positional source the command lists the installed skills
 * inventory (and rejects --yes / --dry-run, which only apply to an install).
 * With a source it resolves the Workbench Cloud source, fetches its snapshot,
 * installs it into the skill store, and then fans the skill out unless this
 * is a dry run.
 *
 * @param parsed Parsed CLI input (positionals and flags).
 * @param io CLI io handle forwarded to emitResult.
 * @returns The value returned by emitResult.
 * @throws {WorkbenchCodedError} "usage" (exit code 2) when inventory mode is
 *   combined with --yes/--dry-run, or when the source is not a Workbench
 *   Cloud source.
 */
async function handleInstall(parsed, io) {
    const dryRun = parsed.flags["dry-run"] === true;
    const overwrite = parsed.flags.yes === true;
    const sourceInput = optionalPositional(parsed, 1);

    // Inventory mode: no HANDLE_OR_URL was given.
    if (!sourceInput) {
        rejectExtraInput(parsed, {
            maxPositionals: 1,
            message: "workbench install with no HANDLE_OR_URL lists installed skills.",
            remediation: "Run workbench install OWNER/SKILL to install a published skill.",
        });
        if (overwrite || dryRun) {
            throw new WorkbenchCodedError("usage", "workbench install inventory does not accept --yes or --dry-run.", {
                remediation: "Run workbench install --json, or run workbench install OWNER/SKILL --dry-run.",
                exitCode: 2,
            });
        }
        const inventory = await readInstalledSkillsInventory({
            includeUpdates: true,
            lookupLatestVersion: latestInstallVersion,
        });
        const renderInventory = () => formatInstalledInventory(inventory);
        return emitResult("workbench.cli.installed.v1", installedInventoryToJson(inventory), parsed, io, renderInventory);
    }

    // Install mode: exactly one HANDLE_OR_URL is accepted.
    rejectExtraInput(parsed, {
        maxPositionals: 2,
        message: "workbench install accepts one HANDLE_OR_URL argument.",
        remediation: "Run workbench install OWNER/SKILL.",
    });
    const source = await resolveWorkbenchInstallSourceInput(sourceInput);
    const workbenchSource = parseWorkbenchInstallSource(source);
    if (!workbenchSource) {
        throw new WorkbenchCodedError("usage", "workbench install requires a Workbench Cloud source URL.", {
            remediation: "Run workbench install OWNER/SKILL.",
            exitCode: 2,
        });
    }
    const snapshot = await fetchWorkbenchInstallSourceSnapshot(workbenchSource, source);
    const sourceSummary = workbenchInstallSourceSummary(workbenchSource, snapshot);
    const provenance = {
        handle: `${workbenchSource.owner}/${workbenchSource.skill}`,
        versionId: snapshot.versionId,
        baseUrl: workbenchSource.baseUrl,
    };
    const result = await installSnapshotToStore({ snapshot, overwrite, dryRun, provenance });

    // A dry run never performs the fan-out; it only reports the manual command.
    let fanout;
    if (dryRun) {
        fanout = skippedFanOut(result.directoryName, result.destination);
    }
    else {
        fanout = await fanOutSkill(result.directoryName, { skillDir: result.destination });
    }
    const next = installNextCommand(fanout);

    const payload = {
        source: sourceSummary,
        result: result.result,
        store: result.store,
        skill: result.directoryName,
        destination: result.destination,
        previous: result.previous,
        filesCopied: result.filesCopied,
        contentHash: result.contentHash,
        provenancePath: result.provenancePath,
        fanout: fanOutToJson(fanout),
        next,
    };
    if (dryRun) {
        payload.dryRun = true;
    }

    const renderText = () => {
        const headline = dryRun
            ? `Would install ${result.directoryName} to ${result.destination}: filesCopied=${result.filesCopied}`
            : `Installed ${result.directoryName}: ${result.result}`;
        const lines = [
            headline,
            ` machine\t${result.previous}\t${result.destination}`,
            formatFanOut(fanout),
        ];
        if (next) {
            lines.push(`next: ${next}`);
        }
        return lines.join("\n");
    };
    return emitResult("workbench.cli.install.v1", payload, parsed, io, renderText);
}
|
|
928
925
|
async function handleCloudEval(parsed, io) {
|
|
@@ -1003,44 +1000,67 @@ async function handleCloudImprove(parsed, io) {
|
|
|
1003
1000
|
...(next ? [`next: ${next}`] : []),
|
|
1004
1001
|
].filter(Boolean).join("\n"));
|
|
1005
1002
|
}
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1003
|
+
/**
 * Builds the fan-out record used in place of a real fan-out during a dry run.
 *
 * @param name Skill directory name.
 * @param destination Path the skill would be installed to.
 * @returns A fan-out record with status "skipped", reason "dry-run", no linked
 *   agents, and the manual fan-out command for this skill.
 */
function skippedFanOut(name, destination) {
    const command = manualFanOutCommand(destination, name);
    return { status: "skipped", command, linkedAgents: [], reason: "dry-run" };
}
|
|
1011
|
+
/**
 * Picks the follow-up command to suggest after an install.
 *
 * @param fanout Fan-out record (status, reason, command).
 * @returns `fanout.command` when the fan-out failed, or was skipped for any
 *   reason other than "dry-run"; otherwise null.
 */
function installNextCommand(fanout) {
    if (fanout.status === "failed") {
        return fanout.command;
    }
    // A dry-run skip is expected, so it does not need a follow-up command.
    if (fanout.status === "skipped" && fanout.reason !== "dry-run") {
        return fanout.command;
    }
    return null;
}
|
|
1016
|
+
/**
 * Converts a fan-out record into the JSON shape emitted by the install command.
 *
 * `status`, `command` and `linkedAgents` are always copied. `additionalAgents`
 * and `reason` are included only when truthy; `exitCode` is included whenever
 * it is not undefined (so an exit code of 0 is kept).
 *
 * @param fanout Fan-out record.
 * @returns A new plain object with the fields described above.
 */
function fanOutToJson(fanout) {
    const json = {
        status: fanout.status,
        command: fanout.command,
        linkedAgents: fanout.linkedAgents,
    };
    if (fanout.additionalAgents) {
        json.additionalAgents = fanout.additionalAgents;
    }
    if (fanout.reason) {
        json.reason = fanout.reason;
    }
    if (fanout.exitCode !== undefined) {
        json.exitCode = fanout.exitCode;
    }
    return json;
}
|
|
1026
|
+
/**
 * Renders a one-line, human-readable summary of a fan-out record.
 *
 * - skipped with reason "dry-run": "fanout: planned"
 * - skipped otherwise: "fanout skipped: REASON" (REASON defaults to "not available")
 * - failed: "fanout failed: REASON" (REASON defaults to "unknown failure")
 * - any other status with no linked agents: "fanout: completed"
 * - otherwise: the comma-separated linked agents, plus " and N more" when
 *   `additionalAgents` is truthy
 *
 * @param fanout Fan-out record.
 * @returns The summary line.
 */
function formatFanOut(fanout) {
    switch (fanout.status) {
        case "skipped": {
            if (fanout.reason === "dry-run") {
                return "fanout: planned";
            }
            return `fanout skipped: ${fanout.reason ?? "not available"}`;
        }
        case "failed":
            return `fanout failed: ${fanout.reason ?? "unknown failure"}`;
        default:
            break;
    }
    const agents = fanout.linkedAgents;
    if (agents.length === 0) {
        return "fanout: completed";
    }
    let suffix = "";
    if (fanout.additionalAgents) {
        suffix = ` and ${fanout.additionalAgents} more`;
    }
    return `fanned out to: ${agents.join(", ")}${suffix}`;
}
|
|
1036
|
-
async function
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
return
|
|
1040
|
-
}
|
|
1041
|
-
catch {
|
|
1042
|
-
return false;
|
|
1041
|
+
/**
 * Looks up the latest published version id for an installed skill record.
 *
 * @param record Installed-skill record; `handle` and `baseUrl` are read.
 * @returns The `versionId` of the fetched source snapshot, or undefined when
 *   the record's handle does not normalize to an owner/skill pair.
 *   Rejects if the snapshot fetch fails or takes longer than 3000 ms.
 */
async function latestInstallVersion(record) {
    const ownerSkill = normalizedOwnerSkillHandle(record.handle);
    if (ownerSkill) {
        const lookup = fetchWorkbenchInstallSourceSnapshot({
            baseUrl: record.baseUrl,
            owner: ownerSkill.owner,
            skill: ownerSkill.skill,
        }, record.handle);
        // Bound the lookup so one slow source cannot stall the caller indefinitely.
        const latest = await withTimeout(lookup, 3000);
        return latest.versionId;
    }
    return undefined;
}
|
|
1053
|
+
/**
 * Races a promise against a timer.
 *
 * @param promise Promise (or thenable / plain value) to wait for. Plain values
 *   are accepted and resolve immediately.
 * @param {number} timeoutMs Maximum time to wait, in milliseconds.
 * @returns {Promise} Settles like `promise` when it settles first; otherwise
 *   rejects with `Error("Timed out after <timeoutMs>ms.")`.
 */
function withTimeout(promise, timeoutMs) {
    let timer;
    const timeout = new Promise((_resolve, reject) => {
        timer = setTimeout(() => reject(new Error(`Timed out after ${timeoutMs}ms.`)), timeoutMs);
    });
    // Promise.resolve tolerates non-promise input; the single finally() clears
    // the timer on every path so a pending timer never keeps the process alive.
    return Promise.race([Promise.resolve(promise), timeout]).finally(() => clearTimeout(timer));
}
|
|
1045
1065
|
async function startCloudExecution(command, parsed, io) {
|
|
1046
1066
|
const root = dirFlag(parsed) ?? process.cwd();
|
|
@@ -1062,12 +1082,23 @@ async function startCloudExecution(command, parsed, io) {
|
|
|
1062
1082
|
});
|
|
1063
1083
|
}
|
|
1064
1084
|
const core = { dir: root, authToken: token };
|
|
1065
|
-
|
|
1085
|
+
const request = command === "eval"
|
|
1086
|
+
? await prepareWorkbenchCloudEvalRequest({
|
|
1087
|
+
...core,
|
|
1088
|
+
skill: stringFlag(parsed, "skills"),
|
|
1089
|
+
agent: stringFlag(parsed, "agents"),
|
|
1090
|
+
samples: intFlag(parsed, "samples"),
|
|
1091
|
+
})
|
|
1092
|
+
: await prepareWorkbenchCloudImproveRequest({
|
|
1093
|
+
...core,
|
|
1094
|
+
skill: stringFlag(parsed, "skills"),
|
|
1095
|
+
agent: stringFlag(parsed, "agents"),
|
|
1096
|
+
samples: intFlag(parsed, "samples"),
|
|
1097
|
+
budget: intFlag(parsed, "budget"),
|
|
1098
|
+
});
|
|
1066
1099
|
const syncBefore = await syncWorkbenchRemote({ ...core, remote: remote.name });
|
|
1067
|
-
writeCloudProgress(io, `workbench cloud: synced ${remote.name} before hosted ${command} (pushed=${syncBefore.pushed}, pulled=${syncBefore.pulled}, up-to-date=${syncBefore.upToDate}).`, showProgress);
|
|
1068
|
-
const startSnapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
1069
1100
|
const skillId = await resolveCloudSkillId(source);
|
|
1070
|
-
const response = await apiRequest(`/api/workbench/skills/${encodeURIComponent(skillId)}${command === "improve" ? "/improve" : "/runs"}`, { method: "POST", body: cloudExecutionRequestBody(command,
|
|
1101
|
+
const response = await apiRequest(`/api/workbench/skills/${encodeURIComponent(skillId)}${command === "improve" ? "/improve" : "/runs"}`, { method: "POST", body: cloudExecutionRequestBody(command, request) }, source.baseUrl);
|
|
1071
1102
|
const runs = response.runs ?? [];
|
|
1072
1103
|
if (runs.length === 0) {
|
|
1073
1104
|
throw new WorkbenchCodedError("cloud_run_missing", `Workbench Cloud did not return a run for ${command}.`, {
|
|
@@ -1077,11 +1108,10 @@ async function startCloudExecution(command, parsed, io) {
|
|
|
1077
1108
|
exitCode: 1,
|
|
1078
1109
|
});
|
|
1079
1110
|
}
|
|
1111
|
+
const syncAfterSchedule = await syncWorkbenchRemote({ ...core, remote: remote.name });
|
|
1080
1112
|
const initialRunIds = runs.map((run) => run.id);
|
|
1081
1113
|
writeCloudProgress(io, `workbench cloud: scheduled hosted ${command} on ${remote.url} (${formatCloudRunStatuses(runs)}).`, showProgress);
|
|
1082
|
-
|
|
1083
|
-
writeCloudProgress(io, `workbench cloud: synced after scheduling hosted ${command} (pushed=${initialSyncAfter.pushed}, pulled=${initialSyncAfter.pulled}, up-to-date=${initialSyncAfter.upToDate}).`, showProgress);
|
|
1084
|
-
writeCloudProgress(io, `workbench cloud: waiting for terminal status; press Ctrl-C to detach and resume with workbench status or workbench show ${displayRef(initialRunIds[0] ?? "run")}.`, showProgress);
|
|
1114
|
+
writeCloudProgress(io, `workbench cloud: waiting for terminal status; press Ctrl-C to detach and resume with workbench show ${displayRef(initialRunIds[0] ?? "run")}.`, showProgress);
|
|
1085
1115
|
const completed = await waitForCloudRuns({
|
|
1086
1116
|
command,
|
|
1087
1117
|
core,
|
|
@@ -1089,7 +1119,9 @@ async function startCloudExecution(command, parsed, io) {
|
|
|
1089
1119
|
progress: showProgress,
|
|
1090
1120
|
remote,
|
|
1091
1121
|
runs,
|
|
1092
|
-
|
|
1122
|
+
source,
|
|
1123
|
+
skillId,
|
|
1124
|
+
initialSync: syncAfterSchedule,
|
|
1093
1125
|
});
|
|
1094
1126
|
return {
|
|
1095
1127
|
core,
|
|
@@ -1098,7 +1130,7 @@ async function startCloudExecution(command, parsed, io) {
|
|
|
1098
1130
|
initialRunIds,
|
|
1099
1131
|
runs: completed.runs,
|
|
1100
1132
|
...(completed.detached ? { detached: true } : {}),
|
|
1101
|
-
startVersionId:
|
|
1133
|
+
startVersionId: request.versionId,
|
|
1102
1134
|
source,
|
|
1103
1135
|
sync: {
|
|
1104
1136
|
before: { pushed: syncBefore.pushed, pulled: syncBefore.pulled, upToDate: syncBefore.upToDate },
|
|
@@ -1123,6 +1155,8 @@ async function waitForCloudRuns(input) {
|
|
|
1123
1155
|
const deadline = Date.now() + timeoutMs;
|
|
1124
1156
|
let runs = [...input.runs];
|
|
1125
1157
|
let interrupted = false;
|
|
1158
|
+
const startedAtMs = Date.now();
|
|
1159
|
+
let lastProgressAtMs = startedAtMs;
|
|
1126
1160
|
const onSigint = () => {
|
|
1127
1161
|
interrupted = true;
|
|
1128
1162
|
writeCloudProgress(input.io, `workbench cloud: detaching from hosted ${input.command} (${runIds.map(displayRef).join(", ")}).`, input.progress);
|
|
@@ -1131,25 +1165,28 @@ async function waitForCloudRuns(input) {
|
|
|
1131
1165
|
const seenStatuses = new Map();
|
|
1132
1166
|
try {
|
|
1133
1167
|
while (true) {
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
.filter((run) => Boolean(run));
|
|
1138
|
-
if (snapshotRuns.length > 0) {
|
|
1139
|
-
runs = runIds.map((id) => snapshotRuns.find((entry) => entry.id === id) ?? runs.find((entry) => entry.id === id))
|
|
1140
|
-
.filter((run) => Boolean(run));
|
|
1141
|
-
}
|
|
1168
|
+
runs = await fetchCloudRuns(input.source.baseUrl, input.skillId, runIds, runs);
|
|
1169
|
+
let wroteProgress = false;
|
|
1170
|
+
const nowMs = Date.now();
|
|
1142
1171
|
for (const run of runs) {
|
|
1143
1172
|
const previous = seenStatuses.get(run.id);
|
|
1144
1173
|
if (previous !== run.status) {
|
|
1145
1174
|
seenStatuses.set(run.id, run.status);
|
|
1146
|
-
writeCloudProgress(input.io, `workbench cloud: ${
|
|
1175
|
+
writeCloudProgress(input.io, `workbench cloud: ${formatCloudRunState(run, startedAtMs, nowMs)}.`, input.progress);
|
|
1176
|
+
wroteProgress = input.progress || wroteProgress;
|
|
1147
1177
|
}
|
|
1148
1178
|
}
|
|
1149
1179
|
if (runs.length === runIds.length && runs.every(isTerminalRun)) {
|
|
1150
|
-
|
|
1180
|
+
sync = await syncWorkbenchRemote({ ...input.core, remote: input.remote.name });
|
|
1151
1181
|
return { runs, sync };
|
|
1152
1182
|
}
|
|
1183
|
+
if (wroteProgress) {
|
|
1184
|
+
lastProgressAtMs = nowMs;
|
|
1185
|
+
}
|
|
1186
|
+
else if (input.progress && nowMs - lastProgressAtMs >= 60_000) {
|
|
1187
|
+
writeCloudProgress(input.io, `workbench cloud: still waiting (${formatCloudRunStates(runs, startedAtMs, nowMs)}).`);
|
|
1188
|
+
lastProgressAtMs = nowMs;
|
|
1189
|
+
}
|
|
1153
1190
|
if (interrupted) {
|
|
1154
1191
|
return { runs, sync, detached: true };
|
|
1155
1192
|
}
|
|
@@ -1168,14 +1205,18 @@ async function waitForCloudRuns(input) {
|
|
|
1168
1205
|
if (interrupted) {
|
|
1169
1206
|
return { runs, sync, detached: true };
|
|
1170
1207
|
}
|
|
1171
|
-
sync = await syncWorkbenchRemote({ ...input.core, remote: input.remote.name });
|
|
1172
|
-
writeCloudProgress(input.io, `workbench cloud: synced ${input.remote.name} while waiting (${formatCloudRunStatuses(runs)}).`, input.progress);
|
|
1173
1208
|
}
|
|
1174
1209
|
}
|
|
1175
1210
|
finally {
|
|
1176
1211
|
process.off("SIGINT", onSigint);
|
|
1177
1212
|
}
|
|
1178
1213
|
}
|
|
1214
|
+
/**
 * Fetches the current state of each run from the skill's runs endpoint.
 *
 * All requests are issued concurrently; if any request rejects, the whole
 * call rejects. For each run id the response's `run` is used, falling back to
 * the entry with the same id in `fallback`. Ids with neither are dropped.
 *
 * @param baseUrl Base URL passed through to apiRequest.
 * @param skillId Skill id used in the request path.
 * @param runIds Run ids to fetch, in the order results should be returned.
 * @param fallback Previously known runs, used when a response carries no run.
 * @returns The resolved runs, in `runIds` order.
 */
async function fetchCloudRuns(baseUrl, skillId, runIds, fallback) {
    const pending = runIds.map((runId) => {
        const runPath = `/api/workbench/skills/${encodeURIComponent(skillId)}/runs/${encodeURIComponent(runId)}`;
        return apiRequest(runPath, {}, baseUrl);
    });
    const responses = await Promise.all(pending);
    const resolved = [];
    runIds.forEach((runId, index) => {
        const run = responses[index]?.run ?? fallback.find((entry) => entry.id === runId);
        if (run) {
            resolved.push(run);
        }
    });
    return resolved;
}
|
|
1179
1220
|
/**
 * Reports whether a run has reached a final status.
 *
 * @param run Run object; only `status` is read.
 * @returns {boolean} true when status is "succeeded", "failed" or "canceled".
 */
function isTerminalRun(run) {
    const { status } = run;
    return ["succeeded", "failed", "canceled"].includes(status);
}
|
|
@@ -1288,13 +1329,15 @@ async function resolveCloudSkillId(source) {
|
|
|
1288
1329
|
}
|
|
1289
1330
|
return skill.id;
|
|
1290
1331
|
}
|
|
1291
|
-
function cloudExecutionRequestBody(command,
|
|
1332
|
+
/**
 * Builds the POST body for a hosted eval or improve request.
 *
 * @param command CLI command name; "improve" additionally carries `budget`.
 * @param request Prepared cloud request; `versionId`, `skill`, `agent`,
 *   `samples` (and `budget` for improve) are copied from it.
 * @returns A new object with those fields, in that order.
 */
function cloudExecutionRequestBody(command, request) {
    const { versionId, skill, agent, samples } = request;
    const body = { versionId, skill, agent, samples };
    if (command === "improve") {
        // The key is set even when request.budget is undefined.
        body.budget = request.budget;
    }
    return body;
}
|
|
1300
1343
|
function cloudImproveNextCommand(runs) {
|
|
@@ -1335,6 +1378,17 @@ function formatCloudRunStatuses(runs) {
|
|
|
1335
1378
|
? runs.map((run) => `${displayRef(run.id)}:${run.status}`).join(", ")
|
|
1336
1379
|
: "no runs";
|
|
1337
1380
|
}
|
|
1381
|
+
/**
 * Formats the state of several hosted runs as one comma-separated string.
 *
 * @param runs Runs to describe.
 * @param startedAtMs Epoch milliseconds when waiting started.
 * @param nowMs Current epoch milliseconds.
 * @returns Each run formatted by formatCloudRunState and joined with ", ",
 *   or "no runs (Ns)" with the elapsed seconds when there are no runs.
 */
function formatCloudRunStates(runs, startedAtMs, nowMs) {
    const hasRuns = runs.length > 0;
    if (!hasRuns) {
        return `no runs (${elapsedSeconds(startedAtMs, nowMs)}s)`;
    }
    const states = runs.map((run) => formatCloudRunState(run, startedAtMs, nowMs));
    return states.join(", ");
}
|
|
1386
|
+
/**
 * Formats one hosted run as "REF STATUS (Ns)".
 *
 * @param run Run object; `id` and `status` are read.
 * @param startedAtMs Epoch milliseconds when waiting started.
 * @param nowMs Current epoch milliseconds.
 * @returns The display ref of the run id, its status, and the elapsed seconds.
 */
function formatCloudRunState(run, startedAtMs, nowMs) {
    const ref = displayRef(run.id);
    const seconds = elapsedSeconds(startedAtMs, nowMs);
    return `${ref} ${run.status} (${seconds}s)`;
}
|
|
1389
|
+
/**
 * Computes whole seconds elapsed between two millisecond timestamps.
 *
 * @param {number} startedAtMs Start time in milliseconds.
 * @param {number} nowMs Current time in milliseconds.
 * @returns {number} Elapsed seconds rounded down, never less than 0.
 */
function elapsedSeconds(startedAtMs, nowMs) {
    const wholeSeconds = Math.floor((nowMs - startedAtMs) / 1000);
    // Clamp so an earlier "now" than "start" reports 0 rather than a negative value.
    return Math.max(0, wholeSeconds);
}
|
|
1338
1392
|
function workbenchInstallSourceSummary(source, snapshot) {
|
|
1339
1393
|
const installUrl = `${source.baseUrl}/skills/${encodeURIComponent(source.owner)}/${encodeURIComponent(source.skill)}`;
|
|
1340
1394
|
return {
|
|
@@ -1495,7 +1549,6 @@ async function loadConfig() {
|
|
|
1495
1549
|
...(typeof parsed.baseUrl === "string" ? { baseUrl: normalizeBaseUrl(parsed.baseUrl) } : {}),
|
|
1496
1550
|
...(typeof parsed.accessToken === "string" ? { accessToken: parsed.accessToken } : {}),
|
|
1497
1551
|
...(typeof parsed.username === "string" ? { username: parsed.username } : {}),
|
|
1498
|
-
...(Array.isArray(parsed.installTargets) ? { installTargets: normalizeInstallTargetNames(parsed.installTargets.flatMap((entry) => typeof entry === "string" ? [entry] : [])) } : {}),
|
|
1499
1552
|
};
|
|
1500
1553
|
}
|
|
1501
1554
|
// Single resolver for the Workbench Cloud token used by every authenticated
|
|
@@ -2250,7 +2303,7 @@ function flagSpecForParsedPrefix(positionals, flags) {
|
|
|
2250
2303
|
return allowedFlagsForCommand({ positionals: [...positionals], flags: {} }, command);
|
|
2251
2304
|
}
|
|
2252
2305
|
function addFlag(flags, name, value) {
|
|
2253
|
-
if (name === "with"
|
|
2306
|
+
if (name === "with") {
|
|
2254
2307
|
const existing = flags[name];
|
|
2255
2308
|
flags[name] = Array.isArray(existing)
|
|
2256
2309
|
? [...existing, String(value)]
|
|
@@ -2274,14 +2327,6 @@ function stringFlag(parsed, name) {
|
|
|
2274
2327
|
const value = parsed.flags[name];
|
|
2275
2328
|
return typeof value === "string" ? value : undefined;
|
|
2276
2329
|
}
|
|
2277
|
-
function stringsFlag(parsed, name) {
|
|
2278
|
-
const value = parsed.flags[name];
|
|
2279
|
-
return Array.isArray(value)
|
|
2280
|
-
? value
|
|
2281
|
-
: typeof value === "string"
|
|
2282
|
-
? [value]
|
|
2283
|
-
: [];
|
|
2284
|
-
}
|
|
2285
2330
|
function intFlag(parsed, name) {
|
|
2286
2331
|
const value = stringFlag(parsed, name);
|
|
2287
2332
|
if (!value) {
|
|
@@ -2382,12 +2427,17 @@ async function previewPublishWithDerivedRemote(parsed) {
|
|
|
2382
2427
|
return {
|
|
2383
2428
|
remote,
|
|
2384
2429
|
version,
|
|
2385
|
-
visibility: parsePublishVisibilityFlags(parsed) ??
|
|
2430
|
+
visibility: parsePublishVisibilityFlags(parsed) ??
|
|
2431
|
+
normalizePublishVisibility(reconciledSnapshot.refs["publication/visibility"]) ??
|
|
2432
|
+
"private",
|
|
2386
2433
|
installHandle: installHandleFromCloudRemote(remote),
|
|
2387
2434
|
installUrl: remote.url,
|
|
2388
2435
|
pinnedInstallUrl: `${remote.url}/releases/${encodeURIComponent(version.id)}`,
|
|
2389
2436
|
};
|
|
2390
2437
|
}
|
|
2438
|
+
function normalizePublishVisibility(value) {
|
|
2439
|
+
return value === "private" || value === "internal" || value === "public" ? value : undefined;
|
|
2440
|
+
}
|
|
2391
2441
|
async function ensurePublishRemote(parsed) {
|
|
2392
2442
|
const core = await coreOptions(parsed);
|
|
2393
2443
|
const root = path.resolve(dirFlag(parsed) ?? process.cwd());
|
|
@@ -2460,7 +2510,7 @@ async function resolveWorkbenchInstallSourceInput(input) {
|
|
|
2460
2510
|
const handle = normalizedOwnerSkillHandle(input);
|
|
2461
2511
|
if (!handle) {
|
|
2462
2512
|
throw new WorkbenchCodedError("usage", "workbench install expects OWNER/SKILL or a Workbench Cloud skill URL.", {
|
|
2463
|
-
remediation: "Run workbench install OWNER/SKILL
|
|
2513
|
+
remediation: "Run workbench install OWNER/SKILL.",
|
|
2464
2514
|
exitCode: 2,
|
|
2465
2515
|
});
|
|
2466
2516
|
}
|
|
@@ -2611,11 +2661,25 @@ async function workbenchCliAuthStatus() {
|
|
|
2611
2661
|
})),
|
|
2612
2662
|
};
|
|
2613
2663
|
}
|
|
2664
|
+
async function workbenchMachineStatus(auth) {
|
|
2665
|
+
const inventory = await readInstalledSkillsInventory();
|
|
2666
|
+
return {
|
|
2667
|
+
installedSkillCount: inventory.skills.length,
|
|
2668
|
+
stores: inventory.stores,
|
|
2669
|
+
connectedProviders: auth.adapters
|
|
2670
|
+
.filter((entry) => entry.status === "connected")
|
|
2671
|
+
.map((entry) => ({
|
|
2672
|
+
adapter: entry.adapter,
|
|
2673
|
+
...(entry.slot ? { slot: entry.slot } : {}),
|
|
2674
|
+
profile: entry.profile,
|
|
2675
|
+
})),
|
|
2676
|
+
};
|
|
2677
|
+
}
|
|
2614
2678
|
function scoredRunValue(run) {
|
|
2615
|
-
return
|
|
2679
|
+
return typeof run.score === "number" ? run.score : undefined;
|
|
2616
2680
|
}
|
|
2617
2681
|
function scoredJobValue(job) {
|
|
2618
|
-
return
|
|
2682
|
+
return typeof job.score === "number" ? job.score : undefined;
|
|
2619
2683
|
}
|
|
2620
2684
|
function snapshotHasWorkflowCase(snapshot) {
|
|
2621
2685
|
const currentVersion = snapshotVersionByRef(snapshot, snapshot.status.currentVersionId ?? snapshot.refs.current ?? "");
|
|
@@ -2628,9 +2692,12 @@ function installHandleFromStatusRemote(remote) {
|
|
|
2628
2692
|
const source = parseWorkbenchInstallSource(publicationUrl ?? remote.url);
|
|
2629
2693
|
return source ? `${source.owner}/${source.skill}` : publicationUrl ?? remote.url;
|
|
2630
2694
|
}
|
|
2631
|
-
async function statusWithCausalNext(status, auth, core) {
|
|
2695
|
+
async function statusWithCausalNext(status, auth, core, machine) {
|
|
2632
2696
|
if (!status.project.initialized) {
|
|
2633
|
-
return
|
|
2697
|
+
return {
|
|
2698
|
+
...status,
|
|
2699
|
+
next: machine.installedSkillCount > 0 ? "workbench install" : status.next,
|
|
2700
|
+
};
|
|
2634
2701
|
}
|
|
2635
2702
|
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core).catch(() => null);
|
|
2636
2703
|
const lastRun = snapshot?.runs
|
|
@@ -2767,28 +2834,61 @@ function evidenceFilesForRunOrJob(snapshot, ref) {
|
|
|
2767
2834
|
}
|
|
2768
2835
|
const traceById = new Map(snapshot.traces.map((trace) => [trace.id, trace]));
|
|
2769
2836
|
const artifactById = new Map(snapshot.artifacts.map((artifact) => [artifact.id, artifact]));
|
|
2770
|
-
const
|
|
2837
|
+
const candidates = selection.jobs.flatMap((job) => [
|
|
2771
2838
|
...job.artifactIds.flatMap((artifactId) => {
|
|
2772
2839
|
const artifact = artifactById.get(artifactId);
|
|
2773
2840
|
return artifact
|
|
2774
|
-
? artifact.files.filter(isUserFacingEvidenceFile).map((file) =>
|
|
2841
|
+
? artifact.files.filter(isUserFacingEvidenceFile).map((file) => ({
|
|
2842
|
+
file: evidenceFileWithPath(file, `cases/${evidencePathSegment(job.caseId)}/jobs/${evidencePathSegment(job.id)}/${file.path}`),
|
|
2843
|
+
jobId: job.id,
|
|
2844
|
+
source: "artifact",
|
|
2845
|
+
}))
|
|
2775
2846
|
: [];
|
|
2776
2847
|
}),
|
|
2777
2848
|
...job.traceIds.flatMap((traceId) => {
|
|
2778
2849
|
const trace = traceById.get(traceId);
|
|
2779
2850
|
return trace
|
|
2780
|
-
? trace.files.filter(
|
|
2851
|
+
? trace.files.filter(isUserFacingTraceEvidenceFile).map((file) => ({
|
|
2852
|
+
file: evidenceFileWithPath(file, `cases/${evidencePathSegment(job.caseId)}/jobs/${evidencePathSegment(job.id)}/traces/${evidencePathSegment(trace.id)}/${file.path}`),
|
|
2853
|
+
jobId: job.id,
|
|
2854
|
+
source: "trace",
|
|
2855
|
+
}))
|
|
2781
2856
|
: [];
|
|
2782
2857
|
}),
|
|
2783
2858
|
]);
|
|
2859
|
+
return canonicalEvidenceFiles(candidates);
|
|
2860
|
+
}
|
|
2861
|
+
function canonicalEvidenceFiles(candidates) {
|
|
2784
2862
|
const seen = new Set();
|
|
2785
|
-
|
|
2863
|
+
const sameJobArtifactFiles = new Set();
|
|
2864
|
+
const files = [];
|
|
2865
|
+
for (const candidate of candidates) {
|
|
2866
|
+
const file = candidate.file;
|
|
2786
2867
|
if (seen.has(file.path)) {
|
|
2787
|
-
|
|
2868
|
+
continue;
|
|
2788
2869
|
}
|
|
2789
2870
|
seen.add(file.path);
|
|
2790
|
-
|
|
2791
|
-
|
|
2871
|
+
const equivalentKey = sameJobEquivalentEvidenceKey(candidate);
|
|
2872
|
+
if (candidate.source === "trace" && sameJobArtifactFiles.has(equivalentKey)) {
|
|
2873
|
+
continue;
|
|
2874
|
+
}
|
|
2875
|
+
if (candidate.source === "artifact") {
|
|
2876
|
+
sameJobArtifactFiles.add(equivalentKey);
|
|
2877
|
+
}
|
|
2878
|
+
files.push(file);
|
|
2879
|
+
}
|
|
2880
|
+
return files;
|
|
2881
|
+
}
|
|
2882
|
+
function sameJobEquivalentEvidenceKey(candidate) {
|
|
2883
|
+
const file = candidate.file;
|
|
2884
|
+
return [
|
|
2885
|
+
candidate.jobId,
|
|
2886
|
+
path.basename(file.path),
|
|
2887
|
+
file.kind ?? "text",
|
|
2888
|
+
file.encoding ?? "utf8",
|
|
2889
|
+
file.executable === true ? "1" : "0",
|
|
2890
|
+
file.content,
|
|
2891
|
+
].join("\0");
|
|
2792
2892
|
}
|
|
2793
2893
|
function evidenceFileWithPath(file, filePath) {
|
|
2794
2894
|
return {
|
|
@@ -2800,6 +2900,13 @@ function isUserFacingEvidenceFile(file) {
|
|
|
2800
2900
|
const normalized = file.path.replace(/\\/gu, "/").replace(/^\/+/u, "");
|
|
2801
2901
|
return normalized.split("/").every((segment) => segment !== ".workbench");
|
|
2802
2902
|
}
|
|
2903
|
+
function isUserFacingTraceEvidenceFile(file) {
|
|
2904
|
+
if (!isUserFacingEvidenceFile(file)) {
|
|
2905
|
+
return false;
|
|
2906
|
+
}
|
|
2907
|
+
const basename = path.basename(file.path.replace(/\\/gu, "/"));
|
|
2908
|
+
return basename !== "request.json" && basename !== "result.json" && basename !== "trace.json";
|
|
2909
|
+
}
|
|
2803
2910
|
function evidencePathSegment(value) {
|
|
2804
2911
|
return value.replace(/[^A-Za-z0-9._-]+/gu, "-") || "_";
|
|
2805
2912
|
}
|
|
@@ -2849,41 +2956,6 @@ function manifestOnly(value) {
|
|
|
2849
2956
|
}
|
|
2850
2957
|
return out;
|
|
2851
2958
|
}
|
|
2852
|
-
async function resolveLocalImproverAgent(parsed, core) {
|
|
2853
|
-
if (stringFlag(parsed, "agents")) {
|
|
2854
|
-
return undefined;
|
|
2855
|
-
}
|
|
2856
|
-
const agents = await listWorkbenchAgents(core).catch(() => []);
|
|
2857
|
-
const status = await workbenchStatusSnapshot(core).catch(() => undefined);
|
|
2858
|
-
const defaultAgentName = status?.project.defaultAgent ?? agents[0]?.name;
|
|
2859
|
-
const defaultAgent = agents.find((agent) => agent.name === defaultAgentName);
|
|
2860
|
-
if (defaultAgent && workbenchSkillImproveCanUseQueuedAdapter(defaultAgent)) {
|
|
2861
|
-
return undefined;
|
|
2862
|
-
}
|
|
2863
|
-
const connected = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).listStatus().catch(() => []);
|
|
2864
|
-
const candidates = connected
|
|
2865
|
-
.filter((entry) => entry.status === "connected" &&
|
|
2866
|
-
(entry.adapterId === "claude" || entry.adapterId === "codex"))
|
|
2867
|
-
.sort((left, right) => {
|
|
2868
|
-
const adapterRank = (adapter) => adapter === "claude" ? 0 : adapter === "codex" ? 1 : 2;
|
|
2869
|
-
return adapterRank(left.adapterId) - adapterRank(right.adapterId) ||
|
|
2870
|
-
(Date.parse(right.updatedAt ?? "") || 0) - (Date.parse(left.updatedAt ?? "") || 0);
|
|
2871
|
-
});
|
|
2872
|
-
const selected = candidates[0];
|
|
2873
|
-
if (!selected) {
|
|
2874
|
-
throw new WorkbenchCodedError("auth_required", "workbench improve needs a connected improver.", {
|
|
2875
|
-
remediation: "Run workbench login claude (or codex) to connect an improver.",
|
|
2876
|
-
exitCode: 1,
|
|
2877
|
-
});
|
|
2878
|
-
}
|
|
2879
|
-
return {
|
|
2880
|
-
name: selected.adapterId,
|
|
2881
|
-
adapter: selected.adapterId,
|
|
2882
|
-
config: {
|
|
2883
|
-
auth: selected.slot ? { [selected.slot]: selected.profile } : selected.profile,
|
|
2884
|
-
},
|
|
2885
|
-
};
|
|
2886
|
-
}
|
|
2887
2959
|
function formatLogEntry(entry) {
|
|
2888
2960
|
if (entry.kind === "version") {
|
|
2889
2961
|
return `${entry.createdAt}\tversion\t${displayRef(entry.id)}\tfiles=${entry.fileCount}\t${entry.message}`;
|
|
@@ -2948,17 +3020,6 @@ function findShowFile(files, requestedPath, objectRef) {
|
|
|
2948
3020
|
const candidates = normalized === "stderr.log"
|
|
2949
3021
|
? suffixCandidates.filter((file) => file.content.length > 0)
|
|
2950
3022
|
: suffixCandidates;
|
|
2951
|
-
const canonicalCandidates = candidates.filter(isCanonicalEvidenceFileCandidate);
|
|
2952
|
-
if (canonicalCandidates.length === 1) {
|
|
2953
|
-
return canonicalCandidates[0];
|
|
2954
|
-
}
|
|
2955
|
-
const equivalentCanonicalCandidate = singleEquivalentShowFile(canonicalCandidates);
|
|
2956
|
-
if (equivalentCanonicalCandidate) {
|
|
2957
|
-
return equivalentCanonicalCandidate;
|
|
2958
|
-
}
|
|
2959
|
-
if (canonicalCandidates.length > 1) {
|
|
2960
|
-
throw ambiguousShowPath(objectRef, requestedPath, canonicalCandidates);
|
|
2961
|
-
}
|
|
2962
3023
|
if (candidates.length === 1) {
|
|
2963
3024
|
return candidates[0];
|
|
2964
3025
|
}
|
|
@@ -2975,9 +3036,6 @@ function findShowFile(files, requestedPath, objectRef) {
|
|
|
2975
3036
|
}
|
|
2976
3037
|
throw ambiguousShowPath(objectRef, requestedPath, candidates.length > 0 ? candidates : suffixCandidates);
|
|
2977
3038
|
}
|
|
2978
|
-
function isCanonicalEvidenceFileCandidate(file) {
|
|
2979
|
-
return !file.path.includes("/traces/") && !file.path.includes("/artifacts/");
|
|
2980
|
-
}
|
|
2981
3039
|
function singleEquivalentShowFile(files) {
|
|
2982
3040
|
if (files.length <= 1) {
|
|
2983
3041
|
return null;
|
|
@@ -3006,23 +3064,6 @@ function fileListing(kind, id, files) {
|
|
|
3006
3064
|
function formatFileListing(kind, id, files) {
|
|
3007
3065
|
return [`${kind}\t${displayRef(id)}\tfiles=${files.length}`, ...files.map((file) => file.path)].join("\n");
|
|
3008
3066
|
}
|
|
3009
|
-
async function traceIdForCaseSource(core, ref) {
|
|
3010
|
-
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
3011
|
-
const trace = snapshotObjectByRef(snapshot.traces, ref, "trace");
|
|
3012
|
-
if (trace) {
|
|
3013
|
-
return trace.id;
|
|
3014
|
-
}
|
|
3015
|
-
const selection = runOrJobEvidenceSelection(snapshot, ref);
|
|
3016
|
-
const traceId = selection.run?.traceIds[0] ?? selection.jobs[0]?.traceIds[0];
|
|
3017
|
-
if (traceId) {
|
|
3018
|
-
return traceId;
|
|
3019
|
-
}
|
|
3020
|
-
throw new WorkbenchCodedError("ref_not_found", `Run, job, or trace not found: ${ref}`, {
|
|
3021
|
-
remediation: "Run workbench log, then workbench case add RUN_ID.",
|
|
3022
|
-
subject: { ref },
|
|
3023
|
-
exitCode: 1,
|
|
3024
|
-
});
|
|
3025
|
-
}
|
|
3026
3067
|
async function evalCoverageSummaries(core, runs) {
|
|
3027
3068
|
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
3028
3069
|
const jobsByRun = new Map();
|
|
@@ -3101,6 +3142,12 @@ function formatStatusSnapshot(status) {
|
|
|
3101
3142
|
const lines = [
|
|
3102
3143
|
`Root: ${status.project.root}`,
|
|
3103
3144
|
`Initialized: ${status.project.initialized ? "yes" : "no"}`,
|
|
3145
|
+
...(status.project.initialized ? [] : [
|
|
3146
|
+
`Installed skills: ${status.machine?.installedSkillCount ?? 0}`,
|
|
3147
|
+
`Connected providers: ${status.machine?.connectedProviders.length
|
|
3148
|
+
? status.machine.connectedProviders.map((entry) => `${entry.adapter}/${entry.profile}`).join(", ")
|
|
3149
|
+
: "none"}`,
|
|
3150
|
+
]),
|
|
3104
3151
|
...(status.project.currentVersionId ? [`Current version: ${displayRef(status.project.currentVersionId)}`] : []),
|
|
3105
3152
|
...(status.project.defaultSkill ? [`Default skill: ${status.project.defaultSkill}`] : []),
|
|
3106
3153
|
...(status.project.defaultAgent ? [`Default agent: ${status.project.defaultAgent}`] : []),
|
|
@@ -3130,6 +3177,36 @@ function formatStatusSnapshot(status) {
|
|
|
3130
3177
|
];
|
|
3131
3178
|
return lines.join("\n");
|
|
3132
3179
|
}
|
|
3180
|
+
function formatInstalledInventory(inventory) {
|
|
3181
|
+
if (inventory.skills.length === 0) {
|
|
3182
|
+
return [
|
|
3183
|
+
"No skills installed.",
|
|
3184
|
+
...(inventory.next ? [`next: ${inventory.next}`] : []),
|
|
3185
|
+
].join("\n");
|
|
3186
|
+
}
|
|
3187
|
+
const lines = [
|
|
3188
|
+
"store\tname\tversion\tstatus\tsource",
|
|
3189
|
+
...inventory.skills.map(formatInstalledSkill),
|
|
3190
|
+
...(inventory.next ? [`next: ${inventory.next}`] : []),
|
|
3191
|
+
];
|
|
3192
|
+
return lines.join("\n");
|
|
3193
|
+
}
|
|
3194
|
+
function formatInstalledSkill(skill) {
|
|
3195
|
+
return [
|
|
3196
|
+
skill.store,
|
|
3197
|
+
skill.name,
|
|
3198
|
+
skill.versionId ? shortInstalledVersion(skill.versionId) : "-",
|
|
3199
|
+
skill.status,
|
|
3200
|
+
skill.handle ?? "(no provenance)",
|
|
3201
|
+
].join("\t");
|
|
3202
|
+
}
|
|
3203
|
+
function shortInstalledVersion(versionId) {
|
|
3204
|
+
return versionId.startsWith("v_") && versionId.length > 10
|
|
3205
|
+
? displayRef(versionId)
|
|
3206
|
+
: versionId.length > 12
|
|
3207
|
+
? versionId.slice(0, 12)
|
|
3208
|
+
: versionId;
|
|
3209
|
+
}
|
|
3133
3210
|
function formatVersion(version) {
|
|
3134
3211
|
return `${displayRef(version.id)}\t${version.hash.slice(0, 12)}\t${version.message}`;
|
|
3135
3212
|
}
|