@workbench-ai/workbench 0.0.72 → 0.0.74
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/fanout.d.ts +13 -0
- package/dist/fanout.d.ts.map +1 -0
- package/dist/fanout.js +212 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +326 -184
- package/dist/install-targets.d.ts +54 -21
- package/dist/install-targets.d.ts.map +1 -1
- package/dist/install-targets.js +333 -118
- package/package.json +7 -6
package/dist/index.js
CHANGED
|
@@ -4,10 +4,11 @@ import { createRequire } from "node:module";
|
|
|
4
4
|
import os from "node:os";
|
|
5
5
|
import path from "node:path";
|
|
6
6
|
import { gzipSync } from "node:zlib";
|
|
7
|
-
import {
|
|
7
|
+
import { addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchInspectionSnapshot, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, prepareWorkbenchCloudEvalRequest, prepareWorkbenchCloudImproveRequest, publishWorkbenchVersion, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchSkillImproveCanUseQueuedAdapter, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
|
|
8
8
|
import { normalizeWorkbenchSkillName } from "@workbench-ai/workbench-contract";
|
|
9
9
|
import { emitError, emitResult } from "./output.js";
|
|
10
|
-
import {
|
|
10
|
+
import { fanOutSkill, manualFanOutCommand } from "./fanout.js";
|
|
11
|
+
import { installedInventoryToJson, installSnapshotToStore, normalizeInstallSnapshotPath, readInstalledSkillsInventory, } from "./install-targets.js";
|
|
11
12
|
import { startWorkbenchOpenServer } from "./open-server.js";
|
|
12
13
|
const require = createRequire(import.meta.url);
|
|
13
14
|
const HELP = [
|
|
@@ -19,11 +20,11 @@ const HELP = [
|
|
|
19
20
|
"",
|
|
20
21
|
"Taught commands:",
|
|
21
22
|
" workbench new [DIR] [--json]",
|
|
22
|
-
" workbench eval [
|
|
23
|
-
" workbench improve [
|
|
23
|
+
" workbench eval [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
24
|
+
" workbench improve [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
24
25
|
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
25
26
|
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
|
|
26
|
-
" workbench install HANDLE_OR_URL
|
|
27
|
+
" workbench install [HANDLE_OR_URL] [--yes] [--dry-run] [--json]",
|
|
27
28
|
"",
|
|
28
29
|
"More:",
|
|
29
30
|
" workbench help --all",
|
|
@@ -32,11 +33,11 @@ const HELP_ALL = [
|
|
|
32
33
|
"Usage:",
|
|
33
34
|
" workbench # = workbench status",
|
|
34
35
|
" workbench new [DIR] [--json]",
|
|
35
|
-
" workbench eval [
|
|
36
|
+
" workbench eval [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
36
37
|
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
37
|
-
" workbench improve [
|
|
38
|
+
" workbench improve [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
38
39
|
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
|
|
39
|
-
" workbench install HANDLE_OR_URL
|
|
40
|
+
" workbench install [HANDLE_OR_URL] [--yes] [--dry-run] [--json]",
|
|
40
41
|
"",
|
|
41
42
|
"Inspect:",
|
|
42
43
|
" workbench status [--dir DIR] [--json]",
|
|
@@ -47,7 +48,6 @@ const HELP_ALL = [
|
|
|
47
48
|
" workbench open [--host HOST] [--port PORT] [--no-open]",
|
|
48
49
|
"",
|
|
49
50
|
"Configure:",
|
|
50
|
-
" workbench case add RUN_ID [--json]",
|
|
51
51
|
" workbench agent add NAME --adapter X [--model M] [--with k=v]... | list | rm NAME [--json]",
|
|
52
52
|
"",
|
|
53
53
|
"Share and auth:",
|
|
@@ -71,7 +71,7 @@ const COMMAND_HELP = {
|
|
|
71
71
|
].join("\n"),
|
|
72
72
|
eval: [
|
|
73
73
|
"Usage:",
|
|
74
|
-
" workbench eval [
|
|
74
|
+
" workbench eval [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
75
75
|
"",
|
|
76
76
|
"Runs eval jobs for the selected version, measured skills, and agents. Omitted selectors use manifest defaults.",
|
|
77
77
|
"",
|
|
@@ -80,7 +80,7 @@ const COMMAND_HELP = {
|
|
|
80
80
|
].join("\n"),
|
|
81
81
|
improve: [
|
|
82
82
|
"Usage:",
|
|
83
|
-
" workbench improve [
|
|
83
|
+
" workbench improve [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
84
84
|
"",
|
|
85
85
|
"Creates one improved child version from evidence. The selected skills and agents must resolve to exactly one entry each.",
|
|
86
86
|
"",
|
|
@@ -98,12 +98,12 @@ const COMMAND_HELP = {
|
|
|
98
98
|
].join("\n"),
|
|
99
99
|
install: [
|
|
100
100
|
"Usage:",
|
|
101
|
-
" workbench install HANDLE_OR_URL
|
|
101
|
+
" workbench install [HANDLE_OR_URL] [--yes] [--dry-run] [--json]",
|
|
102
102
|
"",
|
|
103
|
-
"Installs published
|
|
103
|
+
"Installs published source into the canonical machine skill store, or lists installed skills when no source is given.",
|
|
104
104
|
"",
|
|
105
105
|
"Example:",
|
|
106
|
-
" workbench install acme/earnings-prep
|
|
106
|
+
" workbench install acme/earnings-prep",
|
|
107
107
|
].join("\n"),
|
|
108
108
|
status: [
|
|
109
109
|
"Usage:",
|
|
@@ -169,15 +169,6 @@ const COMMAND_HELP = {
|
|
|
169
169
|
"Example:",
|
|
170
170
|
" workbench open --no-open",
|
|
171
171
|
].join("\n"),
|
|
172
|
-
case: [
|
|
173
|
-
"Usage:",
|
|
174
|
-
" workbench case add RUN_ID [--json]",
|
|
175
|
-
"",
|
|
176
|
-
"Captures a regression case from a recorded run.",
|
|
177
|
-
"",
|
|
178
|
-
"Example:",
|
|
179
|
-
" workbench case add run_abc12345",
|
|
180
|
-
].join("\n"),
|
|
181
172
|
agent: [
|
|
182
173
|
"Usage:",
|
|
183
174
|
" workbench agent list [--json]",
|
|
@@ -256,7 +247,7 @@ const COMMAND_FLAGS = {
|
|
|
256
247
|
samples: "positive-integer",
|
|
257
248
|
skills: "string",
|
|
258
249
|
},
|
|
259
|
-
install: { ...COMMON_FLAGS, ...HELP_FLAG, "dry-run": "boolean",
|
|
250
|
+
install: { ...COMMON_FLAGS, ...HELP_FLAG, "dry-run": "boolean", yes: "boolean" },
|
|
260
251
|
log: { ...PROJECT_FLAGS, ...HELP_FLAG, runs: "boolean", versions: "boolean" },
|
|
261
252
|
login: {
|
|
262
253
|
...COMMON_FLAGS,
|
|
@@ -290,11 +281,6 @@ const COMMAND_FLAGS = {
|
|
|
290
281
|
version: { ...COMMON_FLAGS, ...VERSION_FLAG },
|
|
291
282
|
};
|
|
292
283
|
const SUBCOMMAND_FLAGS = {
|
|
293
|
-
case: {
|
|
294
|
-
flags: {
|
|
295
|
-
add: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
296
|
-
},
|
|
297
|
-
},
|
|
298
284
|
agent: {
|
|
299
285
|
flags: {
|
|
300
286
|
list: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
@@ -345,12 +331,16 @@ export async function runCli(argv, io = {
|
|
|
345
331
|
return await handleStatus(parsed, io);
|
|
346
332
|
}
|
|
347
333
|
if (command === "eval") {
|
|
334
|
+
rejectExtraInput(parsed, {
|
|
335
|
+
maxPositionals: 1,
|
|
336
|
+
message: "workbench eval does not accept a VERSION argument.",
|
|
337
|
+
remediation: "Run workbench eval for current source, or use workbench show VERSION to inspect old source.",
|
|
338
|
+
});
|
|
348
339
|
if (parsed.flags.cloud === true) {
|
|
349
340
|
return await handleCloudEval(parsed, io);
|
|
350
341
|
}
|
|
351
342
|
const runs = await evalWorkbenchSkill({
|
|
352
343
|
...core,
|
|
353
|
-
version: optionalPositional(parsed, 1),
|
|
354
344
|
skill: stringFlag(parsed, "skills"),
|
|
355
345
|
agent: stringFlag(parsed, "agents"),
|
|
356
346
|
samples: intFlag(parsed, "samples"),
|
|
@@ -377,13 +367,17 @@ export async function runCli(argv, io = {
|
|
|
377
367
|
].filter(Boolean).join("\n"));
|
|
378
368
|
}
|
|
379
369
|
if (command === "improve") {
|
|
370
|
+
rejectExtraInput(parsed, {
|
|
371
|
+
maxPositionals: 1,
|
|
372
|
+
message: "workbench improve does not accept a VERSION argument.",
|
|
373
|
+
remediation: "Run workbench improve for current source after recording failed or reviewed eval evidence.",
|
|
374
|
+
});
|
|
380
375
|
if (parsed.flags.cloud === true) {
|
|
381
376
|
return await handleCloudImprove(parsed, io);
|
|
382
377
|
}
|
|
383
378
|
const improverAgent = await resolveLocalImproverAgent(parsed, core);
|
|
384
379
|
const result = await improveWorkbenchSkill({
|
|
385
380
|
...core,
|
|
386
|
-
version: optionalPositional(parsed, 1),
|
|
387
381
|
skill: stringFlag(parsed, "skills"),
|
|
388
382
|
agent: stringFlag(parsed, "agents"),
|
|
389
383
|
...(improverAgent ? { improverAgent } : {}),
|
|
@@ -423,9 +417,6 @@ export async function runCli(argv, io = {
|
|
|
423
417
|
if (command === "agent") {
|
|
424
418
|
return await handleAgent(parsed, io);
|
|
425
419
|
}
|
|
426
|
-
if (command === "case") {
|
|
427
|
-
return await handleCase(parsed, io);
|
|
428
|
-
}
|
|
429
420
|
if (command === "sync") {
|
|
430
421
|
const result = await syncWorkbenchRemote({
|
|
431
422
|
...core,
|
|
@@ -505,15 +496,17 @@ async function handleStatus(parsed, io) {
|
|
|
505
496
|
const core = await coreOptions(parsed);
|
|
506
497
|
const status = await workbenchStatusSnapshot(core);
|
|
507
498
|
const auth = await workbenchCliAuthStatus();
|
|
508
|
-
const
|
|
499
|
+
const machine = await workbenchMachineStatus(auth);
|
|
500
|
+
const cliStatus = await statusWithCausalNext(status, auth, core, machine);
|
|
509
501
|
return emitResult("workbench.status.v1", {
|
|
510
502
|
project: cliStatus.project,
|
|
511
503
|
worktree: cliStatus.worktree,
|
|
512
504
|
runs: cliStatus.runs,
|
|
513
505
|
remotes: cliStatus.remotes,
|
|
514
506
|
auth: auth,
|
|
507
|
+
machine: machine,
|
|
515
508
|
next: cliStatus.next,
|
|
516
|
-
}, parsed, io, () => formatStatusSnapshot({ ...cliStatus, auth }));
|
|
509
|
+
}, parsed, io, () => formatStatusSnapshot({ ...cliStatus, auth, machine }));
|
|
517
510
|
}
|
|
518
511
|
async function handleLog(parsed, io) {
|
|
519
512
|
if (parsed.flags.runs === true && parsed.flags.versions === true) {
|
|
@@ -584,7 +577,8 @@ async function handleShow(parsed, io) {
|
|
|
584
577
|
}
|
|
585
578
|
const trace = snapshotObjectByRef(snapshot.traces, objectRef, "trace");
|
|
586
579
|
if (trace) {
|
|
587
|
-
|
|
580
|
+
const files = trace.files.filter(isUserFacingTraceEvidenceFile);
|
|
581
|
+
return output(fileListing("trace", trace.id, files), parsed, io, () => formatFileListing("trace", trace.id, files));
|
|
588
582
|
}
|
|
589
583
|
const artifact = snapshotObjectByRef(snapshot.artifacts, objectRef, "artifact");
|
|
590
584
|
if (artifact) {
|
|
@@ -628,21 +622,6 @@ async function handleAgent(parsed, io) {
|
|
|
628
622
|
}
|
|
629
623
|
throw new WorkbenchUserError(`Unsupported agent command: ${subcommand}`);
|
|
630
624
|
}
|
|
631
|
-
async function handleCase(parsed, io) {
|
|
632
|
-
const subcommand = requiredPositional(parsed, 1, "workbench case requires add.");
|
|
633
|
-
if (subcommand === "add") {
|
|
634
|
-
const core = await coreOptions(parsed);
|
|
635
|
-
const sourceRef = requiredPositional(parsed, 2, "workbench case add requires RUN_ID.");
|
|
636
|
-
rejectExtraInput(parsed, {
|
|
637
|
-
maxPositionals: 3,
|
|
638
|
-
message: "workbench case add accepts one RUN_ID argument.",
|
|
639
|
-
remediation: "Run workbench case add RUN_ID.",
|
|
640
|
-
});
|
|
641
|
-
const record = await addWorkbenchCase({ ...core, fromTraceId: await traceIdForCaseSource(core, sourceRef) });
|
|
642
|
-
return output(record, parsed, io, () => `Added draft case ${record.id}. Edit .workbench/cases/${record.path}/case.yaml before using it as score evidence.`);
|
|
643
|
-
}
|
|
644
|
-
throw new WorkbenchUserError(`Unknown command: workbench case ${subcommand}`);
|
|
645
|
-
}
|
|
646
625
|
async function handleAdapterLogin(provider, parsed, io) {
|
|
647
626
|
const target = parseAuthTarget(provider, authProfileFlag(parsed));
|
|
648
627
|
const method = authMethod(parsed, target.adapterId);
|
|
@@ -862,67 +841,87 @@ async function handleLogout(parsed, io) {
|
|
|
862
841
|
if (tokenPresent) {
|
|
863
842
|
await writeConfig({ schema: CONFIG_SCHEMA, ...(baseUrl ? { baseUrl } : {}) });
|
|
864
843
|
}
|
|
865
|
-
const adapterStatuses = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).listStatus().catch(() => []);
|
|
866
|
-
const adapterAuthRetained = adapterStatuses.length > 0;
|
|
867
844
|
return emitResult("workbench.cli.logout.v1", {
|
|
868
845
|
...(baseUrl ? { baseUrl } : {}),
|
|
869
846
|
tokenPresent,
|
|
870
847
|
revoke,
|
|
871
848
|
configRemoved,
|
|
872
|
-
|
|
849
|
+
adapterAuth: "unchanged",
|
|
873
850
|
}, parsed, io, () => [
|
|
874
851
|
`Logged out of Workbench${baseUrl ? ` (${baseUrl})` : ""}.`,
|
|
875
852
|
`Token: ${tokenPresent ? "present" : "absent"}; revoke ${revoke}; config ${configRemoved ? "removed" : "unchanged"}.`,
|
|
876
|
-
|
|
877
|
-
? "Local adapter auth records were retained; run workbench logout PROVIDER to remove them."
|
|
878
|
-
: "No local adapter auth records remain.",
|
|
853
|
+
"Local adapter auth unchanged; run workbench logout PROVIDER to remove provider credentials.",
|
|
879
854
|
].join("\n"));
|
|
880
855
|
}
|
|
881
856
|
/**
 * Implements `workbench install [HANDLE_OR_URL] [--yes] [--dry-run] [--json]`.
 *
 * Without a positional source the command reads the installed-skills inventory
 * and emits it under the `workbench.cli.installed.v1` schema; `--yes` and
 * `--dry-run` are rejected in that mode. With a source, the source input is
 * resolved and parsed, its snapshot is fetched and handed to
 * `installSnapshotToStore`, a fan-out is run (or only described for a dry
 * run), and the outcome is emitted under `workbench.cli.install.v1`.
 *
 * @param parsed - Parsed CLI input; `parsed.flags.yes` and
 *   `parsed.flags["dry-run"]` are read here, positionals via helpers.
 * @param io - Output sink forwarded unchanged to `emitResult`.
 * @returns Whatever `emitResult` returns.
 * @throws {WorkbenchCodedError} "usage" (exit code 2) when inventory mode gets
 *   `--yes`/`--dry-run`, or when the resolved source does not parse as a
 *   Workbench install source.
 */
async function handleInstall(parsed, io) {
    const sourceInput = optionalPositional(parsed, 1);

    // Inventory mode: no HANDLE_OR_URL was given.
    if (!sourceInput) {
        rejectExtraInput(parsed, {
            maxPositionals: 1,
            message: "workbench install with no HANDLE_OR_URL lists installed skills.",
            remediation: "Run workbench install OWNER/SKILL to install a published skill.",
        });
        const wantsOverwrite = parsed.flags.yes === true;
        const wantsDryRun = parsed.flags["dry-run"] === true;
        if (wantsOverwrite || wantsDryRun) {
            throw new WorkbenchCodedError("usage", "workbench install inventory does not accept --yes or --dry-run.", {
                remediation: "Run workbench install --json, or run workbench install OWNER/SKILL --dry-run.",
                exitCode: 2,
            });
        }
        const inventory = await readInstalledSkillsInventory({
            includeUpdates: true,
            lookupLatestVersion: latestInstallVersion,
        });
        const renderInventory = () => formatInstalledInventory(inventory);
        return emitResult("workbench.cli.installed.v1", installedInventoryToJson(inventory), parsed, io, renderInventory);
    }

    // Install mode: exactly one HANDLE_OR_URL is allowed.
    rejectExtraInput(parsed, {
        maxPositionals: 2,
        message: "workbench install accepts one HANDLE_OR_URL argument.",
        remediation: "Run workbench install OWNER/SKILL.",
    });
    const source = await resolveWorkbenchInstallSourceInput(sourceInput);
    const workbenchSource = parseWorkbenchInstallSource(source);
    if (!workbenchSource) {
        throw new WorkbenchCodedError("usage", "workbench install requires a Workbench Cloud source URL.", {
            remediation: "Run workbench install OWNER/SKILL.",
            exitCode: 2,
        });
    }
    const snapshot = await fetchWorkbenchInstallSourceSnapshot(workbenchSource, source);
    const sourceSummary = workbenchInstallSourceSummary(workbenchSource, snapshot);

    const overwrite = parsed.flags.yes === true;
    const dryRun = parsed.flags["dry-run"] === true;
    const provenance = {
        handle: `${workbenchSource.owner}/${workbenchSource.skill}`,
        versionId: snapshot.versionId,
        baseUrl: workbenchSource.baseUrl,
    };
    const result = await installSnapshotToStore({ snapshot, overwrite, dryRun, provenance });

    // A dry run only describes the fan-out; a real install performs it.
    let fanout;
    if (dryRun) {
        fanout = skippedFanOut(result.directoryName, result.destination);
    }
    else {
        fanout = await fanOutSkill(result.directoryName, { skillDir: result.destination });
    }
    const next = installNextCommand(fanout);

    const payload = {
        source: sourceSummary,
        result: result.result,
        store: result.store,
        skill: result.directoryName,
        destination: result.destination,
        previous: result.previous,
        filesCopied: result.filesCopied,
        contentHash: result.contentHash,
        provenancePath: result.provenancePath,
        fanout: fanOutToJson(fanout),
        next: next,
    };
    if (dryRun) {
        payload.dryRun = true;
    }

    const renderText = () => {
        const headline = dryRun
            ? `Would install ${result.directoryName} to ${result.destination}: filesCopied=${result.filesCopied}`
            : `Installed ${result.directoryName}: ${result.result}`;
        const lines = [
            headline,
            ` machine\t${result.previous}\t${result.destination}`,
            formatFanOut(fanout),
        ];
        if (next) {
            lines.push(`next: ${next}`);
        }
        return lines.join("\n");
    };
    return emitResult("workbench.cli.install.v1", payload, parsed, io, renderText);
}
|
|
928
927
|
async function handleCloudEval(parsed, io) {
|
|
@@ -1003,47 +1002,71 @@ async function handleCloudImprove(parsed, io) {
|
|
|
1003
1002
|
...(next ? [`next: ${next}`] : []),
|
|
1004
1003
|
].filter(Boolean).join("\n"));
|
|
1005
1004
|
}
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1005
|
+
/**
 * Builds the placeholder fan-out record for a dry run: nothing is linked, but
 * the manual fan-out command is still included.
 *
 * @param name - Passed as the second argument to `manualFanOutCommand`.
 * @param destination - Passed as the first argument to `manualFanOutCommand`.
 * @returns A record with status "skipped", reason "dry-run", an empty
 *   `linkedAgents` list and the manual command.
 */
function skippedFanOut(name, destination) {
    const command = manualFanOutCommand(destination, name);
    const placeholder = {
        status: "skipped",
        command,
        linkedAgents: [],
        reason: "dry-run",
    };
    return placeholder;
}
|
|
1013
|
+
/**
 * Picks the follow-up command to suggest after an install.
 *
 * @param fanout - Fan-out record; `status`, `reason` and `command` are read.
 * @returns `fanout.command` when the fan-out failed, or was skipped for any
 *   reason other than "dry-run"; otherwise `null`.
 */
function installNextCommand(fanout) {
    const { status, reason } = fanout;
    if (status === "failed") {
        return fanout.command;
    }
    if (status === "skipped" && reason !== "dry-run") {
        return fanout.command;
    }
    return null;
}
|
|
1018
|
+
/**
 * Projects a fan-out record onto the shape emitted in JSON output.
 *
 * `status`, `command` and `linkedAgents` are always copied. `additionalAgents`
 * and `reason` are copied only when truthy; `exitCode` is copied whenever it
 * is not `undefined` (so an exit code of 0 is kept).
 *
 * @param fanout - Fan-out record to serialize.
 * @returns A new plain object holding the selected fields.
 */
function fanOutToJson(fanout) {
    const json = {
        status: fanout.status,
        command: fanout.command,
        linkedAgents: fanout.linkedAgents,
    };
    if (fanout.additionalAgents) {
        json.additionalAgents = fanout.additionalAgents;
    }
    if (fanout.reason) {
        json.reason = fanout.reason;
    }
    if (fanout.exitCode !== undefined) {
        json.exitCode = fanout.exitCode;
    }
    return json;
}
|
|
1028
|
+
/**
 * Renders a fan-out record as the one-line summary shown in text output.
 *
 * - skipped with reason "dry-run" -> "fanout: planned"
 * - skipped otherwise             -> "fanout skipped: <reason>"
 * - failed                        -> "fanout failed: <reason>"
 * - no linked agents              -> "fanout: completed"
 * - otherwise                     -> "fanned out to: a, b" plus " and N more"
 *   when `additionalAgents` is truthy.
 *
 * @param fanout - Fan-out record; `linkedAgents` is only read when the status
 *   is neither "skipped" nor "failed".
 * @returns The summary line.
 */
function formatFanOut(fanout) {
    switch (fanout.status) {
        case "skipped": {
            if (fanout.reason === "dry-run") {
                return "fanout: planned";
            }
            return `fanout skipped: ${fanout.reason ?? "not available"}`;
        }
        case "failed":
            return `fanout failed: ${fanout.reason ?? "unknown failure"}`;
        default:
            break;
    }
    const { linkedAgents, additionalAgents } = fanout;
    if (linkedAgents.length === 0) {
        return "fanout: completed";
    }
    const names = linkedAgents.join(", ");
    if (additionalAgents) {
        return `fanned out to: ${names} and ${additionalAgents} more`;
    }
    return `fanned out to: ${names}`;
}
|
|
1036
|
-
async function
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
return
|
|
1040
|
-
}
|
|
1041
|
-
catch {
|
|
1042
|
-
return false;
|
|
1043
|
+
/**
 * Looks up the latest published version id for an installed-skill record.
 *
 * The record's handle is normalized first; when normalization yields nothing
 * the lookup is skipped. Otherwise the source snapshot is fetched under a
 * 3000 ms `withTimeout` limit and its `versionId` is returned.
 *
 * @param record - Install record; `handle` and `baseUrl` are read.
 * @returns The snapshot's `versionId`, or `undefined` when the handle cannot
 *   be normalized. Rejections from the fetch or the timeout propagate.
 */
async function latestInstallVersion(record) {
    const parsedHandle = normalizedOwnerSkillHandle(record.handle);
    if (!parsedHandle) {
        return undefined;
    }
    const { owner, skill } = parsedHandle;
    const lookup = fetchWorkbenchInstallSourceSnapshot({ baseUrl: record.baseUrl, owner, skill }, record.handle);
    const latest = await withTimeout(lookup, 3000);
    return latest.versionId;
}
|
|
1055
|
+
/**
 * Settles with the outcome of `promise`, or rejects once `timeoutMs` elapses,
 * whichever happens first.
 *
 * The input is normalized with `Promise.resolve`, so a plain (non-thenable)
 * value resolves immediately instead of throwing inside the executor after
 * the timer was armed, which would leave the timer pending and keep the event
 * loop alive for `timeoutMs`. The timer is always cleared in `finally`.
 *
 * Note: the underlying operation is not cancelled on timeout; only the
 * returned promise rejects.
 *
 * @param {Promise<*>|*} promise - Work to wait for.
 * @param {number} timeoutMs - Maximum time to wait, in milliseconds.
 * @returns {Promise<*>} Resolves/rejects like `promise`, or rejects with
 *   `Error("Timed out after <timeoutMs>ms.")`.
 */
function withTimeout(promise, timeoutMs) {
    let timer;
    const deadline = new Promise((_resolve, reject) => {
        timer = setTimeout(() => reject(new Error(`Timed out after ${timeoutMs}ms.`)), timeoutMs);
    });
    return Promise.race([Promise.resolve(promise), deadline]).finally(() => {
        clearTimeout(timer);
    });
}
|
|
1045
1067
|
async function startCloudExecution(command, parsed, io) {
|
|
1046
1068
|
const root = dirFlag(parsed) ?? process.cwd();
|
|
1069
|
+
const showProgress = parsed.flags.json !== true;
|
|
1047
1070
|
const remote = await ensureCloudRemoteForExecution(root, parsed);
|
|
1048
1071
|
const source = parseWorkbenchInstallSource(remote.url);
|
|
1049
1072
|
if (!source) {
|
|
@@ -1061,12 +1084,23 @@ async function startCloudExecution(command, parsed, io) {
|
|
|
1061
1084
|
});
|
|
1062
1085
|
}
|
|
1063
1086
|
const core = { dir: root, authToken: token };
|
|
1064
|
-
|
|
1087
|
+
const request = command === "eval"
|
|
1088
|
+
? await prepareWorkbenchCloudEvalRequest({
|
|
1089
|
+
...core,
|
|
1090
|
+
skill: stringFlag(parsed, "skills"),
|
|
1091
|
+
agent: stringFlag(parsed, "agents"),
|
|
1092
|
+
samples: intFlag(parsed, "samples"),
|
|
1093
|
+
})
|
|
1094
|
+
: await prepareWorkbenchCloudImproveRequest({
|
|
1095
|
+
...core,
|
|
1096
|
+
skill: stringFlag(parsed, "skills"),
|
|
1097
|
+
agent: stringFlag(parsed, "agents"),
|
|
1098
|
+
samples: intFlag(parsed, "samples"),
|
|
1099
|
+
budget: intFlag(parsed, "budget"),
|
|
1100
|
+
});
|
|
1065
1101
|
const syncBefore = await syncWorkbenchRemote({ ...core, remote: remote.name });
|
|
1066
|
-
writeCloudProgress(io, `workbench cloud: synced ${remote.name} before hosted ${command} (pushed=${syncBefore.pushed}, pulled=${syncBefore.pulled}, up-to-date=${syncBefore.upToDate}).`);
|
|
1067
|
-
const startSnapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
1068
1102
|
const skillId = await resolveCloudSkillId(source);
|
|
1069
|
-
const response = await apiRequest(`/api/workbench/skills/${encodeURIComponent(skillId)}${command === "improve" ? "/improve" : "/runs"}`, { method: "POST", body: cloudExecutionRequestBody(command,
|
|
1103
|
+
const response = await apiRequest(`/api/workbench/skills/${encodeURIComponent(skillId)}${command === "improve" ? "/improve" : "/runs"}`, { method: "POST", body: cloudExecutionRequestBody(command, request) }, source.baseUrl);
|
|
1070
1104
|
const runs = response.runs ?? [];
|
|
1071
1105
|
if (runs.length === 0) {
|
|
1072
1106
|
throw new WorkbenchCodedError("cloud_run_missing", `Workbench Cloud did not return a run for ${command}.`, {
|
|
@@ -1077,14 +1111,20 @@ async function startCloudExecution(command, parsed, io) {
|
|
|
1077
1111
|
});
|
|
1078
1112
|
}
|
|
1079
1113
|
const initialRunIds = runs.map((run) => run.id);
|
|
1080
|
-
writeCloudProgress(io, `workbench cloud: scheduled hosted ${command} on ${remote.url} (${formatCloudRunStatuses(runs)})
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1114
|
+
writeCloudProgress(io, `workbench cloud: scheduled hosted ${command} on ${remote.url} (${formatCloudRunStatuses(runs)}).`, showProgress);
|
|
1115
|
+
let initialSyncAfter = syncBefore;
|
|
1116
|
+
try {
|
|
1117
|
+
initialSyncAfter = await syncWorkbenchRemote({ ...core, remote: remote.name });
|
|
1118
|
+
}
|
|
1119
|
+
catch (error) {
|
|
1120
|
+
writeCloudProgress(io, `workbench cloud: sync while waiting failed; retrying (${oneLineExcerpt(errorMessage(error)) ?? "unknown error"}).`, showProgress);
|
|
1121
|
+
}
|
|
1122
|
+
writeCloudProgress(io, `workbench cloud: waiting for terminal status; press Ctrl-C to detach and resume with workbench show ${displayRef(initialRunIds[0] ?? "run")}.`, showProgress);
|
|
1084
1123
|
const completed = await waitForCloudRuns({
|
|
1085
1124
|
command,
|
|
1086
1125
|
core,
|
|
1087
1126
|
io,
|
|
1127
|
+
progress: showProgress,
|
|
1088
1128
|
remote,
|
|
1089
1129
|
runs,
|
|
1090
1130
|
initialSync: initialSyncAfter,
|
|
@@ -1096,7 +1136,7 @@ async function startCloudExecution(command, parsed, io) {
|
|
|
1096
1136
|
initialRunIds,
|
|
1097
1137
|
runs: completed.runs,
|
|
1098
1138
|
...(completed.detached ? { detached: true } : {}),
|
|
1099
|
-
startVersionId:
|
|
1139
|
+
startVersionId: request.versionId,
|
|
1100
1140
|
source,
|
|
1101
1141
|
sync: {
|
|
1102
1142
|
before: { pushed: syncBefore.pushed, pulled: syncBefore.pulled, upToDate: syncBefore.upToDate },
|
|
@@ -1121,9 +1161,12 @@ async function waitForCloudRuns(input) {
|
|
|
1121
1161
|
const deadline = Date.now() + timeoutMs;
|
|
1122
1162
|
let runs = [...input.runs];
|
|
1123
1163
|
let interrupted = false;
|
|
1164
|
+
const startedAtMs = Date.now();
|
|
1165
|
+
let lastProgressAtMs = startedAtMs;
|
|
1166
|
+
let lastSyncErrorMessage;
|
|
1124
1167
|
const onSigint = () => {
|
|
1125
1168
|
interrupted = true;
|
|
1126
|
-
writeCloudProgress(input.io, `workbench cloud: detaching from hosted ${input.command} (${runIds.map(displayRef).join(", ")})
|
|
1169
|
+
writeCloudProgress(input.io, `workbench cloud: detaching from hosted ${input.command} (${runIds.map(displayRef).join(", ")}).`, input.progress);
|
|
1127
1170
|
};
|
|
1128
1171
|
process.once("SIGINT", onSigint);
|
|
1129
1172
|
const seenStatuses = new Map();
|
|
@@ -1137,17 +1180,26 @@ async function waitForCloudRuns(input) {
|
|
|
1137
1180
|
runs = runIds.map((id) => snapshotRuns.find((entry) => entry.id === id) ?? runs.find((entry) => entry.id === id))
|
|
1138
1181
|
.filter((run) => Boolean(run));
|
|
1139
1182
|
}
|
|
1183
|
+
let wroteProgress = false;
|
|
1184
|
+
const nowMs = Date.now();
|
|
1140
1185
|
for (const run of runs) {
|
|
1141
1186
|
const previous = seenStatuses.get(run.id);
|
|
1142
1187
|
if (previous !== run.status) {
|
|
1143
1188
|
seenStatuses.set(run.id, run.status);
|
|
1144
|
-
writeCloudProgress(input.io, `workbench cloud: ${
|
|
1189
|
+
writeCloudProgress(input.io, `workbench cloud: ${formatCloudRunState(run, startedAtMs, nowMs)}.`, input.progress);
|
|
1190
|
+
wroteProgress = input.progress || wroteProgress;
|
|
1145
1191
|
}
|
|
1146
1192
|
}
|
|
1147
1193
|
if (runs.length === runIds.length && runs.every(isTerminalRun)) {
|
|
1148
|
-
writeCloudProgress(input.io, `workbench cloud: hosted ${input.command} finished (${formatCloudRunStatuses(runs)}).`);
|
|
1149
1194
|
return { runs, sync };
|
|
1150
1195
|
}
|
|
1196
|
+
if (wroteProgress) {
|
|
1197
|
+
lastProgressAtMs = nowMs;
|
|
1198
|
+
}
|
|
1199
|
+
else if (input.progress && nowMs - lastProgressAtMs >= 60_000) {
|
|
1200
|
+
writeCloudProgress(input.io, `workbench cloud: still waiting (${formatCloudRunStates(runs, startedAtMs, nowMs)}).`);
|
|
1201
|
+
lastProgressAtMs = nowMs;
|
|
1202
|
+
}
|
|
1151
1203
|
if (interrupted) {
|
|
1152
1204
|
return { runs, sync, detached: true };
|
|
1153
1205
|
}
|
|
@@ -1166,8 +1218,20 @@ async function waitForCloudRuns(input) {
|
|
|
1166
1218
|
if (interrupted) {
|
|
1167
1219
|
return { runs, sync, detached: true };
|
|
1168
1220
|
}
|
|
1169
|
-
|
|
1170
|
-
|
|
1221
|
+
try {
|
|
1222
|
+
sync = await syncWorkbenchRemote({ ...input.core, remote: input.remote.name });
|
|
1223
|
+
if (lastSyncErrorMessage) {
|
|
1224
|
+
writeCloudProgress(input.io, "workbench cloud: sync while waiting recovered.", input.progress);
|
|
1225
|
+
lastSyncErrorMessage = undefined;
|
|
1226
|
+
}
|
|
1227
|
+
}
|
|
1228
|
+
catch (error) {
|
|
1229
|
+
const message = oneLineExcerpt(errorMessage(error)) ?? "unknown error";
|
|
1230
|
+
if (message !== lastSyncErrorMessage) {
|
|
1231
|
+
writeCloudProgress(input.io, `workbench cloud: sync while waiting failed; retrying (${message}).`, input.progress);
|
|
1232
|
+
lastSyncErrorMessage = message;
|
|
1233
|
+
}
|
|
1234
|
+
}
|
|
1171
1235
|
}
|
|
1172
1236
|
}
|
|
1173
1237
|
finally {
|
|
@@ -1286,13 +1350,19 @@ async function resolveCloudSkillId(source) {
|
|
|
1286
1350
|
}
|
|
1287
1351
|
return skill.id;
|
|
1288
1352
|
}
|
|
1289
|
-
function cloudExecutionRequestBody(command,
|
|
1353
|
+
/**
 * Builds the POST body for a hosted eval or improve request.
 *
 * The seven shared fields are always copied from `request` (even when
 * undefined). `budget` and `evidenceTraceIds` are added only when `command`
 * is "improve".
 *
 * @param command - Hosted command name; only "improve" adds the extra fields.
 * @param request - Prepared request whose fields are copied.
 * @returns A new plain object used as the request body.
 */
function cloudExecutionRequestBody(command, request) {
    const { versionId, evalHash, skill, skillBundleHash, agent, agentHash, samples } = request;
    const body = { versionId, evalHash, skill, skillBundleHash, agent, agentHash, samples };
    if (command === "improve") {
        body.budget = request.budget;
        body.evidenceTraceIds = request.evidenceTraceIds;
    }
    return body;
}
|
|
1298
1368
|
function cloudImproveNextCommand(runs) {
|
|
@@ -1322,7 +1392,10 @@ function cloudExecutionSummary(started) {
|
|
|
1322
1392
|
sync: started.sync,
|
|
1323
1393
|
};
|
|
1324
1394
|
}
|
|
1325
|
-
function writeCloudProgress(io, message) {
|
|
1395
|
+
/**
 * Writes one progress line to `io.stderr`, followed by a newline.
 *
 * @param io - Object exposing `stderr.write`.
 * @param message - Text to write.
 * @param [enabled=true] - When falsy, nothing is written.
 */
function writeCloudProgress(io, message, enabled = true) {
    if (enabled) {
        io.stderr.write(`${message}\n`);
    }
}
|
|
1328
1401
|
function formatCloudRunStatuses(runs) {
|
|
@@ -1330,6 +1403,17 @@ function formatCloudRunStatuses(runs) {
|
|
|
1330
1403
|
? runs.map((run) => `${displayRef(run.id)}:${run.status}`).join(", ")
|
|
1331
1404
|
: "no runs";
|
|
1332
1405
|
}
|
|
1406
|
+
/**
 * Joins the per-run state strings with ", ", or reports "no runs" together
 * with the elapsed seconds when the list is empty.
 *
 * @param runs - Runs to describe.
 * @param startedAtMs - Start timestamp in milliseconds.
 * @param nowMs - Current timestamp in milliseconds.
 * @returns The combined description.
 */
function formatCloudRunStates(runs, startedAtMs, nowMs) {
    if (runs.length > 0) {
        const describe = (run) => formatCloudRunState(run, startedAtMs, nowMs);
        return runs.map(describe).join(", ");
    }
    return `no runs (${elapsedSeconds(startedAtMs, nowMs)}s)`;
}
|
|
1411
|
+
/**
 * Describes one run as "<ref> <status> (<elapsed>s)".
 *
 * @param run - Run whose `id` (via `displayRef`) and `status` are shown.
 * @param startedAtMs - Start timestamp in milliseconds.
 * @param nowMs - Current timestamp in milliseconds.
 * @returns The formatted state string.
 */
function formatCloudRunState(run, startedAtMs, nowMs) {
    const ref = displayRef(run.id);
    const seconds = elapsedSeconds(startedAtMs, nowMs);
    return `${ref} ${run.status} (${seconds}s)`;
}
|
|
1414
|
+
/**
 * Whole seconds between two millisecond timestamps, never negative.
 *
 * @param {number} startedAtMs - Start timestamp in milliseconds.
 * @param {number} nowMs - End timestamp in milliseconds.
 * @returns {number} Elapsed seconds rounded down, clamped to 0 from below.
 */
function elapsedSeconds(startedAtMs, nowMs) {
    const wholeSeconds = Math.floor((nowMs - startedAtMs) / 1000);
    return Math.max(0, wholeSeconds);
}
|
|
1333
1417
|
function workbenchInstallSourceSummary(source, snapshot) {
|
|
1334
1418
|
const installUrl = `${source.baseUrl}/skills/${encodeURIComponent(source.owner)}/${encodeURIComponent(source.skill)}`;
|
|
1335
1419
|
return {
|
|
@@ -1490,7 +1574,6 @@ async function loadConfig() {
|
|
|
1490
1574
|
...(typeof parsed.baseUrl === "string" ? { baseUrl: normalizeBaseUrl(parsed.baseUrl) } : {}),
|
|
1491
1575
|
...(typeof parsed.accessToken === "string" ? { accessToken: parsed.accessToken } : {}),
|
|
1492
1576
|
...(typeof parsed.username === "string" ? { username: parsed.username } : {}),
|
|
1493
|
-
...(Array.isArray(parsed.installTargets) ? { installTargets: normalizeInstallTargetNames(parsed.installTargets.flatMap((entry) => typeof entry === "string" ? [entry] : [])) } : {}),
|
|
1494
1577
|
};
|
|
1495
1578
|
}
|
|
1496
1579
|
// Single resolver for the Workbench Cloud token used by every authenticated
|
|
@@ -2245,7 +2328,7 @@ function flagSpecForParsedPrefix(positionals, flags) {
|
|
|
2245
2328
|
return allowedFlagsForCommand({ positionals: [...positionals], flags: {} }, command);
|
|
2246
2329
|
}
|
|
2247
2330
|
function addFlag(flags, name, value) {
|
|
2248
|
-
if (name === "with"
|
|
2331
|
+
if (name === "with") {
|
|
2249
2332
|
const existing = flags[name];
|
|
2250
2333
|
flags[name] = Array.isArray(existing)
|
|
2251
2334
|
? [...existing, String(value)]
|
|
@@ -2269,14 +2352,6 @@ function stringFlag(parsed, name) {
|
|
|
2269
2352
|
const value = parsed.flags[name];
|
|
2270
2353
|
return typeof value === "string" ? value : undefined;
|
|
2271
2354
|
}
|
|
2272
|
-
function stringsFlag(parsed, name) {
|
|
2273
|
-
const value = parsed.flags[name];
|
|
2274
|
-
return Array.isArray(value)
|
|
2275
|
-
? value
|
|
2276
|
-
: typeof value === "string"
|
|
2277
|
-
? [value]
|
|
2278
|
-
: [];
|
|
2279
|
-
}
|
|
2280
2355
|
function intFlag(parsed, name) {
|
|
2281
2356
|
const value = stringFlag(parsed, name);
|
|
2282
2357
|
if (!value) {
|
|
@@ -2377,12 +2452,17 @@ async function previewPublishWithDerivedRemote(parsed) {
|
|
|
2377
2452
|
return {
|
|
2378
2453
|
remote,
|
|
2379
2454
|
version,
|
|
2380
|
-
visibility: parsePublishVisibilityFlags(parsed) ??
|
|
2455
|
+
visibility: parsePublishVisibilityFlags(parsed) ??
|
|
2456
|
+
normalizePublishVisibility(reconciledSnapshot.refs["publication/visibility"]) ??
|
|
2457
|
+
"private",
|
|
2381
2458
|
installHandle: installHandleFromCloudRemote(remote),
|
|
2382
2459
|
installUrl: remote.url,
|
|
2383
2460
|
pinnedInstallUrl: `${remote.url}/releases/${encodeURIComponent(version.id)}`,
|
|
2384
2461
|
};
|
|
2385
2462
|
}
|
|
2463
|
+
/**
 * Accept a stored visibility value only if it is one of the known levels.
 *
 * @param {unknown} value - Candidate visibility.
 * @returns {"private" | "internal" | "public" | undefined} The value itself
 *   when it is exactly "private", "internal", or "public"; otherwise undefined.
 */
function normalizePublishVisibility(value) {
    const knownLevels = ["private", "internal", "public"];
    return knownLevels.includes(value) ? value : undefined;
}
|
|
2386
2466
|
async function ensurePublishRemote(parsed) {
|
|
2387
2467
|
const core = await coreOptions(parsed);
|
|
2388
2468
|
const root = path.resolve(dirFlag(parsed) ?? process.cwd());
|
|
@@ -2455,7 +2535,7 @@ async function resolveWorkbenchInstallSourceInput(input) {
|
|
|
2455
2535
|
const handle = normalizedOwnerSkillHandle(input);
|
|
2456
2536
|
if (!handle) {
|
|
2457
2537
|
throw new WorkbenchCodedError("usage", "workbench install expects OWNER/SKILL or a Workbench Cloud skill URL.", {
|
|
2458
|
-
remediation: "Run workbench install OWNER/SKILL
|
|
2538
|
+
remediation: "Run workbench install OWNER/SKILL.",
|
|
2459
2539
|
exitCode: 2,
|
|
2460
2540
|
});
|
|
2461
2541
|
}
|
|
@@ -2606,6 +2686,20 @@ async function workbenchCliAuthStatus() {
|
|
|
2606
2686
|
})),
|
|
2607
2687
|
};
|
|
2608
2688
|
}
|
|
2689
|
+
/**
 * Summarise machine-level state for the status command.
 *
 * Reads the installed-skills inventory, then lists the auth adapters whose
 * status is "connected".
 *
 * @param {{ adapters: Array<{ adapter: string, slot?: string, profile: string, status: string }> }} auth
 *   Auth status whose `adapters` entries are inspected.
 * @returns {Promise<{ installedSkillCount: number, stores: unknown, connectedProviders: Array<object> }>}
 *   Installed skill count, the inventory's stores, and one
 *   `{ adapter, slot?, profile }` record per connected adapter.
 */
async function workbenchMachineStatus(auth) {
    const { skills, stores } = await readInstalledSkillsInventory();
    const connectedProviders = [];
    for (const entry of auth.adapters) {
        if (entry.status !== "connected") {
            continue;
        }
        const provider = { adapter: entry.adapter };
        // `slot` is only included when the adapter entry carries one.
        if (entry.slot) {
            provider.slot = entry.slot;
        }
        provider.profile = entry.profile;
        connectedProviders.push(provider);
    }
    return {
        installedSkillCount: skills.length,
        stores,
        connectedProviders,
    };
}
|
|
2609
2703
|
/**
 * Extract the numeric score of a run, if it has a usable one.
 *
 * @param {{ status: string, score?: unknown }} run - Run record.
 * @returns {number | undefined} `run.score` when the run succeeded and the
 *   score is a number; otherwise undefined.
 */
function scoredRunValue(run) {
    if (run.status !== "succeeded") {
        return undefined;
    }
    if (typeof run.score !== "number") {
        return undefined;
    }
    return run.score;
}
|
|
@@ -2623,9 +2717,12 @@ function installHandleFromStatusRemote(remote) {
|
|
|
2623
2717
|
const source = parseWorkbenchInstallSource(publicationUrl ?? remote.url);
|
|
2624
2718
|
return source ? `${source.owner}/${source.skill}` : publicationUrl ?? remote.url;
|
|
2625
2719
|
}
|
|
2626
|
-
async function statusWithCausalNext(status, auth, core) {
|
|
2720
|
+
async function statusWithCausalNext(status, auth, core, machine) {
|
|
2627
2721
|
if (!status.project.initialized) {
|
|
2628
|
-
return
|
|
2722
|
+
return {
|
|
2723
|
+
...status,
|
|
2724
|
+
next: machine.installedSkillCount > 0 ? "workbench install" : status.next,
|
|
2725
|
+
};
|
|
2629
2726
|
}
|
|
2630
2727
|
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core).catch(() => null);
|
|
2631
2728
|
const lastRun = snapshot?.runs
|
|
@@ -2762,28 +2859,61 @@ function evidenceFilesForRunOrJob(snapshot, ref) {
|
|
|
2762
2859
|
}
|
|
2763
2860
|
const traceById = new Map(snapshot.traces.map((trace) => [trace.id, trace]));
|
|
2764
2861
|
const artifactById = new Map(snapshot.artifacts.map((artifact) => [artifact.id, artifact]));
|
|
2765
|
-
const
|
|
2862
|
+
const candidates = selection.jobs.flatMap((job) => [
|
|
2766
2863
|
...job.artifactIds.flatMap((artifactId) => {
|
|
2767
2864
|
const artifact = artifactById.get(artifactId);
|
|
2768
2865
|
return artifact
|
|
2769
|
-
? artifact.files.filter(isUserFacingEvidenceFile).map((file) =>
|
|
2866
|
+
? artifact.files.filter(isUserFacingEvidenceFile).map((file) => ({
|
|
2867
|
+
file: evidenceFileWithPath(file, `cases/${evidencePathSegment(job.caseId)}/jobs/${evidencePathSegment(job.id)}/${file.path}`),
|
|
2868
|
+
jobId: job.id,
|
|
2869
|
+
source: "artifact",
|
|
2870
|
+
}))
|
|
2770
2871
|
: [];
|
|
2771
2872
|
}),
|
|
2772
2873
|
...job.traceIds.flatMap((traceId) => {
|
|
2773
2874
|
const trace = traceById.get(traceId);
|
|
2774
2875
|
return trace
|
|
2775
|
-
? trace.files.filter(
|
|
2876
|
+
? trace.files.filter(isUserFacingTraceEvidenceFile).map((file) => ({
|
|
2877
|
+
file: evidenceFileWithPath(file, `cases/${evidencePathSegment(job.caseId)}/jobs/${evidencePathSegment(job.id)}/traces/${evidencePathSegment(trace.id)}/${file.path}`),
|
|
2878
|
+
jobId: job.id,
|
|
2879
|
+
source: "trace",
|
|
2880
|
+
}))
|
|
2776
2881
|
: [];
|
|
2777
2882
|
}),
|
|
2778
2883
|
]);
|
|
2884
|
+
return canonicalEvidenceFiles(candidates);
|
|
2885
|
+
}
|
|
2886
|
+
/**
 * Reduce evidence candidates to the list of files that should be shown.
 *
 * Candidates are processed in order. A candidate is dropped when its
 * `file.path` was already seen, or when it comes from a trace and an
 * artifact candidate with the same `sameJobEquivalentEvidenceKey` was
 * already accepted.
 *
 * @param {Array<{ file: { path: string }, jobId: string, source: string }>} candidates
 *   Evidence candidates in priority order.
 * @returns {Array<object>} The `file` objects of the surviving candidates.
 */
function canonicalEvidenceFiles(candidates) {
    const claimedPaths = new Set();
    const artifactKeys = new Set();
    const kept = [];
    for (const candidate of candidates) {
        const { file, source } = candidate;
        if (claimedPaths.has(file.path)) {
            continue;
        }
        // The path is claimed even if the candidate is dropped below.
        claimedPaths.add(file.path);
        const key = sameJobEquivalentEvidenceKey(candidate);
        if (source === "artifact") {
            artifactKeys.add(key);
            kept.push(file);
        }
        else if (source !== "trace" || !artifactKeys.has(key)) {
            kept.push(file);
        }
    }
    return kept;
}
|
|
2907
|
+
/**
 * Build the key used to recognise equivalent evidence files within one job.
 *
 * The key joins, with NUL separators: the job id, the file's basename, its
 * kind (default "text"), its encoding (default "utf8"), an executable flag
 * ("1" or "0"), and the file content.
 *
 * @param {{ jobId: string, file: { path: string, kind?: string, encoding?: string, executable?: boolean, content: string } }} candidate
 *   Evidence candidate to fingerprint.
 * @returns {string} The NUL-joined key.
 */
function sameJobEquivalentEvidenceKey(candidate) {
    const { jobId, file } = candidate;
    const executableFlag = file.executable === true ? "1" : "0";
    const parts = [jobId, path.basename(file.path)];
    parts.push(file.kind ?? "text", file.encoding ?? "utf8", executableFlag, file.content);
    // Array.prototype.join is kept on purpose: it renders null/undefined parts as "".
    return parts.join("\0");
}
|
|
2788
2918
|
function evidenceFileWithPath(file, filePath) {
|
|
2789
2919
|
return {
|
|
@@ -2795,6 +2925,13 @@ function isUserFacingEvidenceFile(file) {
|
|
|
2795
2925
|
const normalized = file.path.replace(/\\/gu, "/").replace(/^\/+/u, "");
|
|
2796
2926
|
return normalized.split("/").every((segment) => segment !== ".workbench");
|
|
2797
2927
|
}
|
|
2928
|
+
/**
 * Decide whether a trace file should be listed as user-facing evidence.
 *
 * A file qualifies when `isUserFacingEvidenceFile` accepts it and its
 * basename is not `request.json`, `result.json`, or `trace.json`.
 *
 * @param {{ path: string }} file - Trace file to check.
 * @returns {boolean} True when the file should be shown.
 */
function isUserFacingTraceEvidenceFile(file) {
    if (isUserFacingEvidenceFile(file)) {
        // Backslashes are converted first so the basename is taken consistently.
        const name = path.basename(file.path.replace(/\\/gu, "/"));
        return !["request.json", "result.json", "trace.json"].includes(name);
    }
    return false;
}
|
|
2798
2935
|
/**
 * Turn an identifier into a single safe path segment.
 *
 * Every run of characters outside `A-Z a-z 0-9 . _ -` becomes one "-", and
 * an empty result becomes "_".
 *
 * @param {string} value - Identifier to sanitise (for example a case or job id).
 * @returns {string} A non-empty segment that is never "." or "..".
 */
function evidencePathSegment(value) {
    const segment = value.replace(/[^A-Za-z0-9._-]+/gu, "-") || "_";
    // "." and ".." pass the character filter but are directory references,
    // not names. Left alone, they would let an id climb out of the evidence
    // layout once the segment is joined into a path.
    if (segment === "." || segment === "..") {
        return segment.replace(/\./gu, "_");
    }
    return segment;
}
|
|
@@ -2943,17 +3080,6 @@ function findShowFile(files, requestedPath, objectRef) {
|
|
|
2943
3080
|
const candidates = normalized === "stderr.log"
|
|
2944
3081
|
? suffixCandidates.filter((file) => file.content.length > 0)
|
|
2945
3082
|
: suffixCandidates;
|
|
2946
|
-
const canonicalCandidates = candidates.filter(isCanonicalEvidenceFileCandidate);
|
|
2947
|
-
if (canonicalCandidates.length === 1) {
|
|
2948
|
-
return canonicalCandidates[0];
|
|
2949
|
-
}
|
|
2950
|
-
const equivalentCanonicalCandidate = singleEquivalentShowFile(canonicalCandidates);
|
|
2951
|
-
if (equivalentCanonicalCandidate) {
|
|
2952
|
-
return equivalentCanonicalCandidate;
|
|
2953
|
-
}
|
|
2954
|
-
if (canonicalCandidates.length > 1) {
|
|
2955
|
-
throw ambiguousShowPath(objectRef, requestedPath, canonicalCandidates);
|
|
2956
|
-
}
|
|
2957
3083
|
if (candidates.length === 1) {
|
|
2958
3084
|
return candidates[0];
|
|
2959
3085
|
}
|
|
@@ -2970,9 +3096,6 @@ function findShowFile(files, requestedPath, objectRef) {
|
|
|
2970
3096
|
}
|
|
2971
3097
|
throw ambiguousShowPath(objectRef, requestedPath, candidates.length > 0 ? candidates : suffixCandidates);
|
|
2972
3098
|
}
|
|
2973
|
-
function isCanonicalEvidenceFileCandidate(file) {
|
|
2974
|
-
return !file.path.includes("/traces/") && !file.path.includes("/artifacts/");
|
|
2975
|
-
}
|
|
2976
3099
|
function singleEquivalentShowFile(files) {
|
|
2977
3100
|
if (files.length <= 1) {
|
|
2978
3101
|
return null;
|
|
@@ -3001,23 +3124,6 @@ function fileListing(kind, id, files) {
|
|
|
3001
3124
|
/**
 * Render a plain-text file listing.
 *
 * @param {string} kind - Object kind shown in the header.
 * @param {string} id - Object id, passed through `displayRef` for the header.
 * @param {Array<{ path: string }>} files - Files to list.
 * @returns {string} A tab-separated header (`kind`, display ref,
 *   `files=<count>`) followed by one file path per line.
 */
function formatFileListing(kind, id, files) {
    const header = `${kind}\t${displayRef(id)}\tfiles=${files.length}`;
    const paths = files.map((file) => file.path);
    return [header, ...paths].join("\n");
}
|
|
3004
|
-
async function traceIdForCaseSource(core, ref) {
|
|
3005
|
-
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
3006
|
-
const trace = snapshotObjectByRef(snapshot.traces, ref, "trace");
|
|
3007
|
-
if (trace) {
|
|
3008
|
-
return trace.id;
|
|
3009
|
-
}
|
|
3010
|
-
const selection = runOrJobEvidenceSelection(snapshot, ref);
|
|
3011
|
-
const traceId = selection.run?.traceIds[0] ?? selection.jobs[0]?.traceIds[0];
|
|
3012
|
-
if (traceId) {
|
|
3013
|
-
return traceId;
|
|
3014
|
-
}
|
|
3015
|
-
throw new WorkbenchCodedError("ref_not_found", `Run, job, or trace not found: ${ref}`, {
|
|
3016
|
-
remediation: "Run workbench log, then workbench case add RUN_ID.",
|
|
3017
|
-
subject: { ref },
|
|
3018
|
-
exitCode: 1,
|
|
3019
|
-
});
|
|
3020
|
-
}
|
|
3021
3127
|
async function evalCoverageSummaries(core, runs) {
|
|
3022
3128
|
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
3023
3129
|
const jobsByRun = new Map();
|
|
@@ -3096,6 +3202,12 @@ function formatStatusSnapshot(status) {
|
|
|
3096
3202
|
const lines = [
|
|
3097
3203
|
`Root: ${status.project.root}`,
|
|
3098
3204
|
`Initialized: ${status.project.initialized ? "yes" : "no"}`,
|
|
3205
|
+
...(status.project.initialized ? [] : [
|
|
3206
|
+
`Installed skills: ${status.machine?.installedSkillCount ?? 0}`,
|
|
3207
|
+
`Connected providers: ${status.machine?.connectedProviders.length
|
|
3208
|
+
? status.machine.connectedProviders.map((entry) => `${entry.adapter}/${entry.profile}`).join(", ")
|
|
3209
|
+
: "none"}`,
|
|
3210
|
+
]),
|
|
3099
3211
|
...(status.project.currentVersionId ? [`Current version: ${displayRef(status.project.currentVersionId)}`] : []),
|
|
3100
3212
|
...(status.project.defaultSkill ? [`Default skill: ${status.project.defaultSkill}`] : []),
|
|
3101
3213
|
...(status.project.defaultAgent ? [`Default agent: ${status.project.defaultAgent}`] : []),
|
|
@@ -3125,6 +3237,36 @@ function formatStatusSnapshot(status) {
|
|
|
3125
3237
|
];
|
|
3126
3238
|
return lines.join("\n");
|
|
3127
3239
|
}
|
|
3240
|
+
/**
 * Render the installed-skills inventory as plain text.
 *
 * @param {{ skills: Array<object>, next?: string }} inventory - Inventory to print.
 * @returns {string} "No skills installed." when the inventory is empty,
 *   otherwise a tab-separated header plus one `formatInstalledSkill` row per
 *   skill. A trailing `next: <command>` line is appended in both cases when
 *   `inventory.next` is set.
 */
function formatInstalledInventory(inventory) {
    const body = inventory.skills.length === 0
        ? ["No skills installed."]
        : ["store\tname\tversion\tstatus\tsource", ...inventory.skills.map(formatInstalledSkill)];
    const footer = inventory.next ? [`next: ${inventory.next}`] : [];
    return [...body, ...footer].join("\n");
}
|
|
3254
|
+
/**
 * Render one installed skill as a tab-separated inventory row.
 *
 * @param {{ store: string, name: string, versionId?: string, status: string, handle?: string }} skill
 *   Installed skill record.
 * @returns {string} Columns: store, name, shortened version (or "-" when
 *   there is no version id), status, and handle (or "(no provenance)").
 */
function formatInstalledSkill(skill) {
    const version = skill.versionId ? shortInstalledVersion(skill.versionId) : "-";
    const source = skill.handle ?? "(no provenance)";
    const columns = [skill.store, skill.name, version, skill.status, source];
    return columns.join("\t");
}
|
|
3263
|
+
/**
 * Shorten an installed version id for table display.
 *
 * @param {string} versionId - Version identifier to shorten.
 * @returns {string} `displayRef(versionId)` for ids that start with "v_" and
 *   are longer than 10 characters; otherwise the first 12 characters of ids
 *   longer than 12; otherwise the id unchanged.
 */
function shortInstalledVersion(versionId) {
    if (versionId.startsWith("v_") && versionId.length > 10) {
        return displayRef(versionId);
    }
    if (versionId.length > 12) {
        return versionId.slice(0, 12);
    }
    return versionId;
}
|
|
3128
3270
|
/**
 * Render one version as a tab-separated row.
 *
 * @param {{ id: string, hash: string, message: string }} version - Version record.
 * @returns {string} Display ref, the first 12 characters of the hash, and
 *   the message, separated by tabs.
 */
function formatVersion(version) {
    const ref = displayRef(version.id);
    const shortHash = version.hash.slice(0, 12);
    return `${ref}\t${shortHash}\t${version.message}`;
}
|