@workbench-ai/workbench 0.0.73 → 0.0.75

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -4,10 +4,11 @@ import { createRequire } from "node:module";
4
4
  import os from "node:os";
5
5
  import path from "node:path";
6
6
  import { gzipSync } from "node:zlib";
7
- import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchInspectionSnapshot, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, publishWorkbenchVersion, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchSkillImproveCanUseQueuedAdapter, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
7
+ import { addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchInspectionSnapshot, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, prepareWorkbenchCloudEvalRequest, prepareWorkbenchCloudImproveRequest, publishWorkbenchVersion, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
8
8
  import { normalizeWorkbenchSkillName } from "@workbench-ai/workbench-contract";
9
9
  import { emitError, emitResult } from "./output.js";
10
- import { installSnapshotToTargets, normalizeInstallSnapshotPath, resolveInstallTargets, supportedInstallTargets, } from "./install-targets.js";
10
+ import { fanOutSkill, manualFanOutCommand } from "./fanout.js";
11
+ import { installedInventoryToJson, installSnapshotToStore, normalizeInstallSnapshotPath, readInstalledSkillsInventory, } from "./install-targets.js";
11
12
  import { startWorkbenchOpenServer } from "./open-server.js";
12
13
  const require = createRequire(import.meta.url);
13
14
  const HELP = [
@@ -19,11 +20,11 @@ const HELP = [
19
20
  "",
20
21
  "Taught commands:",
21
22
  " workbench new [DIR] [--json]",
22
- " workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
23
- " workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
23
+ " workbench eval [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
24
+ " workbench improve [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
24
25
  " workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
25
26
  " workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
26
- " workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
27
+ " workbench install [HANDLE_OR_URL] [--yes] [--dry-run] [--json]",
27
28
  "",
28
29
  "More:",
29
30
  " workbench help --all",
@@ -32,11 +33,11 @@ const HELP_ALL = [
32
33
  "Usage:",
33
34
  " workbench # = workbench status",
34
35
  " workbench new [DIR] [--json]",
35
- " workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
36
+ " workbench eval [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
36
37
  " workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
37
- " workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
38
+ " workbench improve [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
38
39
  " workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
39
- " workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
40
+ " workbench install [HANDLE_OR_URL] [--yes] [--dry-run] [--json]",
40
41
  "",
41
42
  "Inspect:",
42
43
  " workbench status [--dir DIR] [--json]",
@@ -47,7 +48,6 @@ const HELP_ALL = [
47
48
  " workbench open [--host HOST] [--port PORT] [--no-open]",
48
49
  "",
49
50
  "Configure:",
50
- " workbench case add RUN_ID [--json]",
51
51
  " workbench agent add NAME --adapter X [--model M] [--with k=v]... | list | rm NAME [--json]",
52
52
  "",
53
53
  "Share and auth:",
@@ -71,7 +71,7 @@ const COMMAND_HELP = {
71
71
  ].join("\n"),
72
72
  eval: [
73
73
  "Usage:",
74
- " workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
74
+ " workbench eval [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
75
75
  "",
76
76
  "Runs eval jobs for the selected version, measured skills, and agents. Omitted selectors use manifest defaults.",
77
77
  "",
@@ -80,7 +80,7 @@ const COMMAND_HELP = {
80
80
  ].join("\n"),
81
81
  improve: [
82
82
  "Usage:",
83
- " workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
83
+ " workbench improve [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
84
84
  "",
85
85
  "Creates one improved child version from evidence. The selected skills and agents must resolve to exactly one entry each.",
86
86
  "",
@@ -98,12 +98,12 @@ const COMMAND_HELP = {
98
98
  ].join("\n"),
99
99
  install: [
100
100
  "Usage:",
101
- " workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
101
+ " workbench install [HANDLE_OR_URL] [--yes] [--dry-run] [--json]",
102
102
  "",
103
- "Installs published Workbench Cloud source into local agent targets.",
103
+ "Installs published source into the canonical machine skill store, or lists installed skills when no source is given.",
104
104
  "",
105
105
  "Example:",
106
- " workbench install acme/earnings-prep --to codex --yes",
106
+ " workbench install acme/earnings-prep",
107
107
  ].join("\n"),
108
108
  status: [
109
109
  "Usage:",
@@ -169,15 +169,6 @@ const COMMAND_HELP = {
169
169
  "Example:",
170
170
  " workbench open --no-open",
171
171
  ].join("\n"),
172
- case: [
173
- "Usage:",
174
- " workbench case add RUN_ID [--json]",
175
- "",
176
- "Captures a regression case from a recorded run.",
177
- "",
178
- "Example:",
179
- " workbench case add run_abc12345",
180
- ].join("\n"),
181
172
  agent: [
182
173
  "Usage:",
183
174
  " workbench agent list [--json]",
@@ -256,7 +247,7 @@ const COMMAND_FLAGS = {
256
247
  samples: "positive-integer",
257
248
  skills: "string",
258
249
  },
259
- install: { ...COMMON_FLAGS, ...HELP_FLAG, "dry-run": "boolean", to: "repeat-string", yes: "boolean" },
250
+ install: { ...COMMON_FLAGS, ...HELP_FLAG, "dry-run": "boolean", yes: "boolean" },
260
251
  log: { ...PROJECT_FLAGS, ...HELP_FLAG, runs: "boolean", versions: "boolean" },
261
252
  login: {
262
253
  ...COMMON_FLAGS,
@@ -290,11 +281,6 @@ const COMMAND_FLAGS = {
290
281
  version: { ...COMMON_FLAGS, ...VERSION_FLAG },
291
282
  };
292
283
  const SUBCOMMAND_FLAGS = {
293
- case: {
294
- flags: {
295
- add: { ...PROJECT_FLAGS, ...HELP_FLAG },
296
- },
297
- },
298
284
  agent: {
299
285
  flags: {
300
286
  list: { ...PROJECT_FLAGS, ...HELP_FLAG },
@@ -345,12 +331,16 @@ export async function runCli(argv, io = {
345
331
  return await handleStatus(parsed, io);
346
332
  }
347
333
  if (command === "eval") {
334
+ rejectExtraInput(parsed, {
335
+ maxPositionals: 1,
336
+ message: "workbench eval does not accept a VERSION argument.",
337
+ remediation: "Run workbench eval for current source, or use workbench show VERSION to inspect old source.",
338
+ });
348
339
  if (parsed.flags.cloud === true) {
349
340
  return await handleCloudEval(parsed, io);
350
341
  }
351
342
  const runs = await evalWorkbenchSkill({
352
343
  ...core,
353
- version: optionalPositional(parsed, 1),
354
344
  skill: stringFlag(parsed, "skills"),
355
345
  agent: stringFlag(parsed, "agents"),
356
346
  samples: intFlag(parsed, "samples"),
@@ -377,16 +367,18 @@ export async function runCli(argv, io = {
377
367
  ].filter(Boolean).join("\n"));
378
368
  }
379
369
  if (command === "improve") {
370
+ rejectExtraInput(parsed, {
371
+ maxPositionals: 1,
372
+ message: "workbench improve does not accept a VERSION argument.",
373
+ remediation: "Run workbench improve for current source after recording failed or reviewed eval evidence.",
374
+ });
380
375
  if (parsed.flags.cloud === true) {
381
376
  return await handleCloudImprove(parsed, io);
382
377
  }
383
- const improverAgent = await resolveLocalImproverAgent(parsed, core);
384
378
  const result = await improveWorkbenchSkill({
385
379
  ...core,
386
- version: optionalPositional(parsed, 1),
387
380
  skill: stringFlag(parsed, "skills"),
388
381
  agent: stringFlag(parsed, "agents"),
389
- ...(improverAgent ? { improverAgent } : {}),
390
382
  budget: intFlag(parsed, "budget"),
391
383
  samples: intFlag(parsed, "samples"),
392
384
  });
@@ -423,9 +415,6 @@ export async function runCli(argv, io = {
423
415
  if (command === "agent") {
424
416
  return await handleAgent(parsed, io);
425
417
  }
426
- if (command === "case") {
427
- return await handleCase(parsed, io);
428
- }
429
418
  if (command === "sync") {
430
419
  const result = await syncWorkbenchRemote({
431
420
  ...core,
@@ -505,15 +494,17 @@ async function handleStatus(parsed, io) {
505
494
  const core = await coreOptions(parsed);
506
495
  const status = await workbenchStatusSnapshot(core);
507
496
  const auth = await workbenchCliAuthStatus();
508
- const cliStatus = await statusWithCausalNext(status, auth, core);
497
+ const machine = await workbenchMachineStatus(auth);
498
+ const cliStatus = await statusWithCausalNext(status, auth, core, machine);
509
499
  return emitResult("workbench.status.v1", {
510
500
  project: cliStatus.project,
511
501
  worktree: cliStatus.worktree,
512
502
  runs: cliStatus.runs,
513
503
  remotes: cliStatus.remotes,
514
504
  auth: auth,
505
+ machine: machine,
515
506
  next: cliStatus.next,
516
- }, parsed, io, () => formatStatusSnapshot({ ...cliStatus, auth }));
507
+ }, parsed, io, () => formatStatusSnapshot({ ...cliStatus, auth, machine }));
517
508
  }
518
509
  async function handleLog(parsed, io) {
519
510
  if (parsed.flags.runs === true && parsed.flags.versions === true) {
@@ -584,7 +575,8 @@ async function handleShow(parsed, io) {
584
575
  }
585
576
  const trace = snapshotObjectByRef(snapshot.traces, objectRef, "trace");
586
577
  if (trace) {
587
- return output(fileListing("trace", trace.id, trace.files), parsed, io, () => formatFileListing("trace", trace.id, trace.files));
578
+ const files = trace.files.filter(isUserFacingTraceEvidenceFile);
579
+ return output(fileListing("trace", trace.id, files), parsed, io, () => formatFileListing("trace", trace.id, files));
588
580
  }
589
581
  const artifact = snapshotObjectByRef(snapshot.artifacts, objectRef, "artifact");
590
582
  if (artifact) {
@@ -628,21 +620,6 @@ async function handleAgent(parsed, io) {
628
620
  }
629
621
  throw new WorkbenchUserError(`Unsupported agent command: ${subcommand}`);
630
622
  }
631
- async function handleCase(parsed, io) {
632
- const subcommand = requiredPositional(parsed, 1, "workbench case requires add.");
633
- if (subcommand === "add") {
634
- const core = await coreOptions(parsed);
635
- const sourceRef = requiredPositional(parsed, 2, "workbench case add requires RUN_ID.");
636
- rejectExtraInput(parsed, {
637
- maxPositionals: 3,
638
- message: "workbench case add accepts one RUN_ID argument.",
639
- remediation: "Run workbench case add RUN_ID.",
640
- });
641
- const record = await addWorkbenchCase({ ...core, fromTraceId: await traceIdForCaseSource(core, sourceRef) });
642
- return output(record, parsed, io, () => `Added draft case ${record.id}. Edit .workbench/cases/${record.path}/case.yaml before using it as score evidence.`);
643
- }
644
- throw new WorkbenchUserError(`Unknown command: workbench case ${subcommand}`);
645
- }
646
623
  async function handleAdapterLogin(provider, parsed, io) {
647
624
  const target = parseAuthTarget(provider, authProfileFlag(parsed));
648
625
  const method = authMethod(parsed, target.adapterId);
@@ -862,67 +839,87 @@ async function handleLogout(parsed, io) {
862
839
  if (tokenPresent) {
863
840
  await writeConfig({ schema: CONFIG_SCHEMA, ...(baseUrl ? { baseUrl } : {}) });
864
841
  }
865
- const adapterStatuses = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).listStatus().catch(() => []);
866
- const adapterAuthRetained = adapterStatuses.length > 0;
867
842
  return emitResult("workbench.cli.logout.v1", {
868
843
  ...(baseUrl ? { baseUrl } : {}),
869
844
  tokenPresent,
870
845
  revoke,
871
846
  configRemoved,
872
- adapterAuthRetained,
847
+ adapterAuth: "unchanged",
873
848
  }, parsed, io, () => [
874
849
  `Logged out of Workbench${baseUrl ? ` (${baseUrl})` : ""}.`,
875
850
  `Token: ${tokenPresent ? "present" : "absent"}; revoke ${revoke}; config ${configRemoved ? "removed" : "unchanged"}.`,
876
- adapterAuthRetained
877
- ? "Local adapter auth records were retained; run workbench logout PROVIDER to remove them."
878
- : "No local adapter auth records remain.",
851
+ "Local adapter auth unchanged; run workbench logout PROVIDER to remove provider credentials.",
879
852
  ].join("\n"));
880
853
  }
881
854
  async function handleInstall(parsed, io) {
882
- const sourceInput = requiredPositional(parsed, 1, "workbench install requires HANDLE_OR_URL.");
855
+ const sourceInput = optionalPositional(parsed, 1);
856
+ if (!sourceInput) {
857
+ rejectExtraInput(parsed, {
858
+ maxPositionals: 1,
859
+ message: "workbench install with no HANDLE_OR_URL lists installed skills.",
860
+ remediation: "Run workbench install OWNER/SKILL to install a published skill.",
861
+ });
862
+ if (parsed.flags.yes === true || parsed.flags["dry-run"] === true) {
863
+ throw new WorkbenchCodedError("usage", "workbench install inventory does not accept --yes or --dry-run.", {
864
+ remediation: "Run workbench install --json, or run workbench install OWNER/SKILL --dry-run.",
865
+ exitCode: 2,
866
+ });
867
+ }
868
+ const inventory = await readInstalledSkillsInventory({
869
+ includeUpdates: true,
870
+ lookupLatestVersion: latestInstallVersion,
871
+ });
872
+ return emitResult("workbench.cli.installed.v1", installedInventoryToJson(inventory), parsed, io, () => formatInstalledInventory(inventory));
873
+ }
883
874
  rejectExtraInput(parsed, {
884
875
  maxPositionals: 2,
885
876
  message: "workbench install accepts one HANDLE_OR_URL argument.",
886
- remediation: "Run workbench install OWNER/SKILL --to codex.",
877
+ remediation: "Run workbench install OWNER/SKILL.",
887
878
  });
888
879
  const source = await resolveWorkbenchInstallSourceInput(sourceInput);
889
880
  const workbenchSource = parseWorkbenchInstallSource(source);
890
881
  if (!workbenchSource) {
891
882
  throw new WorkbenchCodedError("usage", "workbench install requires a Workbench Cloud source URL.", {
892
- remediation: "Run workbench install OWNER/SKILL --to codex.",
883
+ remediation: "Run workbench install OWNER/SKILL.",
893
884
  exitCode: 2,
894
885
  });
895
886
  }
896
887
  const snapshot = await fetchWorkbenchInstallSourceSnapshot(workbenchSource, source);
897
888
  const sourceSummary = workbenchInstallSourceSummary(workbenchSource, snapshot);
898
- const config = await loadConfig();
899
- const toTargets = stringsFlag(parsed, "to");
900
- const selectedTargets = toTargets.length > 0 ? normalizeInstallTargetNames(toTargets) : await defaultInstallTargetNames(config);
901
- const targets = resolveInstallTargets({
902
- agents: selectedTargets.filter((target) => target !== "local"),
903
- local: selectedTargets.some((target) => target === "local"),
904
- skillName: snapshot.name,
905
- });
906
- const result = await installSnapshotToTargets({
889
+ const result = await installSnapshotToStore({
907
890
  snapshot,
908
- targets,
909
891
  overwrite: parsed.flags.yes === true,
910
892
  dryRun: parsed.flags["dry-run"] === true,
893
+ provenance: {
894
+ handle: `${workbenchSource.owner}/${workbenchSource.skill}`,
895
+ versionId: snapshot.versionId,
896
+ baseUrl: workbenchSource.baseUrl,
897
+ },
911
898
  });
912
- if (toTargets.length > 0 && parsed.flags["dry-run"] !== true) {
913
- await writeConfig({ ...config, installTargets: selectedTargets });
914
- }
899
+ const fanout = parsed.flags["dry-run"] === true
900
+ ? skippedFanOut(result.directoryName, result.destination)
901
+ : await fanOutSkill(result.directoryName, { skillDir: result.destination });
902
+ const next = installNextCommand(fanout);
915
903
  return emitResult("workbench.cli.install.v1", {
916
904
  source: sourceSummary,
917
905
  result: result.result,
918
- targets: result.targets,
906
+ store: result.store,
907
+ skill: result.directoryName,
908
+ destination: result.destination,
909
+ previous: result.previous,
919
910
  filesCopied: result.filesCopied,
911
+ contentHash: result.contentHash,
912
+ provenancePath: result.provenancePath,
913
+ fanout: fanOutToJson(fanout),
914
+ next: next,
920
915
  ...(parsed.flags["dry-run"] === true ? { dryRun: true } : {}),
921
916
  }, parsed, io, () => [
922
917
  parsed.flags["dry-run"] === true
923
- ? `Would install ${snapshot.name}: filesCopied=${result.filesCopied}`
924
- : `Installed ${snapshot.name}: ${result.result}`,
925
- ...result.targets.map((target) => ` ${target.agent}\t${target.previous}\t${target.destination}`),
918
+ ? `Would install ${result.directoryName} to ${result.destination}: filesCopied=${result.filesCopied}`
919
+ : `Installed ${result.directoryName}: ${result.result}`,
920
+ ` machine\t${result.previous}\t${result.destination}`,
921
+ formatFanOut(fanout),
922
+ ...(next ? [`next: ${next}`] : []),
926
923
  ].join("\n"));
927
924
  }
928
925
  async function handleCloudEval(parsed, io) {
@@ -1003,44 +1000,67 @@ async function handleCloudImprove(parsed, io) {
1003
1000
  ...(next ? [`next: ${next}`] : []),
1004
1001
  ].filter(Boolean).join("\n"));
1005
1002
  }
1006
- async function defaultInstallTargetNames(config) {
1007
- if (config.installTargets && config.installTargets.length > 0) {
1008
- return config.installTargets;
1003
+ function skippedFanOut(name, destination) {
1004
+ return {
1005
+ status: "skipped",
1006
+ command: manualFanOutCommand(destination, name),
1007
+ linkedAgents: [],
1008
+ reason: "dry-run",
1009
+ };
1010
+ }
1011
+ function installNextCommand(fanout) {
1012
+ return fanout.status === "failed" || (fanout.status === "skipped" && fanout.reason !== "dry-run")
1013
+ ? fanout.command
1014
+ : null;
1015
+ }
1016
+ function fanOutToJson(fanout) {
1017
+ return {
1018
+ status: fanout.status,
1019
+ command: fanout.command,
1020
+ linkedAgents: fanout.linkedAgents,
1021
+ ...(fanout.additionalAgents ? { additionalAgents: fanout.additionalAgents } : {}),
1022
+ ...(fanout.reason ? { reason: fanout.reason } : {}),
1023
+ ...(fanout.exitCode !== undefined ? { exitCode: fanout.exitCode } : {}),
1024
+ };
1025
+ }
1026
+ function formatFanOut(fanout) {
1027
+ if (fanout.status === "skipped") {
1028
+ return fanout.reason === "dry-run"
1029
+ ? "fanout: planned"
1030
+ : `fanout skipped: ${fanout.reason ?? "not available"}`;
1009
1031
  }
1010
- const detected = [];
1011
- for (const target of supportedInstallTargets()) {
1012
- if (target.agent === "local") {
1013
- continue;
1014
- }
1015
- const home = path.dirname(path.dirname(target.destination));
1016
- if (await pathExists(home)) {
1017
- detected.push(target.agent);
1018
- }
1032
+ if (fanout.status === "failed") {
1033
+ return `fanout failed: ${fanout.reason ?? "unknown failure"}`;
1019
1034
  }
1020
- return detected.length > 0 ? detected : ["local"];
1021
- }
1022
- function normalizeInstallTargetNames(values) {
1023
- const normalized = [];
1024
- for (const value of values) {
1025
- const target = value.trim().toLowerCase();
1026
- if (target !== "codex" && target !== "claude" && target !== "local") {
1027
- throw new WorkbenchCodedError("usage", `Unsupported install target: ${value}`, {
1028
- remediation: "Use --to codex, --to claude, or --to local.",
1029
- exitCode: 2,
1030
- });
1031
- }
1032
- normalized.push(target);
1035
+ if (fanout.linkedAgents.length === 0) {
1036
+ return "fanout: completed";
1033
1037
  }
1034
- return [...new Set(normalized)];
1038
+ const suffix = fanout.additionalAgents ? ` and ${fanout.additionalAgents} more` : "";
1039
+ return `fanned out to: ${fanout.linkedAgents.join(", ")}${suffix}`;
1035
1040
  }
1036
- async function pathExists(filePath) {
1037
- try {
1038
- await fs.access(filePath);
1039
- return true;
1040
- }
1041
- catch {
1042
- return false;
1041
+ async function latestInstallVersion(record) {
1042
+ const handle = normalizedOwnerSkillHandle(record.handle);
1043
+ if (!handle) {
1044
+ return undefined;
1043
1045
  }
1046
+ const snapshot = await withTimeout(fetchWorkbenchInstallSourceSnapshot({
1047
+ baseUrl: record.baseUrl,
1048
+ owner: handle.owner,
1049
+ skill: handle.skill,
1050
+ }, record.handle), 3000);
1051
+ return snapshot.versionId;
1052
+ }
1053
+ function withTimeout(promise, timeoutMs) {
1054
+ return new Promise((resolve, reject) => {
1055
+ const timer = setTimeout(() => reject(new Error(`Timed out after ${timeoutMs}ms.`)), timeoutMs);
1056
+ promise.then((value) => {
1057
+ clearTimeout(timer);
1058
+ resolve(value);
1059
+ }, (error) => {
1060
+ clearTimeout(timer);
1061
+ reject(error);
1062
+ });
1063
+ });
1044
1064
  }
1045
1065
  async function startCloudExecution(command, parsed, io) {
1046
1066
  const root = dirFlag(parsed) ?? process.cwd();
@@ -1062,12 +1082,23 @@ async function startCloudExecution(command, parsed, io) {
1062
1082
  });
1063
1083
  }
1064
1084
  const core = { dir: root, authToken: token };
1065
- writeCloudProgress(io, `workbench cloud: syncing ${remote.name} before hosted ${command}.`, showProgress);
1085
+ const request = command === "eval"
1086
+ ? await prepareWorkbenchCloudEvalRequest({
1087
+ ...core,
1088
+ skill: stringFlag(parsed, "skills"),
1089
+ agent: stringFlag(parsed, "agents"),
1090
+ samples: intFlag(parsed, "samples"),
1091
+ })
1092
+ : await prepareWorkbenchCloudImproveRequest({
1093
+ ...core,
1094
+ skill: stringFlag(parsed, "skills"),
1095
+ agent: stringFlag(parsed, "agents"),
1096
+ samples: intFlag(parsed, "samples"),
1097
+ budget: intFlag(parsed, "budget"),
1098
+ });
1066
1099
  const syncBefore = await syncWorkbenchRemote({ ...core, remote: remote.name });
1067
- writeCloudProgress(io, `workbench cloud: synced ${remote.name} before hosted ${command} (pushed=${syncBefore.pushed}, pulled=${syncBefore.pulled}, up-to-date=${syncBefore.upToDate}).`, showProgress);
1068
- const startSnapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
1069
1100
  const skillId = await resolveCloudSkillId(source);
1070
- const response = await apiRequest(`/api/workbench/skills/${encodeURIComponent(skillId)}${command === "improve" ? "/improve" : "/runs"}`, { method: "POST", body: cloudExecutionRequestBody(command, parsed) }, source.baseUrl);
1101
+ const response = await apiRequest(`/api/workbench/skills/${encodeURIComponent(skillId)}${command === "improve" ? "/improve" : "/runs"}`, { method: "POST", body: cloudExecutionRequestBody(command, request) }, source.baseUrl);
1071
1102
  const runs = response.runs ?? [];
1072
1103
  if (runs.length === 0) {
1073
1104
  throw new WorkbenchCodedError("cloud_run_missing", `Workbench Cloud did not return a run for ${command}.`, {
@@ -1077,11 +1108,10 @@ async function startCloudExecution(command, parsed, io) {
1077
1108
  exitCode: 1,
1078
1109
  });
1079
1110
  }
1111
+ const syncAfterSchedule = await syncWorkbenchRemote({ ...core, remote: remote.name });
1080
1112
  const initialRunIds = runs.map((run) => run.id);
1081
1113
  writeCloudProgress(io, `workbench cloud: scheduled hosted ${command} on ${remote.url} (${formatCloudRunStatuses(runs)}).`, showProgress);
1082
- const initialSyncAfter = await syncWorkbenchRemote({ ...core, remote: remote.name });
1083
- writeCloudProgress(io, `workbench cloud: synced after scheduling hosted ${command} (pushed=${initialSyncAfter.pushed}, pulled=${initialSyncAfter.pulled}, up-to-date=${initialSyncAfter.upToDate}).`, showProgress);
1084
- writeCloudProgress(io, `workbench cloud: waiting for terminal status; press Ctrl-C to detach and resume with workbench status or workbench show ${displayRef(initialRunIds[0] ?? "run")}.`, showProgress);
1114
+ writeCloudProgress(io, `workbench cloud: waiting for terminal status; press Ctrl-C to detach and resume with workbench show ${displayRef(initialRunIds[0] ?? "run")}.`, showProgress);
1085
1115
  const completed = await waitForCloudRuns({
1086
1116
  command,
1087
1117
  core,
@@ -1089,7 +1119,9 @@ async function startCloudExecution(command, parsed, io) {
1089
1119
  progress: showProgress,
1090
1120
  remote,
1091
1121
  runs,
1092
- initialSync: initialSyncAfter,
1122
+ source,
1123
+ skillId,
1124
+ initialSync: syncAfterSchedule,
1093
1125
  });
1094
1126
  return {
1095
1127
  core,
@@ -1098,7 +1130,7 @@ async function startCloudExecution(command, parsed, io) {
1098
1130
  initialRunIds,
1099
1131
  runs: completed.runs,
1100
1132
  ...(completed.detached ? { detached: true } : {}),
1101
- startVersionId: startSnapshot.status.currentVersionId ?? startSnapshot.refs.current,
1133
+ startVersionId: request.versionId,
1102
1134
  source,
1103
1135
  sync: {
1104
1136
  before: { pushed: syncBefore.pushed, pulled: syncBefore.pulled, upToDate: syncBefore.upToDate },
@@ -1123,6 +1155,8 @@ async function waitForCloudRuns(input) {
1123
1155
  const deadline = Date.now() + timeoutMs;
1124
1156
  let runs = [...input.runs];
1125
1157
  let interrupted = false;
1158
+ const startedAtMs = Date.now();
1159
+ let lastProgressAtMs = startedAtMs;
1126
1160
  const onSigint = () => {
1127
1161
  interrupted = true;
1128
1162
  writeCloudProgress(input.io, `workbench cloud: detaching from hosted ${input.command} (${runIds.map(displayRef).join(", ")}).`, input.progress);
@@ -1131,25 +1165,28 @@ async function waitForCloudRuns(input) {
1131
1165
  const seenStatuses = new Map();
1132
1166
  try {
1133
1167
  while (true) {
1134
- const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(input.core);
1135
- const snapshotRuns = runIds
1136
- .map((id) => snapshot.runs.find((entry) => entry.id === id))
1137
- .filter((run) => Boolean(run));
1138
- if (snapshotRuns.length > 0) {
1139
- runs = runIds.map((id) => snapshotRuns.find((entry) => entry.id === id) ?? runs.find((entry) => entry.id === id))
1140
- .filter((run) => Boolean(run));
1141
- }
1168
+ runs = await fetchCloudRuns(input.source.baseUrl, input.skillId, runIds, runs);
1169
+ let wroteProgress = false;
1170
+ const nowMs = Date.now();
1142
1171
  for (const run of runs) {
1143
1172
  const previous = seenStatuses.get(run.id);
1144
1173
  if (previous !== run.status) {
1145
1174
  seenStatuses.set(run.id, run.status);
1146
- writeCloudProgress(input.io, `workbench cloud: ${displayRef(run.id)} is ${run.status}.`, input.progress);
1175
+ writeCloudProgress(input.io, `workbench cloud: ${formatCloudRunState(run, startedAtMs, nowMs)}.`, input.progress);
1176
+ wroteProgress = input.progress || wroteProgress;
1147
1177
  }
1148
1178
  }
1149
1179
  if (runs.length === runIds.length && runs.every(isTerminalRun)) {
1150
- writeCloudProgress(input.io, `workbench cloud: hosted ${input.command} finished (${formatCloudRunStatuses(runs)}).`, input.progress);
1180
+ sync = await syncWorkbenchRemote({ ...input.core, remote: input.remote.name });
1151
1181
  return { runs, sync };
1152
1182
  }
1183
+ if (wroteProgress) {
1184
+ lastProgressAtMs = nowMs;
1185
+ }
1186
+ else if (input.progress && nowMs - lastProgressAtMs >= 60_000) {
1187
+ writeCloudProgress(input.io, `workbench cloud: still waiting (${formatCloudRunStates(runs, startedAtMs, nowMs)}).`);
1188
+ lastProgressAtMs = nowMs;
1189
+ }
1153
1190
  if (interrupted) {
1154
1191
  return { runs, sync, detached: true };
1155
1192
  }
@@ -1168,14 +1205,18 @@ async function waitForCloudRuns(input) {
1168
1205
  if (interrupted) {
1169
1206
  return { runs, sync, detached: true };
1170
1207
  }
1171
- sync = await syncWorkbenchRemote({ ...input.core, remote: input.remote.name });
1172
- writeCloudProgress(input.io, `workbench cloud: synced ${input.remote.name} while waiting (${formatCloudRunStatuses(runs)}).`, input.progress);
1173
1208
  }
1174
1209
  }
1175
1210
  finally {
1176
1211
  process.off("SIGINT", onSigint);
1177
1212
  }
1178
1213
  }
1214
+ async function fetchCloudRuns(baseUrl, skillId, runIds, fallback) {
1215
+ const responses = await Promise.all(runIds.map((runId) => apiRequest(`/api/workbench/skills/${encodeURIComponent(skillId)}/runs/${encodeURIComponent(runId)}`, {}, baseUrl)));
1216
+ return runIds
1217
+ .map((runId, index) => responses[index]?.run ?? fallback.find((run) => run.id === runId))
1218
+ .filter((run) => Boolean(run));
1219
+ }
1179
1220
  function isTerminalRun(run) {
1180
1221
  return run.status === "succeeded" || run.status === "failed" || run.status === "canceled";
1181
1222
  }
@@ -1288,13 +1329,15 @@ async function resolveCloudSkillId(source) {
1288
1329
  }
1289
1330
  return skill.id;
1290
1331
  }
1291
- function cloudExecutionRequestBody(command, parsed) {
1332
+ function cloudExecutionRequestBody(command, request) {
1292
1333
  return {
1293
- version: optionalPositional(parsed, 1),
1294
- skill: stringFlag(parsed, "skills"),
1295
- agent: stringFlag(parsed, "agents"),
1296
- samples: intFlag(parsed, "samples"),
1297
- ...(command === "improve" ? { budget: intFlag(parsed, "budget") } : {}),
1334
+ versionId: request.versionId,
1335
+ skill: request.skill,
1336
+ agent: request.agent,
1337
+ samples: request.samples,
1338
+ ...(command === "improve" ? {
1339
+ budget: request.budget,
1340
+ } : {}),
1298
1341
  };
1299
1342
  }
1300
1343
  function cloudImproveNextCommand(runs) {
@@ -1335,6 +1378,17 @@ function formatCloudRunStatuses(runs) {
1335
1378
  ? runs.map((run) => `${displayRef(run.id)}:${run.status}`).join(", ")
1336
1379
  : "no runs";
1337
1380
  }
1381
+ function formatCloudRunStates(runs, startedAtMs, nowMs) {
1382
+ return runs.length > 0
1383
+ ? runs.map((run) => formatCloudRunState(run, startedAtMs, nowMs)).join(", ")
1384
+ : `no runs (${elapsedSeconds(startedAtMs, nowMs)}s)`;
1385
+ }
1386
+ function formatCloudRunState(run, startedAtMs, nowMs) {
1387
+ return `${displayRef(run.id)} ${run.status} (${elapsedSeconds(startedAtMs, nowMs)}s)`;
1388
+ }
1389
+ function elapsedSeconds(startedAtMs, nowMs) {
1390
+ return Math.max(0, Math.floor((nowMs - startedAtMs) / 1000));
1391
+ }
1338
1392
  function workbenchInstallSourceSummary(source, snapshot) {
1339
1393
  const installUrl = `${source.baseUrl}/skills/${encodeURIComponent(source.owner)}/${encodeURIComponent(source.skill)}`;
1340
1394
  return {
@@ -1495,7 +1549,6 @@ async function loadConfig() {
1495
1549
  ...(typeof parsed.baseUrl === "string" ? { baseUrl: normalizeBaseUrl(parsed.baseUrl) } : {}),
1496
1550
  ...(typeof parsed.accessToken === "string" ? { accessToken: parsed.accessToken } : {}),
1497
1551
  ...(typeof parsed.username === "string" ? { username: parsed.username } : {}),
1498
- ...(Array.isArray(parsed.installTargets) ? { installTargets: normalizeInstallTargetNames(parsed.installTargets.flatMap((entry) => typeof entry === "string" ? [entry] : [])) } : {}),
1499
1552
  };
1500
1553
  }
1501
1554
  // Single resolver for the Workbench Cloud token used by every authenticated
@@ -2250,7 +2303,7 @@ function flagSpecForParsedPrefix(positionals, flags) {
2250
2303
  return allowedFlagsForCommand({ positionals: [...positionals], flags: {} }, command);
2251
2304
  }
2252
2305
  function addFlag(flags, name, value) {
2253
- if (name === "with" || name === "to") {
2306
+ if (name === "with") {
2254
2307
  const existing = flags[name];
2255
2308
  flags[name] = Array.isArray(existing)
2256
2309
  ? [...existing, String(value)]
@@ -2274,14 +2327,6 @@ function stringFlag(parsed, name) {
2274
2327
  const value = parsed.flags[name];
2275
2328
  return typeof value === "string" ? value : undefined;
2276
2329
  }
2277
- function stringsFlag(parsed, name) {
2278
- const value = parsed.flags[name];
2279
- return Array.isArray(value)
2280
- ? value
2281
- : typeof value === "string"
2282
- ? [value]
2283
- : [];
2284
- }
2285
2330
  function intFlag(parsed, name) {
2286
2331
  const value = stringFlag(parsed, name);
2287
2332
  if (!value) {
@@ -2382,12 +2427,17 @@ async function previewPublishWithDerivedRemote(parsed) {
2382
2427
  return {
2383
2428
  remote,
2384
2429
  version,
2385
- visibility: parsePublishVisibilityFlags(parsed) ?? "private",
2430
+ visibility: parsePublishVisibilityFlags(parsed) ??
2431
+ normalizePublishVisibility(reconciledSnapshot.refs["publication/visibility"]) ??
2432
+ "private",
2386
2433
  installHandle: installHandleFromCloudRemote(remote),
2387
2434
  installUrl: remote.url,
2388
2435
  pinnedInstallUrl: `${remote.url}/releases/${encodeURIComponent(version.id)}`,
2389
2436
  };
2390
2437
  }
2438
+ function normalizePublishVisibility(value) {
2439
+ return value === "private" || value === "internal" || value === "public" ? value : undefined;
2440
+ }
2391
2441
  async function ensurePublishRemote(parsed) {
2392
2442
  const core = await coreOptions(parsed);
2393
2443
  const root = path.resolve(dirFlag(parsed) ?? process.cwd());
@@ -2460,7 +2510,7 @@ async function resolveWorkbenchInstallSourceInput(input) {
2460
2510
  const handle = normalizedOwnerSkillHandle(input);
2461
2511
  if (!handle) {
2462
2512
  throw new WorkbenchCodedError("usage", "workbench install expects OWNER/SKILL or a Workbench Cloud skill URL.", {
2463
- remediation: "Run workbench install OWNER/SKILL --to codex.",
2513
+ remediation: "Run workbench install OWNER/SKILL.",
2464
2514
  exitCode: 2,
2465
2515
  });
2466
2516
  }
@@ -2611,11 +2661,25 @@ async function workbenchCliAuthStatus() {
2611
2661
  })),
2612
2662
  };
2613
2663
  }
2664
+ async function workbenchMachineStatus(auth) {
2665
+ const inventory = await readInstalledSkillsInventory();
2666
+ return {
2667
+ installedSkillCount: inventory.skills.length,
2668
+ stores: inventory.stores,
2669
+ connectedProviders: auth.adapters
2670
+ .filter((entry) => entry.status === "connected")
2671
+ .map((entry) => ({
2672
+ adapter: entry.adapter,
2673
+ ...(entry.slot ? { slot: entry.slot } : {}),
2674
+ profile: entry.profile,
2675
+ })),
2676
+ };
2677
+ }
2614
2678
  function scoredRunValue(run) {
2615
- return run.status === "succeeded" && typeof run.score === "number" ? run.score : undefined;
2679
+ return typeof run.score === "number" ? run.score : undefined;
2616
2680
  }
2617
2681
  function scoredJobValue(job) {
2618
- return job.status === "succeeded" && typeof job.score === "number" ? job.score : undefined;
2682
+ return typeof job.score === "number" ? job.score : undefined;
2619
2683
  }
2620
2684
  function snapshotHasWorkflowCase(snapshot) {
2621
2685
  const currentVersion = snapshotVersionByRef(snapshot, snapshot.status.currentVersionId ?? snapshot.refs.current ?? "");
@@ -2628,9 +2692,12 @@ function installHandleFromStatusRemote(remote) {
2628
2692
  const source = parseWorkbenchInstallSource(publicationUrl ?? remote.url);
2629
2693
  return source ? `${source.owner}/${source.skill}` : publicationUrl ?? remote.url;
2630
2694
  }
2631
- async function statusWithCausalNext(status, auth, core) {
2695
+ async function statusWithCausalNext(status, auth, core, machine) {
2632
2696
  if (!status.project.initialized) {
2633
- return status;
2697
+ return {
2698
+ ...status,
2699
+ next: machine.installedSkillCount > 0 ? "workbench install" : status.next,
2700
+ };
2634
2701
  }
2635
2702
  const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core).catch(() => null);
2636
2703
  const lastRun = snapshot?.runs
@@ -2767,28 +2834,61 @@ function evidenceFilesForRunOrJob(snapshot, ref) {
2767
2834
  }
2768
2835
  const traceById = new Map(snapshot.traces.map((trace) => [trace.id, trace]));
2769
2836
  const artifactById = new Map(snapshot.artifacts.map((artifact) => [artifact.id, artifact]));
2770
- const files = selection.jobs.flatMap((job) => [
2837
+ const candidates = selection.jobs.flatMap((job) => [
2771
2838
  ...job.artifactIds.flatMap((artifactId) => {
2772
2839
  const artifact = artifactById.get(artifactId);
2773
2840
  return artifact
2774
- ? artifact.files.filter(isUserFacingEvidenceFile).map((file) => evidenceFileWithPath(file, `cases/${evidencePathSegment(job.caseId)}/jobs/${evidencePathSegment(job.id)}/${file.path}`))
2841
+ ? artifact.files.filter(isUserFacingEvidenceFile).map((file) => ({
2842
+ file: evidenceFileWithPath(file, `cases/${evidencePathSegment(job.caseId)}/jobs/${evidencePathSegment(job.id)}/${file.path}`),
2843
+ jobId: job.id,
2844
+ source: "artifact",
2845
+ }))
2775
2846
  : [];
2776
2847
  }),
2777
2848
  ...job.traceIds.flatMap((traceId) => {
2778
2849
  const trace = traceById.get(traceId);
2779
2850
  return trace
2780
- ? trace.files.filter(isUserFacingEvidenceFile).map((file) => evidenceFileWithPath(file, `cases/${evidencePathSegment(job.caseId)}/jobs/${evidencePathSegment(job.id)}/traces/${evidencePathSegment(trace.id)}/${file.path}`))
2851
+ ? trace.files.filter(isUserFacingTraceEvidenceFile).map((file) => ({
2852
+ file: evidenceFileWithPath(file, `cases/${evidencePathSegment(job.caseId)}/jobs/${evidencePathSegment(job.id)}/traces/${evidencePathSegment(trace.id)}/${file.path}`),
2853
+ jobId: job.id,
2854
+ source: "trace",
2855
+ }))
2781
2856
  : [];
2782
2857
  }),
2783
2858
  ]);
2859
+ return canonicalEvidenceFiles(candidates);
2860
+ }
2861
+ function canonicalEvidenceFiles(candidates) {
2784
2862
  const seen = new Set();
2785
- return files.filter((file) => {
2863
+ const sameJobArtifactFiles = new Set();
2864
+ const files = [];
2865
+ for (const candidate of candidates) {
2866
+ const file = candidate.file;
2786
2867
  if (seen.has(file.path)) {
2787
- return false;
2868
+ continue;
2788
2869
  }
2789
2870
  seen.add(file.path);
2790
- return true;
2791
- });
2871
+ const equivalentKey = sameJobEquivalentEvidenceKey(candidate);
2872
+ if (candidate.source === "trace" && sameJobArtifactFiles.has(equivalentKey)) {
2873
+ continue;
2874
+ }
2875
+ if (candidate.source === "artifact") {
2876
+ sameJobArtifactFiles.add(equivalentKey);
2877
+ }
2878
+ files.push(file);
2879
+ }
2880
+ return files;
2881
+ }
2882
+ function sameJobEquivalentEvidenceKey(candidate) {
2883
+ const file = candidate.file;
2884
+ return [
2885
+ candidate.jobId,
2886
+ path.basename(file.path),
2887
+ file.kind ?? "text",
2888
+ file.encoding ?? "utf8",
2889
+ file.executable === true ? "1" : "0",
2890
+ file.content,
2891
+ ].join("\0");
2792
2892
  }
2793
2893
  function evidenceFileWithPath(file, filePath) {
2794
2894
  return {
@@ -2800,6 +2900,13 @@ function isUserFacingEvidenceFile(file) {
2800
2900
  const normalized = file.path.replace(/\\/gu, "/").replace(/^\/+/u, "");
2801
2901
  return normalized.split("/").every((segment) => segment !== ".workbench");
2802
2902
  }
2903
+ function isUserFacingTraceEvidenceFile(file) {
2904
+ if (!isUserFacingEvidenceFile(file)) {
2905
+ return false;
2906
+ }
2907
+ const basename = path.basename(file.path.replace(/\\/gu, "/"));
2908
+ return basename !== "request.json" && basename !== "result.json" && basename !== "trace.json";
2909
+ }
2803
2910
  function evidencePathSegment(value) {
2804
2911
  return value.replace(/[^A-Za-z0-9._-]+/gu, "-") || "_";
2805
2912
  }
@@ -2849,41 +2956,6 @@ function manifestOnly(value) {
2849
2956
  }
2850
2957
  return out;
2851
2958
  }
2852
- async function resolveLocalImproverAgent(parsed, core) {
2853
- if (stringFlag(parsed, "agents")) {
2854
- return undefined;
2855
- }
2856
- const agents = await listWorkbenchAgents(core).catch(() => []);
2857
- const status = await workbenchStatusSnapshot(core).catch(() => undefined);
2858
- const defaultAgentName = status?.project.defaultAgent ?? agents[0]?.name;
2859
- const defaultAgent = agents.find((agent) => agent.name === defaultAgentName);
2860
- if (defaultAgent && workbenchSkillImproveCanUseQueuedAdapter(defaultAgent)) {
2861
- return undefined;
2862
- }
2863
- const connected = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).listStatus().catch(() => []);
2864
- const candidates = connected
2865
- .filter((entry) => entry.status === "connected" &&
2866
- (entry.adapterId === "claude" || entry.adapterId === "codex"))
2867
- .sort((left, right) => {
2868
- const adapterRank = (adapter) => adapter === "claude" ? 0 : adapter === "codex" ? 1 : 2;
2869
- return adapterRank(left.adapterId) - adapterRank(right.adapterId) ||
2870
- (Date.parse(right.updatedAt ?? "") || 0) - (Date.parse(left.updatedAt ?? "") || 0);
2871
- });
2872
- const selected = candidates[0];
2873
- if (!selected) {
2874
- throw new WorkbenchCodedError("auth_required", "workbench improve needs a connected improver.", {
2875
- remediation: "Run workbench login claude (or codex) to connect an improver.",
2876
- exitCode: 1,
2877
- });
2878
- }
2879
- return {
2880
- name: selected.adapterId,
2881
- adapter: selected.adapterId,
2882
- config: {
2883
- auth: selected.slot ? { [selected.slot]: selected.profile } : selected.profile,
2884
- },
2885
- };
2886
- }
2887
2959
  function formatLogEntry(entry) {
2888
2960
  if (entry.kind === "version") {
2889
2961
  return `${entry.createdAt}\tversion\t${displayRef(entry.id)}\tfiles=${entry.fileCount}\t${entry.message}`;
@@ -2948,17 +3020,6 @@ function findShowFile(files, requestedPath, objectRef) {
2948
3020
  const candidates = normalized === "stderr.log"
2949
3021
  ? suffixCandidates.filter((file) => file.content.length > 0)
2950
3022
  : suffixCandidates;
2951
- const canonicalCandidates = candidates.filter(isCanonicalEvidenceFileCandidate);
2952
- if (canonicalCandidates.length === 1) {
2953
- return canonicalCandidates[0];
2954
- }
2955
- const equivalentCanonicalCandidate = singleEquivalentShowFile(canonicalCandidates);
2956
- if (equivalentCanonicalCandidate) {
2957
- return equivalentCanonicalCandidate;
2958
- }
2959
- if (canonicalCandidates.length > 1) {
2960
- throw ambiguousShowPath(objectRef, requestedPath, canonicalCandidates);
2961
- }
2962
3023
  if (candidates.length === 1) {
2963
3024
  return candidates[0];
2964
3025
  }
@@ -2975,9 +3036,6 @@ function findShowFile(files, requestedPath, objectRef) {
2975
3036
  }
2976
3037
  throw ambiguousShowPath(objectRef, requestedPath, candidates.length > 0 ? candidates : suffixCandidates);
2977
3038
  }
2978
- function isCanonicalEvidenceFileCandidate(file) {
2979
- return !file.path.includes("/traces/") && !file.path.includes("/artifacts/");
2980
- }
2981
3039
  function singleEquivalentShowFile(files) {
2982
3040
  if (files.length <= 1) {
2983
3041
  return null;
@@ -3006,23 +3064,6 @@ function fileListing(kind, id, files) {
3006
3064
  function formatFileListing(kind, id, files) {
3007
3065
  return [`${kind}\t${displayRef(id)}\tfiles=${files.length}`, ...files.map((file) => file.path)].join("\n");
3008
3066
  }
3009
- async function traceIdForCaseSource(core, ref) {
3010
- const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
3011
- const trace = snapshotObjectByRef(snapshot.traces, ref, "trace");
3012
- if (trace) {
3013
- return trace.id;
3014
- }
3015
- const selection = runOrJobEvidenceSelection(snapshot, ref);
3016
- const traceId = selection.run?.traceIds[0] ?? selection.jobs[0]?.traceIds[0];
3017
- if (traceId) {
3018
- return traceId;
3019
- }
3020
- throw new WorkbenchCodedError("ref_not_found", `Run, job, or trace not found: ${ref}`, {
3021
- remediation: "Run workbench log, then workbench case add RUN_ID.",
3022
- subject: { ref },
3023
- exitCode: 1,
3024
- });
3025
- }
3026
3067
  async function evalCoverageSummaries(core, runs) {
3027
3068
  const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
3028
3069
  const jobsByRun = new Map();
@@ -3101,6 +3142,12 @@ function formatStatusSnapshot(status) {
3101
3142
  const lines = [
3102
3143
  `Root: ${status.project.root}`,
3103
3144
  `Initialized: ${status.project.initialized ? "yes" : "no"}`,
3145
+ ...(status.project.initialized ? [] : [
3146
+ `Installed skills: ${status.machine?.installedSkillCount ?? 0}`,
3147
+ `Connected providers: ${status.machine?.connectedProviders.length
3148
+ ? status.machine.connectedProviders.map((entry) => `${entry.adapter}/${entry.profile}`).join(", ")
3149
+ : "none"}`,
3150
+ ]),
3104
3151
  ...(status.project.currentVersionId ? [`Current version: ${displayRef(status.project.currentVersionId)}`] : []),
3105
3152
  ...(status.project.defaultSkill ? [`Default skill: ${status.project.defaultSkill}`] : []),
3106
3153
  ...(status.project.defaultAgent ? [`Default agent: ${status.project.defaultAgent}`] : []),
@@ -3130,6 +3177,36 @@ function formatStatusSnapshot(status) {
3130
3177
  ];
3131
3178
  return lines.join("\n");
3132
3179
  }
3180
+ function formatInstalledInventory(inventory) {
3181
+ if (inventory.skills.length === 0) {
3182
+ return [
3183
+ "No skills installed.",
3184
+ ...(inventory.next ? [`next: ${inventory.next}`] : []),
3185
+ ].join("\n");
3186
+ }
3187
+ const lines = [
3188
+ "store\tname\tversion\tstatus\tsource",
3189
+ ...inventory.skills.map(formatInstalledSkill),
3190
+ ...(inventory.next ? [`next: ${inventory.next}`] : []),
3191
+ ];
3192
+ return lines.join("\n");
3193
+ }
3194
+ function formatInstalledSkill(skill) {
3195
+ return [
3196
+ skill.store,
3197
+ skill.name,
3198
+ skill.versionId ? shortInstalledVersion(skill.versionId) : "-",
3199
+ skill.status,
3200
+ skill.handle ?? "(no provenance)",
3201
+ ].join("\t");
3202
+ }
3203
+ function shortInstalledVersion(versionId) {
3204
+ return versionId.startsWith("v_") && versionId.length > 10
3205
+ ? displayRef(versionId)
3206
+ : versionId.length > 12
3207
+ ? versionId.slice(0, 12)
3208
+ : versionId;
3209
+ }
3133
3210
  function formatVersion(version) {
3134
3211
  return `${displayRef(version.id)}\t${version.hash.slice(0, 12)}\t${version.message}`;
3135
3212
  }