@workbench-ai/workbench 0.0.70 → 0.0.71

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AA4DA,MAAM,WAAW,KAAK;IACpB,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;CAC/B;AAsRD,wBAAsB,MAAM,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,EAAE,EAAE,GAAE,KAGzD,GAAG,OAAO,CAAC,MAAM,CAAC,CAsMlB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AA2DA,MAAM,WAAW,KAAK;IACpB,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;CAC/B;AAkUD,wBAAsB,MAAM,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,EAAE,EAAE,GAAE,KAGzD,GAAG,OAAO,CAAC,MAAM,CAAC,CAwMlB"}
package/dist/index.js CHANGED
@@ -4,10 +4,10 @@ import { createRequire } from "node:module";
4
4
  import os from "node:os";
5
5
  import path from "node:path";
6
6
  import { gzipSync } from "node:zlib";
7
- import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchCases, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, publishWorkbenchVersion, removeWorkbenchCase, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
7
+ import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchInspectionSnapshot, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, publishWorkbenchVersion, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchSkillImproveCanUseQueuedAdapter, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
8
8
  import { normalizeWorkbenchSkillName } from "@workbench-ai/workbench-contract";
9
9
  import { emitError, emitResult } from "./output.js";
10
- import { installSnapshotToTargets, installTargetsToJson, normalizeInstallSnapshotPath, resolveInstallTargets, supportedInstallTargets, } from "./install-targets.js";
10
+ import { installSnapshotToTargets, normalizeInstallSnapshotPath, resolveInstallTargets, supportedInstallTargets, } from "./install-targets.js";
11
11
  import { startWorkbenchOpenServer } from "./open-server.js";
12
12
  const require = createRequire(import.meta.url);
13
13
  const HELP = [
@@ -23,7 +23,7 @@ const HELP = [
23
23
  " workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
24
24
  " workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
25
25
  " workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
26
- " workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--list] [--dry-run] [--json]",
26
+ " workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
27
27
  "",
28
28
  "More:",
29
29
  " workbench help --all",
@@ -36,7 +36,7 @@ const HELP_ALL = [
36
36
  " workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
37
37
  " workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
38
38
  " workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
39
- " workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--list] [--dry-run] [--json]",
39
+ " workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
40
40
  "",
41
41
  "Inspect:",
42
42
  " workbench status [--dir DIR] [--json]",
@@ -47,7 +47,7 @@ const HELP_ALL = [
47
47
  " workbench open [--host HOST] [--port PORT] [--no-open] [--json]",
48
48
  "",
49
49
  "Configure:",
50
- " workbench case add [RUN_ID] | list | rm ID [--json]",
50
+ " workbench case add RUN_ID [--json]",
51
51
  " workbench agent add NAME --adapter X [--model M] [--with k=v]... | list | rm NAME [--json]",
52
52
  "",
53
53
  "Share and auth:",
@@ -65,28 +65,40 @@ const COMMAND_HELP = {
65
65
  " workbench new [DIR] [--json]",
66
66
  "",
67
67
  "Creates a Workbench skill project.",
68
+ "",
69
+ "Example:",
70
+ " workbench new earnings-prep",
68
71
  ].join("\n"),
69
72
  eval: [
70
73
  "Usage:",
71
74
  " workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
72
75
  "",
73
76
  "Runs eval jobs for the selected version, measured skills, and agents. Omitted selectors use manifest defaults.",
77
+ "",
78
+ "Example:",
79
+ " workbench eval -n 5",
74
80
  ].join("\n"),
75
81
  improve: [
76
82
  "Usage:",
77
83
  " workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
78
84
  "",
79
85
  "Creates one improved child version from evidence. The selected skills and agents must resolve to exactly one entry each.",
86
+ "",
87
+ "Example:",
88
+ " workbench improve --budget 1 -n 1",
80
89
  ].join("\n"),
81
90
  compare: [
82
91
  "Usage:",
83
92
  " workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
84
93
  "",
85
94
  "Compares recorded eval evidence across selected skills, agents, and versions.",
95
+ "",
96
+ "Example:",
97
+ " workbench compare --agents all",
86
98
  ].join("\n"),
87
99
  install: [
88
100
  "Usage:",
89
- " workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--list] [--dry-run] [--json]",
101
+ " workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
90
102
  "",
91
103
  "Installs published Workbench Cloud source into local agent targets.",
92
104
  "",
@@ -98,12 +110,18 @@ const COMMAND_HELP = {
98
110
  " workbench status [--dir DIR] [--json]",
99
111
  "",
100
112
  "Reports project, worktree, run, per-remote sync/publication, and auth state. --json emits the workbench.status.v1 dashboard.",
113
+ "",
114
+ "Example:",
115
+ " workbench status --json",
101
116
  ].join("\n"),
102
117
  logout: [
103
118
  "Usage:",
104
119
  " workbench logout [PROVIDER] [--json]",
105
120
  "",
106
121
  "With no provider, logs out of Workbench Cloud. With a provider such as codex or claude, removes local adapter auth.",
122
+ "",
123
+ "Example:",
124
+ " workbench logout claude",
107
125
  ].join("\n"),
108
126
  show: [
109
127
  "Usage:",
@@ -111,38 +129,54 @@ const COMMAND_HELP = {
111
129
  " workbench show REF:PATH [--json]",
112
130
  "",
113
131
  "Shows a Workbench object, lists files for file-backed objects, or prints one file.",
132
+ "",
133
+ "Example:",
134
+ " workbench show run_abc12345:result.json",
114
135
  ].join("\n"),
115
136
  log: [
116
137
  "Usage:",
117
138
  " workbench log [--runs|--versions] [--json]",
118
139
  "",
119
140
  "Shows one reverse-chronological timeline of versions and runs.",
141
+ "",
142
+ "Example:",
143
+ " workbench log --runs",
120
144
  ].join("\n"),
121
145
  diff: [
122
146
  "Usage:",
123
147
  " workbench diff [A..B] [--json]",
124
148
  "",
125
149
  "Shows changed files between two Workbench source versions.",
150
+ "",
151
+ "Example:",
152
+ " workbench diff 26059f9a..eac5699c",
126
153
  ].join("\n"),
127
154
  switch: [
128
155
  "Usage:",
129
156
  " workbench switch VERSION [--json]",
130
157
  "",
131
158
  "Switches the working skill source to a recorded Workbench version.",
159
+ "",
160
+ "Example:",
161
+ " workbench switch 26059f9a",
132
162
  ].join("\n"),
133
163
  open: [
134
164
  "Usage:",
135
165
  " workbench open [--host HOST] [--port PORT] [--no-open] [--json]",
136
166
  "",
137
167
  "Serves or emits the read-only Workbench inspection snapshot.",
168
+ "",
169
+ "Example:",
170
+ " workbench open --no-open",
138
171
  ].join("\n"),
139
172
  case: [
140
173
  "Usage:",
141
- " workbench case list [--json]",
142
- " workbench case add [RUN_ID] [--json]",
143
- " workbench case rm ID [--json]",
174
+ " workbench case add RUN_ID [--json]",
144
175
  "",
145
- "Lists cases, creates a draft case, or removes a case.",
176
+ "Captures a regression case from a recorded run.",
177
+ "",
178
+ "Example:",
179
+ " workbench case add run_abc12345",
146
180
  ].join("\n"),
147
181
  agent: [
148
182
  "Usage:",
@@ -151,18 +185,27 @@ const COMMAND_HELP = {
151
185
  " workbench agent rm NAME [--json]",
152
186
  "",
153
187
  "Lists, adds, or removes eval agent configurations.",
188
+ "",
189
+ "Example:",
190
+ " workbench agent add claude --adapter claude --model sonnet",
154
191
  ].join("\n"),
155
192
  sync: [
156
193
  "Usage:",
157
194
  " workbench sync [REMOTE] [--dry-run] [--dir DIR] [--json]",
158
195
  "",
159
196
  "Plumbing command: synchronizes local evidence and version objects with a Workbench remote.",
197
+ "",
198
+ "Example:",
199
+ " workbench sync cloud --dry-run",
160
200
  ].join("\n"),
161
201
  publish: [
162
202
  "Usage:",
163
203
  " workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--dir DIR] [--json]",
164
204
  "",
165
205
  "Publishes installable skill source to Workbench Cloud. --as sets the linked OWNER/SKILL handle.",
206
+ "",
207
+ "Example:",
208
+ " workbench publish --as acme/earnings-prep --dry-run",
166
209
  ].join("\n"),
167
210
  login: [
168
211
  "Usage:",
@@ -170,6 +213,9 @@ const COMMAND_HELP = {
170
213
  " workbench logout [PROVIDER] [--json]",
171
214
  "",
172
215
  "Connects the CLI to Workbench Cloud or captures local adapter auth for a provider.",
216
+ "",
217
+ "Example:",
218
+ " workbench login --start-only --no-open",
173
219
  ].join("\n"),
174
220
  };
175
221
  const COMMON_FLAGS = {
@@ -207,7 +253,7 @@ const COMMAND_FLAGS = {
207
253
  samples: "positive-integer",
208
254
  skills: "string",
209
255
  },
210
- install: { ...COMMON_FLAGS, ...HELP_FLAG, "dry-run": "boolean", list: "boolean", to: "repeat-string", yes: "boolean" },
256
+ install: { ...COMMON_FLAGS, ...HELP_FLAG, "dry-run": "boolean", to: "repeat-string", yes: "boolean" },
211
257
  log: { ...PROJECT_FLAGS, ...HELP_FLAG, runs: "boolean", versions: "boolean" },
212
258
  login: {
213
259
  ...COMMON_FLAGS,
@@ -243,9 +289,7 @@ const COMMAND_FLAGS = {
243
289
  const SUBCOMMAND_FLAGS = {
244
290
  case: {
245
291
  flags: {
246
- list: { ...PROJECT_FLAGS, ...HELP_FLAG },
247
292
  add: { ...PROJECT_FLAGS, ...HELP_FLAG },
248
- rm: { ...PROJECT_FLAGS, ...HELP_FLAG },
249
293
  },
250
294
  },
251
295
  agent: {
@@ -315,26 +359,28 @@ export async function runCli(argv, io = {
315
359
  return emitEvalFailure(runs, failedRuns, artifactIds, parsed, io);
316
360
  }
317
361
  const deltas = await evalDeltas(core, runs);
318
- const nextCommands = evalSuccessNextCommands(runs);
362
+ const next = await evalSuccessNextCommand(core, runs);
319
363
  return emitResult("workbench.cli.eval.v1", {
320
364
  result: runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
321
365
  deltas: deltas,
322
- nextCommands: nextCommands,
366
+ next: next,
323
367
  }, parsed, io, () => [
324
368
  runs.map(formatRun).join("\n"),
325
369
  ...deltas.map(formatEvalDelta),
326
- ...(nextCommands[0] ? [`next: ${nextCommands[0]}`] : []),
370
+ ...(next ? [`next: ${next}`] : []),
327
371
  ].filter(Boolean).join("\n"));
328
372
  }
329
373
  if (command === "improve") {
330
374
  if (parsed.flags.cloud === true) {
331
375
  return await handleCloudImprove(parsed, io);
332
376
  }
377
+ const improverAgent = await resolveLocalImproverAgent(parsed, core);
333
378
  const result = await improveWorkbenchSkill({
334
379
  ...core,
335
380
  version: optionalPositional(parsed, 1),
336
381
  skill: stringFlag(parsed, "skills"),
337
382
  agent: stringFlag(parsed, "agents"),
383
+ ...(improverAgent ? { improverAgent } : {}),
338
384
  budget: intFlag(parsed, "budget"),
339
385
  samples: intFlag(parsed, "samples"),
340
386
  });
@@ -350,12 +396,12 @@ export async function runCli(argv, io = {
350
396
  skills: stringFlag(parsed, "skills"),
351
397
  agents: stringFlag(parsed, "agents"),
352
398
  });
353
- return output(comparison, parsed, io, () => formatComparison(comparison));
399
+ return output(manifestOnly(comparison), parsed, io, () => formatComparison(comparison));
354
400
  }
355
401
  if (command === "switch") {
356
402
  const versionRef = requiredPositional(parsed, 1, "workbench switch requires VERSION.");
357
403
  const version = await switchWorkbenchVersion(versionRef, core);
358
- return output(versionSummary(version), parsed, io, () => `Switched to ${version.id}.`);
404
+ return output(versionSummary(version), parsed, io, () => `Switched to ${displayRef(version.id)}.`);
359
405
  }
360
406
  if (command === "diff") {
361
407
  const range = optionalPositional(parsed, 1) ?? await defaultDiffRange(core);
@@ -390,7 +436,7 @@ export async function runCli(argv, io = {
390
436
  }, parsed, io, () => `${result.dryRun ? "Would sync" : "Synced"} ${result.remote.name}: pushed ${result.pushed}, pulled ${result.pulled}${result.upToDate ? " (up to date)" : ""}.`);
391
437
  }
392
438
  if (command === "publish") {
393
- const preview = parsed.flags["dry-run"] === true && !stringFlag(parsed, "as")
439
+ const preview = parsed.flags["dry-run"] === true
394
440
  ? await previewPublishWithDerivedRemote(parsed)
395
441
  : undefined;
396
442
  if (preview) {
@@ -403,7 +449,7 @@ export async function runCli(argv, io = {
403
449
  pinnedInstallUrl: preview.pinnedInstallUrl,
404
450
  dryRun: true,
405
451
  }, parsed, io, () => [
406
- `Would publish ${preview.version.id} to remote ${preview.remote.name}.`,
452
+ `Would publish ${displayRef(preview.version.id)} to remote ${preview.remote.name}.`,
407
453
  `Visibility: ${preview.visibility}`,
408
454
  `Install: ${preview.installUrl}`,
409
455
  `Pinned: ${preview.pinnedInstallUrl}`,
@@ -427,7 +473,7 @@ export async function runCli(argv, io = {
427
473
  pinnedInstallUrl: result.pinnedInstallUrl,
428
474
  ...(result.dryRun ? { dryRun: true } : {}),
429
475
  }, parsed, io, () => [
430
- `${result.dryRun ? "Would publish" : "Published"} ${result.version.id} to remote ${result.remote.name}.`,
476
+ `${result.dryRun ? "Would publish" : "Published"} ${displayRef(result.version.id)} to remote ${result.remote.name}.`,
431
477
  `Visibility: ${result.visibility}`,
432
478
  `Install: ${result.installUrl}`,
433
479
  `Pinned: ${result.pinnedInstallUrl}`,
@@ -437,7 +483,7 @@ export async function runCli(argv, io = {
437
483
  if (command === "open") {
438
484
  if (parsed.flags.json === true) {
439
485
  const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
440
- return output(snapshot, parsed, io, () => "Read-only Workbench inspection data is available with --json.");
486
+ return output(manifestOnly(snapshot), parsed, io, () => "Read-only Workbench inspection data is available with --json.");
441
487
  }
442
488
  // The browser server serves committed object state through a read-only
443
489
  // snapshot path, so long-running commands do not block page loads.
@@ -462,14 +508,15 @@ export async function runCli(argv, io = {
462
508
  async function handleStatus(parsed, io) {
463
509
  const status = await workbenchStatusSnapshot(await coreOptions(parsed));
464
510
  const auth = await workbenchCliAuthStatus();
511
+ const cliStatus = statusWithCausalNext(status, auth);
465
512
  return emitResult("workbench.status.v1", {
466
- project: status.project,
467
- worktree: status.worktree,
468
- runs: status.runs,
469
- remotes: status.remotes,
513
+ project: cliStatus.project,
514
+ worktree: cliStatus.worktree,
515
+ runs: cliStatus.runs,
516
+ remotes: cliStatus.remotes,
470
517
  auth: auth,
471
- next: status.next,
472
- }, parsed, io, () => formatStatusSnapshot({ ...status, auth }));
518
+ next: cliStatus.next,
519
+ }, parsed, io, () => formatStatusSnapshot({ ...cliStatus, auth }));
473
520
  }
474
521
  async function handleLog(parsed, io) {
475
522
  if (parsed.flags.runs === true && parsed.flags.versions === true) {
@@ -491,7 +538,7 @@ async function handleLog(parsed, io) {
491
538
  remediation: "Run workbench log, workbench log --runs, or workbench log --versions.",
492
539
  });
493
540
  }
494
- const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(await coreOptions(parsed));
541
+ const snapshot = await createWorkbenchInspectionSnapshot(await coreOptions(parsed));
495
542
  const includeRuns = parsed.flags.versions !== true;
496
543
  const includeVersions = parsed.flags.runs !== true;
497
544
  const entries = [
@@ -534,21 +581,25 @@ async function handleShow(parsed, io) {
534
581
  return output(value, parsed, io, () => formatShow(value));
535
582
  }
536
583
  const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
537
- const version = snapshot.versions.find((entry) => entry.id === objectRef);
584
+ const version = snapshotVersionByRef(snapshot, objectRef);
538
585
  if (version) {
539
586
  return output(fileListing("version", version.id, version.files), parsed, io, () => formatFileListing("version", version.id, version.files));
540
587
  }
541
- const trace = snapshot.traces.find((entry) => entry.id === objectRef);
588
+ const trace = snapshotObjectByRef(snapshot.traces, objectRef, "trace");
542
589
  if (trace) {
543
590
  return output(fileListing("trace", trace.id, trace.files), parsed, io, () => formatFileListing("trace", trace.id, trace.files));
544
591
  }
545
- const artifact = snapshot.artifacts.find((entry) => entry.id === objectRef);
592
+ const artifact = snapshotObjectByRef(snapshot.artifacts, objectRef, "artifact");
546
593
  if (artifact) {
547
594
  return output(fileListing("artifact", artifact.id, artifact.files), parsed, io, () => formatFileListing("artifact", artifact.id, artifact.files));
548
595
  }
549
596
  const details = evidenceDetailsForRunOrJob(snapshot, objectRef);
550
- if (details.length > 0) {
551
- return output(details, parsed, io, () => details.map(formatTraceDetail).join("\n"));
597
+ const evidenceFiles = evidenceFilesForRunOrJob(snapshot, objectRef);
598
+ if (details.length > 0 || evidenceFiles.length > 0) {
599
+ return output({
600
+ details: details,
601
+ files: evidenceFiles.map(fileSummary),
602
+ }, parsed, io, () => formatRunOrJobEvidence(details, evidenceFiles));
552
603
  }
553
604
  const value = await showWorkbenchRef(ref, core);
554
605
  return output(value, parsed, io, () => formatShow(value));
@@ -581,22 +632,19 @@ async function handleAgent(parsed, io) {
581
632
  throw new WorkbenchUserError(`Unsupported agent command: ${subcommand}`);
582
633
  }
583
634
  async function handleCase(parsed, io) {
584
- const subcommand = requiredPositional(parsed, 1, "workbench case requires list|add|rm.");
585
- if (subcommand === "list") {
586
- const cases = await listWorkbenchCases(await coreOptions(parsed));
587
- return output(cases, parsed, io, () => cases.map((entry) => `${entry.id}\t${entry.path}`).join("\n") || "No cases.");
588
- }
635
+ const subcommand = requiredPositional(parsed, 1, "workbench case requires add.");
589
636
  if (subcommand === "add") {
590
637
  const core = await coreOptions(parsed);
591
- const sourceRef = optionalPositional(parsed, 2);
592
- const record = await addWorkbenchCase({ ...core, fromTraceId: sourceRef ? await traceIdForCaseSource(core, sourceRef) : undefined });
593
- return output(record, parsed, io, () => `Added case ${record.id}.`);
594
- }
595
- if (subcommand === "rm") {
596
- const result = await removeWorkbenchCase(requiredPositional(parsed, 2, "workbench case rm requires CASE_ID."), await coreOptions(parsed));
597
- return output(result, parsed, io, () => `Removed case ${result.removed}.`);
638
+ const sourceRef = requiredPositional(parsed, 2, "workbench case add requires RUN_ID.");
639
+ rejectExtraInput(parsed, {
640
+ maxPositionals: 3,
641
+ message: "workbench case add accepts one RUN_ID argument.",
642
+ remediation: "Run workbench case add RUN_ID.",
643
+ });
644
+ const record = await addWorkbenchCase({ ...core, fromTraceId: await traceIdForCaseSource(core, sourceRef) });
645
+ return output(record, parsed, io, () => `Added draft case ${record.id}. Edit .workbench/cases/${record.path}/case.yaml before using it as score evidence.`);
598
646
  }
599
- throw new WorkbenchUserError(`Unsupported case command: ${subcommand}`);
647
+ throw new WorkbenchUserError(`Unknown command: workbench case ${subcommand}`);
600
648
  }
601
649
  async function handleAdapterLogin(provider, parsed, io) {
602
650
  const target = parseAuthTarget(provider, authProfileFlag(parsed));
@@ -714,7 +762,7 @@ async function handleLogin(parsed, io) {
714
762
  }
715
763
  if (parsed.flags["start-only"] === true && parsed.flags.wait === true) {
716
764
  throw new WorkbenchCodedError("usage", "workbench login accepts only one of --start-only or --wait.", {
717
- remediation: "Run workbench login --start-only or workbench login --wait --timeout 120.",
765
+ remediation: "Run workbench login --start-only or workbench login --wait.",
718
766
  exitCode: 2,
719
767
  });
720
768
  }
@@ -723,22 +771,17 @@ async function handleLogin(parsed, io) {
723
771
  const timeoutSeconds = intFlag(parsed, "timeout");
724
772
  if (startOnly && timeoutSeconds !== undefined) {
725
773
  throw new WorkbenchCodedError("usage", "workbench login --timeout only applies with --wait.", {
726
- remediation: "Run workbench login --start-only, then workbench login --wait --timeout 120.",
727
- exitCode: 2,
728
- });
729
- }
730
- if (waitOnly && timeoutSeconds === undefined) {
731
- throw new WorkbenchCodedError("usage", "workbench login --wait requires --timeout N.", {
732
- remediation: "Run workbench login --wait --timeout 120.",
774
+ remediation: "Run workbench login --start-only, then workbench login --wait.",
733
775
  exitCode: 2,
734
776
  });
735
777
  }
736
778
  const config = await loadConfig();
737
- const baseUrl = selectWorkbenchBaseUrl({
738
- explicitBaseUrl: stringFlag(parsed, "base-url"),
779
+ const explicitBaseUrl = stringFlag(parsed, "base-url");
780
+ const pending = waitOnly ? await readPendingDeviceAuthorization(explicitBaseUrl) : null;
781
+ const baseUrl = pending?.baseUrl ?? selectWorkbenchBaseUrl({
782
+ explicitBaseUrl,
739
783
  configBaseUrl: config.baseUrl,
740
784
  });
741
- const pending = waitOnly ? await readPendingDeviceAuthorization(baseUrl) : null;
742
785
  const record = pending ?? await startDeviceAuthorization(baseUrl);
743
786
  const freshAuthorization = pending === null;
744
787
  if (startOnly) {
@@ -753,8 +796,8 @@ async function handleLogin(parsed, io) {
753
796
  verificationUriComplete: record.verification_uri_complete,
754
797
  userCode: record.user_code,
755
798
  expiresAt: record.expiresAt,
756
- resume: "workbench login --wait --timeout 120",
757
- }, parsed, io, () => `Open ${record.verification_uri_complete}\nCode: ${record.user_code}\nResume: workbench login --wait --timeout 120`);
799
+ resume: "workbench login --wait",
800
+ }, parsed, io, () => `Open ${record.verification_uri_complete}\nCode: ${record.user_code}\nResume: workbench login --wait`);
758
801
  }
759
802
  await writePendingDeviceAuthorization(record);
760
803
  if (freshAuthorization && !parsed.flags.json) {
@@ -801,9 +844,6 @@ async function handleLogout(parsed, io) {
801
844
  const config = await loadConfig();
802
845
  const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
803
846
  const tokenPresent = Boolean(config.accessToken);
804
- if (tokenPresent && !baseUrl) {
805
- throw new WorkbenchUserError("Missing Workbench API URL. Set WORKBENCH_API_URL or run `workbench login --base-url URL`.");
806
- }
807
847
  let revoke = "skipped";
808
848
  if (config.accessToken && baseUrl) {
809
849
  try {
@@ -856,18 +896,6 @@ async function handleInstall(parsed, io) {
856
896
  const snapshot = await fetchWorkbenchInstallSourceSnapshot(workbenchSource, source);
857
897
  const sourceSummary = workbenchInstallSourceSummary(workbenchSource, snapshot);
858
898
  const config = await loadConfig();
859
- if (parsed.flags.list === true) {
860
- return emitResult("workbench.cli.install.v1", {
861
- source: sourceSummary,
862
- skills: [snapshot.name],
863
- fileCount: snapshot.files.length,
864
- targets: installTargetsToJson(supportedInstallTargets()),
865
- }, parsed, io, () => [
866
- `${snapshot.name}\t${snapshot.versionId}\tfiles=${snapshot.files.length}`,
867
- "Targets:",
868
- ...supportedInstallTargets().map((target) => ` ${target.agent}\t${target.destination}`),
869
- ].join("\n"));
870
- }
871
899
  const toTargets = stringsFlag(parsed, "to");
872
900
  const selectedTargets = toTargets.length > 0 ? normalizeInstallTargetNames(toTargets) : await defaultInstallTargetNames(config);
873
901
  const targets = resolveInstallTargets({
@@ -905,17 +933,17 @@ async function handleCloudEval(parsed, io) {
905
933
  return emitEvalFailure(started.runs, failedRuns, artifactIds, parsed, io);
906
934
  }
907
935
  const deltas = await evalDeltas(started.core, started.runs);
908
- const nextCommands = cloudEvalNextCommands(started.runs);
936
+ const next = await evalSuccessNextCommand(started.core, started.runs);
909
937
  return emitResult("workbench.cli.eval.v1", {
910
938
  result: started.runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
911
939
  deltas: deltas,
912
- nextCommands: nextCommands,
940
+ next: next,
913
941
  cloud: cloudExecutionSummary(started),
914
942
  }, parsed, io, () => [
915
943
  `Completed hosted eval on ${started.remote.url}.`,
916
944
  started.runs.map(formatRun).join("\n"),
917
945
  ...deltas.map(formatEvalDelta),
918
- ...(nextCommands[0] ? [`next: ${nextCommands[0]}`] : []),
946
+ ...(next ? [`next: ${next}`] : []),
919
947
  ].filter(Boolean).join("\n"));
920
948
  }
921
949
  async function handleCloudImprove(parsed, io) {
@@ -934,17 +962,17 @@ async function handleCloudImprove(parsed, io) {
934
962
  });
935
963
  }
936
964
  const switchedVersionId = await switchHostedImproveVersionIfPromoted(started);
937
- const nextCommands = cloudImproveNextCommands(started.runs);
965
+ const next = cloudImproveNextCommand(started.runs);
938
966
  return emitResult("workbench.cli.improve.v1", {
939
967
  result: started.runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
940
- nextCommands: nextCommands,
968
+ next: next,
941
969
  cloud: cloudExecutionSummary(started),
942
970
  ...(switchedVersionId ? { switchedVersionId } : {}),
943
971
  }, parsed, io, () => [
944
972
  `Completed hosted improve on ${started.remote.url}.`,
945
973
  started.runs.map(formatRun).join("\n"),
946
- ...(switchedVersionId ? [`Switched local source to ${switchedVersionId}.`] : []),
947
- ...(nextCommands[0] ? [`next: ${nextCommands[0]}`] : []),
974
+ ...(switchedVersionId ? [`Switched local source to ${displayRef(switchedVersionId)}.`] : []),
975
+ ...(next ? [`next: ${next}`] : []),
948
976
  ].filter(Boolean).join("\n"));
949
977
  }
950
978
  async function defaultInstallTargetNames(config) {
@@ -1197,21 +1225,18 @@ function cloudExecutionRequestBody(command, parsed) {
1197
1225
  ...(command === "improve" ? { budget: intFlag(parsed, "budget") } : {}),
1198
1226
  };
1199
1227
  }
1200
- function cloudEvalNextCommands(runs) {
1201
- return cloudExecutionNextCommands(runs, "workbench publish");
1202
- }
1203
- function cloudImproveNextCommands(runs) {
1204
- return cloudExecutionNextCommands(runs, "workbench eval");
1228
+ function cloudImproveNextCommand(runs) {
1229
+ return cloudExecutionNextCommand(runs, "workbench eval");
1205
1230
  }
1206
- function cloudExecutionNextCommands(runs, successCommand) {
1231
+ function cloudExecutionNextCommand(runs, successCommand) {
1207
1232
  const first = runs[0];
1208
1233
  if (!first) {
1209
- return ["workbench log --runs"];
1234
+ return "workbench log --runs";
1210
1235
  }
1211
1236
  if (first.status === "running" || first.status === "failed" || first.status === "canceled") {
1212
- return [`workbench show ${first.id}`];
1237
+ return `workbench show ${displayRef(first.id)}`;
1213
1238
  }
1214
- return [successCommand];
1239
+ return successCommand;
1215
1240
  }
1216
1241
  function cloudExecutionSummary(started) {
1217
1242
  return {
@@ -1294,12 +1319,13 @@ async function fetchWorkbenchInstallSourceSnapshot(source, displaySource) {
1294
1319
  throw new WorkbenchCodedError("auth_required", token
1295
1320
  ? `Workbench Cloud rejected the provided token while installing ${displaySource}.`
1296
1321
  : `Authentication is required to install ${displaySource}.`, {
1297
- remediation: `Run workbench login --base-url ${source.baseUrl}.`,
1322
+ remediation: "Run workbench login.",
1298
1323
  exitCode: 1,
1299
1324
  });
1300
1325
  }
1301
1326
  if (!response.ok) {
1302
- throw new WorkbenchCodedError("install_failed", `Unable to download Workbench source ${displaySource}: ${response.status} ${readResponseError(text) ?? response.statusText}`, {
1327
+ const excerpt = readResponseError(text);
1328
+ throw new WorkbenchCodedError("install_failed", `Unable to download Workbench source ${displaySource}: ${response.status}${excerpt ? ` ${excerpt}` : response.statusText ? ` ${response.statusText}` : ""}`, {
1303
1329
  subject: { source: displaySource, status: response.status },
1304
1330
  exitCode: 1,
1305
1331
  });
@@ -1420,18 +1446,15 @@ function deviceAuthPath() {
1420
1446
  return process.env.WORKBENCH_DEVICE_AUTH?.trim() || path.join(path.dirname(configPath()), "device-auth.json");
1421
1447
  }
1422
1448
  function selectWorkbenchBaseUrl(input = {}) {
1423
- const baseUrl = optionalWorkbenchBaseUrl(input);
1424
- if (!baseUrl) {
1425
- throw new WorkbenchUserError("Missing Workbench API URL. Pass --base-url URL, set WORKBENCH_API_URL, or run `workbench login --base-url URL`.");
1426
- }
1427
- return baseUrl;
1449
+ return optionalWorkbenchBaseUrl(input);
1428
1450
  }
1429
1451
  function optionalWorkbenchBaseUrl(input = {}) {
1430
1452
  const value = input.explicitBaseUrl ??
1431
1453
  input.originBaseUrl ??
1432
1454
  process.env.WORKBENCH_API_URL ??
1433
- input.configBaseUrl;
1434
- return value ? normalizeBaseUrl(value) : undefined;
1455
+ input.configBaseUrl ??
1456
+ DEFAULT_WORKBENCH_CLOUD_BASE_URL;
1457
+ return normalizeBaseUrl(value);
1435
1458
  }
1436
1459
  function normalizeBaseUrl(value) {
1437
1460
  return value.trim().replace(/\/+$/u, "");
@@ -1449,7 +1472,8 @@ async function requestDeviceAuthorization(baseUrl) {
1449
1472
  });
1450
1473
  }
1451
1474
  if (!response.ok) {
1452
- throw new WorkbenchCodedError("login_denied", `Device login failed: ${readResponseError(text) ?? response.statusText}`, {
1475
+ const excerpt = readResponseError(text);
1476
+ throw new WorkbenchCodedError("login_denied", `Device login failed: ${response.status}${excerpt ? ` ${excerpt}` : response.statusText ? ` ${response.statusText}` : ""}`, {
1453
1477
  exitCode: 1,
1454
1478
  });
1455
1479
  }
@@ -1500,7 +1524,7 @@ async function pollDeviceToken(baseUrl, authorization, timeoutSeconds) {
1500
1524
  }
1501
1525
  throw new WorkbenchCodedError("login_pending", "Device login is still waiting for browser authorization.", {
1502
1526
  retryable: true,
1503
- remediation: "Authorize the device in the browser, then run workbench login --wait --timeout 120.",
1527
+ remediation: "Authorize the device in the browser, then run workbench login --wait.",
1504
1528
  subject: {
1505
1529
  retryAfterSeconds: Math.max(1, Math.ceil(intervalMs / 1000)),
1506
1530
  verificationUri: authorization.verification_uri,
@@ -1524,7 +1548,8 @@ async function fetchWorkbenchUsername(baseUrl, accessToken) {
1524
1548
  }
1525
1549
  async function readPendingDeviceAuthorization(baseUrl) {
1526
1550
  const record = await readDeviceAuthorizationJson(deviceAuthPath());
1527
- if (!record || record.baseUrl !== baseUrl || Date.parse(record.expiresAt) <= Date.now()) {
1551
+ const expectedBaseUrl = baseUrl ? normalizeBaseUrl(baseUrl) : undefined;
1552
+ if (!record || (expectedBaseUrl && record.baseUrl !== expectedBaseUrl) || Date.parse(record.expiresAt) <= Date.now()) {
1528
1553
  return null;
1529
1554
  }
1530
1555
  return record;
@@ -1614,7 +1639,8 @@ async function apiRequest(apiPath, options = {}, baseUrlOverride) {
1614
1639
  }
1615
1640
  throw requestError;
1616
1641
  }
1617
- const requestError = new WorkbenchApiRequestError(response.status, readResponseError(text) ?? `Request failed with status ${response.status}${response.statusText ? ` ${response.statusText}` : ""}.`, text);
1642
+ const excerpt = readResponseError(text);
1643
+ const requestError = new WorkbenchApiRequestError(response.status, `Request failed with status ${response.status}${response.statusText ? ` ${response.statusText}` : ""}${excerpt ? `: ${excerpt}` : ""}.`, text);
1618
1644
  lastError = requestError;
1619
1645
  if (canRetry && attempt < API_REQUEST_MAX_ATTEMPTS && isTransientApiRequestError(requestError)) {
1620
1646
  await sleep(250 * attempt);
@@ -1707,12 +1733,22 @@ function readResponseError(text) {
1707
1733
  const parsed = JSON.parse(text);
1708
1734
  const record = asRecord(parsed);
1709
1735
  const error = record?.error ?? record?.message;
1710
- return typeof error === "string" && error.trim() ? error : null;
1736
+ return typeof error === "string" && error.trim() ? oneLineExcerpt(error) : null;
1711
1737
  }
1712
1738
  catch {
1713
- return text.trim() || null;
1739
+ if (/<(?:!doctype|html|head|body)\b/iu.test(text)) {
1740
+ return null;
1741
+ }
1742
+ return oneLineExcerpt(text);
1714
1743
  }
1715
1744
  }
1745
+ function oneLineExcerpt(text) {
1746
+ const line = text.replace(/\s+/gu, " ").trim();
1747
+ if (!line) {
1748
+ return null;
1749
+ }
1750
+ return line.length > 180 ? `${line.slice(0, 177)}...` : line;
1751
+ }
1716
1752
  function parseWorkbenchCloudErrorBody(text) {
1717
1753
  try {
1718
1754
  const record = asRecord(JSON.parse(text));
@@ -2227,19 +2263,15 @@ function parsePublishVisibilityFlags(parsed) {
2227
2263
  }
2228
2264
  async function previewPublishWithDerivedRemote(parsed) {
2229
2265
  const root = path.resolve(dirFlag(parsed) ?? process.cwd());
2230
- const core = await coreOptions(parsed);
2231
- await listWorkbenchVersions(core);
2232
2266
  const reconciledSnapshot = await createWorkbenchReadOnlyInspectionSnapshot({ dir: root });
2233
2267
  const link = cloudRemoteLinkTargetFromRemotes(reconciledSnapshot.remotes);
2234
- if (link.existing) {
2235
- return undefined;
2236
- }
2237
- const remote = await derivePublishCloudRemote(parsed, "workbench publish", link.name);
2268
+ const remote = stringFlag(parsed, "as") || !link.existing
2269
+ ? await derivePublishCloudRemote(parsed, "workbench publish", link.name)
2270
+ : link.existing;
2238
2271
  const requestedVersion = optionalPositional(parsed, 1);
2239
- const versionId = requestedVersion && requestedVersion !== "current"
2240
- ? requestedVersion
2241
- : reconciledSnapshot.status.currentVersionId ?? reconciledSnapshot.refs.current;
2242
- const version = reconciledSnapshot.versions.find((entry) => entry.id === versionId);
2272
+ const version = requestedVersion && requestedVersion !== "current"
2273
+ ? snapshotVersionByRef(reconciledSnapshot, requestedVersion)
2274
+ : snapshotVersionByRef(reconciledSnapshot, reconciledSnapshot.status.currentVersionId ?? reconciledSnapshot.refs.current ?? "");
2243
2275
  if (!version) {
2244
2276
  throw new WorkbenchCodedError("version_not_found", `Version not found: ${requestedVersion ?? "current"}`, {
2245
2277
  remediation: "Run workbench log --versions.",
@@ -2390,7 +2422,7 @@ async function artifactIdsByRunId(core, runs) {
2390
2422
  return byRun;
2391
2423
  }
2392
2424
  function emitEvalFailure(runs, failedRuns, artifactIds, parsed, io) {
2393
- const nextCommands = evalFailureNextCommands(failedRuns);
2425
+ const next = evalFailureNextCommand(failedRuns);
2394
2426
  if (parsed.flags.json === true) {
2395
2427
  io.stdout.write(`${JSON.stringify({
2396
2428
  schema: "workbench.cli.eval.v1",
@@ -2401,14 +2433,14 @@ function emitEvalFailure(runs, failedRuns, artifactIds, parsed, io) {
2401
2433
  evidenceSaved: true,
2402
2434
  runs: runs.map((run) => runFailureSummary(run, artifactIds.get(run.id) ?? [])),
2403
2435
  failedRuns: failedRuns.map((run) => runFailureSummary(run, artifactIds.get(run.id) ?? [])),
2404
- nextCommands,
2436
+ next,
2405
2437
  }, null, 2)}\n`);
2406
2438
  return 1;
2407
2439
  }
2408
2440
  io.stdout.write([
2409
2441
  "Eval failed; evidence was saved.",
2410
2442
  ...failedRuns.map(formatRun),
2411
- ...(nextCommands[0] ? [`next: ${nextCommands[0]}`] : []),
2443
+ ...(next ? [`next: ${next}`] : []),
2412
2444
  ].join("\n") + "\n");
2413
2445
  return 1;
2414
2446
  }
@@ -2441,17 +2473,12 @@ function runFailureSummary(run, artifactIds) {
2441
2473
  artifactIds: [...artifactIds],
2442
2474
  };
2443
2475
  }
2444
- function evalFailureNextCommands(failedRuns) {
2476
+ function evalFailureNextCommand(failedRuns) {
2445
2477
  const first = failedRuns[0];
2446
2478
  if (!first) {
2447
- return ["workbench log --runs"];
2479
+ return "workbench log --runs";
2448
2480
  }
2449
- return [
2450
- `workbench show ${first.id}`,
2451
- `workbench show ${first.id}:stderr.log`,
2452
- `workbench case add ${first.id}`,
2453
- `workbench improve --agents ${first.agentName} --budget 1 -n 1`,
2454
- ];
2481
+ return `workbench show ${displayRef(first.id)}`;
2455
2482
  }
2456
2483
  function output(value, parsed, io, text) {
2457
2484
  return emitResult(commandSchema(parsed), { result: value }, parsed, io, text);
@@ -2484,12 +2511,207 @@ async function workbenchCliAuthStatus() {
2484
2511
  })),
2485
2512
  };
2486
2513
  }
2514
+ function statusWithCausalNext(status, auth) {
2515
+ const cloudAuthMissing = auth.workbenchCloud.status !== "authenticated";
2516
+ const needsCloudAuth = cloudAuthMissing && status.remotes.some((remote) => remote.kind === "workbench-cloud" &&
2517
+ (remote.sync.status !== "up_to_date" || remote.publication.status === "unpublished"));
2518
+ if (!needsCloudAuth) {
2519
+ return status;
2520
+ }
2521
+ return {
2522
+ ...status,
2523
+ next: "workbench login",
2524
+ };
2525
+ }
2526
+ function displayRef(id) {
2527
+ const version = /^v_([0-9a-f]{8,})$/iu.exec(id);
2528
+ if (version?.[1]) {
2529
+ return version[1].slice(0, 8);
2530
+ }
2531
+ const separator = id.indexOf("_");
2532
+ if (separator > 0 && separator < id.length - 1) {
2533
+ const prefix = id.slice(0, separator);
2534
+ const suffix = id.slice(separator + 1);
2535
+ return `${prefix}_${suffix.slice(0, 8)}`;
2536
+ }
2537
+ return id.length > 8 ? id.slice(0, 8) : id;
2538
+ }
2539
+ function shortenCommandRefs(command) {
2540
+ return command.replace(/\b(?:v_[0-9a-f]{8,}|(?:run|job|trace|artifact)_[a-z0-9_-]+)/giu, (match) => displayRef(match));
2541
+ }
2542
+ function snapshotVersionByRef(snapshot, ref) {
2543
+ const requested = ref.trim();
2544
+ const normalized = requested === "current" ? snapshot.refs.current ?? "" : requested;
2545
+ if (!normalized) {
2546
+ return undefined;
2547
+ }
2548
+ const candidates = snapshot.versions.filter((version) => snapshotVersionRefMatches(version, normalized));
2549
+ if (candidates.length > 1) {
2550
+ throw new WorkbenchCodedError("ref_ambiguous", `Version ref is ambiguous: ${ref}. Candidates: ${candidates.map((version) => displayRef(version.id)).join(", ")}.`, {
2551
+ subject: { ref, candidates: candidates.map((version) => version.id) },
2552
+ exitCode: 2,
2553
+ });
2554
+ }
2555
+ return candidates[0];
2556
+ }
2557
+ function snapshotVersionRefMatches(version, ref) {
2558
+ const withoutVersionPrefix = ref.startsWith("v_") ? ref.slice(2) : ref;
2559
+ return version.id === ref ||
2560
+ version.hash === ref ||
2561
+ version.id.startsWith(ref) ||
2562
+ version.hash.startsWith(ref) ||
2563
+ version.hash.startsWith(withoutVersionPrefix) ||
2564
+ version.id.startsWith(`v_${withoutVersionPrefix}`);
2565
+ }
2566
+ function snapshotObjectByRef(entries, ref, kind) {
2567
+ const normalized = ref.trim();
2568
+ if (!normalized) {
2569
+ return undefined;
2570
+ }
2571
+ const candidates = entries.filter((entry) => objectRefMatches(entry.id, normalized));
2572
+ if (candidates.length > 1) {
2573
+ throw new WorkbenchCodedError("ref_ambiguous", `${capitalize(kind)} ref is ambiguous: ${ref}. Candidates: ${candidates.map((entry) => displayRef(entry.id)).slice(0, 8).join(", ")}.`, {
2574
+ subject: { ref, candidates: candidates.map((entry) => entry.id).slice(0, 20) },
2575
+ exitCode: 2,
2576
+ });
2577
+ }
2578
+ return candidates[0];
2579
+ }
2580
+ function objectRefMatches(id, ref) {
2581
+ if (id === ref || id.startsWith(ref)) {
2582
+ return true;
2583
+ }
2584
+ const separator = id.indexOf("_");
2585
+ return separator > 0 && id.slice(separator + 1).startsWith(ref);
2586
+ }
2587
+ function capitalize(value) {
2588
+ return value.length > 0 ? `${value[0].toUpperCase()}${value.slice(1)}` : value;
2589
+ }
2590
+ function runOrJobEvidenceSelection(snapshot, ref) {
2591
+ const run = snapshotObjectByRef(snapshot.runs, ref, "run");
2592
+ const job = snapshotObjectByRef(snapshot.jobs, ref, "job");
2593
+ if (run && job) {
2594
+ throw new WorkbenchCodedError("ref_ambiguous", `Run/job ref is ambiguous: ${ref}. Candidates: ${displayRef(run.id)}, ${displayRef(job.id)}.`, {
2595
+ subject: { ref, candidates: [run.id, job.id] },
2596
+ exitCode: 2,
2597
+ });
2598
+ }
2599
+ if (run) {
2600
+ return {
2601
+ run,
2602
+ jobs: snapshot.jobs.filter((entry) => entry.runId === run.id),
2603
+ };
2604
+ }
2605
+ return job ? { jobs: [job] } : { jobs: [] };
2606
+ }
2607
+ function evidenceFilesForRunOrJob(snapshot, ref) {
2608
+ const selection = runOrJobEvidenceSelection(snapshot, ref);
2609
+ if (!selection.run && selection.jobs.length === 0) {
2610
+ return [];
2611
+ }
2612
+ const traceById = new Map(snapshot.traces.map((trace) => [trace.id, trace]));
2613
+ const artifactById = new Map(snapshot.artifacts.map((artifact) => [artifact.id, artifact]));
2614
+ const files = selection.jobs.flatMap((job) => [
2615
+ ...job.traceIds.flatMap((traceId) => {
2616
+ const trace = traceById.get(traceId);
2617
+ return trace
2618
+ ? trace.files.map((file) => evidenceFileWithPath(file, `cases/${evidencePathSegment(job.caseId)}/jobs/${evidencePathSegment(job.id)}/traces/${evidencePathSegment(trace.id)}/${file.path}`))
2619
+ : [];
2620
+ }),
2621
+ ...job.artifactIds.flatMap((artifactId) => {
2622
+ const artifact = artifactById.get(artifactId);
2623
+ return artifact
2624
+ ? artifact.files.map((file) => evidenceFileWithPath(file, `cases/${evidencePathSegment(job.caseId)}/jobs/${evidencePathSegment(job.id)}/artifacts/${evidencePathSegment(artifact.id)}/${file.path}`))
2625
+ : [];
2626
+ }),
2627
+ ]);
2628
+ const seen = new Set();
2629
+ return files.filter((file) => {
2630
+ if (seen.has(file.path)) {
2631
+ return false;
2632
+ }
2633
+ seen.add(file.path);
2634
+ return true;
2635
+ });
2636
+ }
2637
+ function evidenceFileWithPath(file, filePath) {
2638
+ return {
2639
+ ...file,
2640
+ path: filePath.replace(/\\/gu, "/").replace(/^\/+/u, ""),
2641
+ };
2642
+ }
2643
+ function evidencePathSegment(value) {
2644
+ return value.replace(/[^A-Za-z0-9._-]+/gu, "-") || "_";
2645
+ }
2646
+ function formatRunOrJobEvidence(details, files) {
2647
+ const detailLines = details.map(formatTraceDetail).filter(Boolean);
2648
+ const fileLines = files.length > 0 ? ["Files:", ...files.map((file) => file.path)] : [];
2649
+ return [...detailLines, ...fileLines].join("\n") || "No evidence.";
2650
+ }
2651
+ function manifestOnly(value) {
2652
+ if (value === null || typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
2653
+ return value;
2654
+ }
2655
+ if (Array.isArray(value)) {
2656
+ return value.map(manifestOnly);
2657
+ }
2658
+ if (!value || typeof value !== "object") {
2659
+ return null;
2660
+ }
2661
+ const record = value;
2662
+ if (typeof record.path === "string" && typeof record.content === "string") {
2663
+ return fileSummary(record);
2664
+ }
2665
+ const out = {};
2666
+ for (const [key, child] of Object.entries(record)) {
2667
+ if (child === undefined) {
2668
+ continue;
2669
+ }
2670
+ out[key] = manifestOnly(child);
2671
+ }
2672
+ return out;
2673
+ }
2674
+ async function resolveLocalImproverAgent(parsed, core) {
2675
+ if (stringFlag(parsed, "agents")) {
2676
+ return undefined;
2677
+ }
2678
+ const agents = await listWorkbenchAgents(core).catch(() => []);
2679
+ const status = await workbenchStatusSnapshot(core).catch(() => undefined);
2680
+ const defaultAgentName = status?.project.defaultAgent ?? agents[0]?.name;
2681
+ const defaultAgent = agents.find((agent) => agent.name === defaultAgentName);
2682
+ if (defaultAgent && workbenchSkillImproveCanUseQueuedAdapter(defaultAgent)) {
2683
+ return undefined;
2684
+ }
2685
+ const connected = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).listStatus().catch(() => []);
2686
+ const candidates = connected
2687
+ .filter((entry) => entry.status === "connected" &&
2688
+ (entry.adapterId === "claude" || entry.adapterId === "codex"))
2689
+ .sort((left, right) => {
2690
+ const adapterRank = (adapter) => adapter === "claude" ? 0 : adapter === "codex" ? 1 : 2;
2691
+ return adapterRank(left.adapterId) - adapterRank(right.adapterId) ||
2692
+ (Date.parse(right.updatedAt ?? "") || 0) - (Date.parse(left.updatedAt ?? "") || 0);
2693
+ });
2694
+ const selected = candidates[0];
2695
+ if (!selected) {
2696
+ throw new WorkbenchCodedError("auth_required", "workbench improve needs a connected improver.", {
2697
+ remediation: "Run workbench login claude (or codex) to connect an improver.",
2698
+ exitCode: 1,
2699
+ });
2700
+ }
2701
+ return {
2702
+ name: selected.adapterId,
2703
+ adapter: selected.adapterId,
2704
+ config: {
2705
+ auth: selected.slot ? { [selected.slot]: selected.profile } : selected.profile,
2706
+ },
2707
+ };
2708
+ }
2487
2709
  function formatLogEntry(entry) {
2488
2710
  if (entry.kind === "version") {
2489
- return `${entry.createdAt}\tversion\t${entry.id}\tfiles=${entry.fileCount}\t${entry.message}`;
2711
+ return `${entry.createdAt}\tversion\t${displayRef(entry.id)}\tfiles=${entry.fileCount}\t${entry.message}`;
2490
2712
  }
2491
2713
  const score = entry.score === undefined ? "n/a" : entry.score.toFixed(3);
2492
- return `${entry.createdAt}\trun\t${entry.id}\t${entry.status}\tversion=${entry.versionId}\tskill=${entry.skillName}\tagent=${entry.agentName}\tscore=${score}`;
2714
+ return `${entry.createdAt}\trun\t${displayRef(entry.id)}\t${entry.status}\tversion=${displayRef(entry.versionId)}\tskill=${entry.skillName}\tagent=${entry.agentName}\tscore=${score}`;
2493
2715
  }
2494
2716
  function splitShowRef(ref) {
2495
2717
  const index = ref.indexOf(":");
@@ -2500,18 +2722,14 @@ function splitShowRef(ref) {
2500
2722
  }
2501
2723
  async function fileForRunOrJobRef(core, objectRef, requestedPath) {
2502
2724
  const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
2503
- const run = snapshot.runs.find((entry) => entry.id === objectRef);
2504
- const job = snapshot.jobs.find((entry) => entry.id === objectRef);
2505
- if (!run && !job) {
2725
+ const selection = runOrJobEvidenceSelection(snapshot, objectRef);
2726
+ if (!selection.run && selection.jobs.length === 0) {
2506
2727
  return null;
2507
2728
  }
2508
- const traceIds = run?.traceIds ?? job?.traceIds ?? [];
2509
- const traces = snapshot.traces.filter((trace) => traceIds.includes(trace.id));
2510
- for (const trace of traces) {
2511
- const file = findShowFile(trace.files, requestedPath);
2512
- if (file) {
2513
- return file;
2514
- }
2729
+ const files = evidenceFilesForRunOrJob(snapshot, objectRef);
2730
+ const file = findShowFile(files, requestedPath, objectRef);
2731
+ if (file) {
2732
+ return file;
2515
2733
  }
2516
2734
  throw new WorkbenchCodedError("ref_not_found", `File not found in ${objectRef}: ${requestedPath}`, {
2517
2735
  remediation: `Run workbench show ${objectRef}.`,
@@ -2520,12 +2738,8 @@ async function fileForRunOrJobRef(core, objectRef, requestedPath) {
2520
2738
  });
2521
2739
  }
2522
2740
  function evidenceDetailsForRunOrJob(snapshot, ref) {
2523
- const run = snapshot.runs.find((entry) => entry.id === ref);
2524
- const job = snapshot.jobs.find((entry) => entry.id === ref);
2525
- const jobs = run
2526
- ? snapshot.jobs.filter((entry) => entry.runId === run.id)
2527
- : job ? [job] : [];
2528
- return jobs.flatMap((entry) => {
2741
+ const selection = runOrJobEvidenceSelection(snapshot, ref);
2742
+ return selection.jobs.flatMap((entry) => {
2529
2743
  const detail = workbenchJobEvidenceForSnapshot(snapshot, {
2530
2744
  runId: entry.runId,
2531
2745
  jobId: entry.id,
@@ -2536,12 +2750,58 @@ function evidenceDetailsForRunOrJob(snapshot, ref) {
2536
2750
  execution.trace.events.length > 0 ||
2537
2751
  execution.trace.summaries.length > 0));
2538
2752
  }
2539
- function findShowFile(files, requestedPath) {
2753
+ function findShowFile(files, requestedPath, objectRef) {
2540
2754
  const normalized = requestedPath.replace(/\\/gu, "/");
2541
- return files.find((file) => file.path === normalized) ??
2542
- files.find((file) => file.path.endsWith(`/${normalized}`)) ??
2543
- files.find((file) => path.basename(file.path) === normalized) ??
2544
- null;
2755
+ const exact = files.filter((file) => file.path === normalized);
2756
+ if (exact.length === 1) {
2757
+ return exact[0];
2758
+ }
2759
+ const exactEquivalent = singleEquivalentShowFile(exact);
2760
+ if (exactEquivalent) {
2761
+ return exactEquivalent;
2762
+ }
2763
+ if (exact.length > 1) {
2764
+ throw ambiguousShowPath(objectRef, requestedPath, exact);
2765
+ }
2766
+ const suffixCandidates = files.filter((file) => file.path.endsWith(`/${normalized}`) || path.basename(file.path) === normalized);
2767
+ if (suffixCandidates.length === 0) {
2768
+ return null;
2769
+ }
2770
+ const candidates = normalized === "stderr.log"
2771
+ ? suffixCandidates.filter((file) => file.content.length > 0)
2772
+ : suffixCandidates;
2773
+ if (candidates.length === 1) {
2774
+ return candidates[0];
2775
+ }
2776
+ const equivalentCandidate = singleEquivalentShowFile(candidates);
2777
+ if (equivalentCandidate) {
2778
+ return equivalentCandidate;
2779
+ }
2780
+ if (candidates.length === 0 && suffixCandidates.length === 1) {
2781
+ return suffixCandidates[0];
2782
+ }
2783
+ const equivalentSuffixCandidate = singleEquivalentShowFile(suffixCandidates);
2784
+ if (equivalentSuffixCandidate) {
2785
+ return equivalentSuffixCandidate;
2786
+ }
2787
+ throw ambiguousShowPath(objectRef, requestedPath, candidates.length > 0 ? candidates : suffixCandidates);
2788
+ }
2789
+ function singleEquivalentShowFile(files) {
2790
+ if (files.length <= 1) {
2791
+ return null;
2792
+ }
2793
+ const first = files[0];
2794
+ return files.every((file) => file.kind === first.kind && file.encoding === first.encoding && file.content === first.content)
2795
+ ? first
2796
+ : null;
2797
+ }
2798
+ function ambiguousShowPath(objectRef, requestedPath, candidates) {
2799
+ const candidatePaths = candidates.map((file) => file.path);
2800
+ return new WorkbenchCodedError("ref_ambiguous", `File path is ambiguous in ${objectRef}: ${requestedPath}. Candidates: ${candidatePaths.join(", ")}.`, {
2801
+ remediation: `Run workbench show ${objectRef}.`,
2802
+ subject: { ref: objectRef, path: requestedPath, candidates: candidatePaths },
2803
+ exitCode: 2,
2804
+ });
2545
2805
  }
2546
2806
  function fileListing(kind, id, files) {
2547
2807
  return {
@@ -2552,17 +2812,16 @@ function fileListing(kind, id, files) {
2552
2812
  };
2553
2813
  }
2554
2814
  function formatFileListing(kind, id, files) {
2555
- return [`${kind}\t${id}\tfiles=${files.length}`, ...files.map((file) => file.path)].join("\n");
2815
+ return [`${kind}\t${displayRef(id)}\tfiles=${files.length}`, ...files.map((file) => file.path)].join("\n");
2556
2816
  }
2557
2817
  async function traceIdForCaseSource(core, ref) {
2558
2818
  const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
2559
- const trace = snapshot.traces.find((entry) => entry.id === ref);
2819
+ const trace = snapshotObjectByRef(snapshot.traces, ref, "trace");
2560
2820
  if (trace) {
2561
2821
  return trace.id;
2562
2822
  }
2563
- const run = snapshot.runs.find((entry) => entry.id === ref);
2564
- const job = snapshot.jobs.find((entry) => entry.id === ref);
2565
- const traceId = run?.traceIds[0] ?? job?.traceIds[0];
2823
+ const selection = runOrJobEvidenceSelection(snapshot, ref);
2824
+ const traceId = selection.run?.traceIds[0] ?? selection.jobs[0]?.traceIds[0];
2566
2825
  if (traceId) {
2567
2826
  return traceId;
2568
2827
  }
@@ -2594,21 +2853,35 @@ async function evalDeltas(core, runs) {
2594
2853
  });
2595
2854
  }
2596
2855
  function formatEvalDelta(delta) {
2597
- const score = delta.score === undefined ? "n/a" : delta.score.toFixed(3);
2856
+ if (delta.score === undefined) {
2857
+ return "";
2858
+ }
2859
+ const score = delta.score.toFixed(3);
2598
2860
  if (delta.previousScore === undefined || delta.delta === undefined) {
2599
- return `${delta.skillName} ${delta.versionId} ${score} (was n/a)`;
2861
+ return `${delta.skillName} ${displayRef(delta.versionId)} ${score}`;
2600
2862
  }
2601
2863
  const sign = delta.delta >= 0 ? "+" : "";
2602
- return `${delta.skillName} ${delta.versionId} ${score} (was ${delta.previousScore.toFixed(3)}, ${sign}${delta.delta.toFixed(3)})`;
2864
+ return `${delta.skillName} ${displayRef(delta.versionId)} ${score} (was ${delta.previousScore.toFixed(3)}, ${sign}${delta.delta.toFixed(3)})`;
2603
2865
  }
2604
- function evalSuccessNextCommands(runs) {
2605
- return runs.length > 0 ? ["workbench publish"] : ["workbench eval"];
2866
+ async function evalSuccessNextCommand(core, runs) {
2867
+ if (runs.length === 0) {
2868
+ return "workbench eval";
2869
+ }
2870
+ if (!runs.some((run) => typeof run.score === "number")) {
2871
+ return "edit .workbench/cases, then run workbench eval";
2872
+ }
2873
+ const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
2874
+ const currentVersion = snapshotVersionByRef(snapshot, snapshot.status.currentVersionId ?? snapshot.refs.current ?? "");
2875
+ const caseFiles = currentVersion?.files.filter((file) => file.kind === "text" &&
2876
+ /^\.workbench\/cases\/[^/]+\/case\.ya?ml$/u.test(file.path)) ?? [];
2877
+ const hasWorkflowCase = caseFiles.some((file) => file.kind === "text" && !/\n\s*smoke:\s*true(?:\s|$)/u.test(`\n${file.content}`));
2878
+ return hasWorkflowCase ? "workbench publish" : "edit .workbench/cases, then run workbench eval";
2606
2879
  }
2607
2880
  function formatStatusSnapshot(status) {
2608
2881
  const lines = [
2609
2882
  `Root: ${status.project.root}`,
2610
2883
  `Initialized: ${status.project.initialized ? "yes" : "no"}`,
2611
- ...(status.project.currentVersionId ? [`Current version: ${status.project.currentVersionId}`] : []),
2884
+ ...(status.project.currentVersionId ? [`Current version: ${displayRef(status.project.currentVersionId)}`] : []),
2612
2885
  ...(status.project.defaultSkill ? [`Default skill: ${status.project.defaultSkill}`] : []),
2613
2886
  ...(status.project.defaultAgent ? [`Default agent: ${status.project.defaultAgent}`] : []),
2614
2887
  `Runs: ${status.runs.total}${status.runs.lastStatus ? ` (last ${status.runs.lastStatus})` : ""}`,
@@ -2618,7 +2891,7 @@ function formatStatusSnapshot(status) {
2618
2891
  ? [
2619
2892
  "publication=published",
2620
2893
  remote.publication.visibility ? `visibility=${remote.publication.visibility}` : undefined,
2621
- remote.publication.versionId ? `version=${remote.publication.versionId}` : undefined,
2894
+ remote.publication.versionId ? `version=${displayRef(remote.publication.versionId)}` : undefined,
2622
2895
  remote.publication.installUrl ? `install=${remote.publication.installUrl}` : undefined,
2623
2896
  remote.publication.pinnedInstallUrl ? `pinned=${remote.publication.pinnedInstallUrl}` : undefined,
2624
2897
  ].filter(Boolean).join("\t")
@@ -2629,17 +2902,16 @@ function formatStatusSnapshot(status) {
2629
2902
  ? [
2630
2903
  ` error[${remote.sync.lastError.code}]: ${remote.sync.lastError.message}`,
2631
2904
  ...(remote.sync.lastAttemptAt ? [` last attempt: ${remote.sync.lastAttemptAt}`] : []),
2632
- ...(remote.sync.nextCommand ? [` next: ${remote.sync.nextCommand}`] : []),
2633
2905
  ]
2634
2906
  : []),
2635
2907
  ];
2636
2908
  })] : ["Remotes: none"]),
2637
- ...(status.next[0] ? [`next: ${status.next[0]}`] : []),
2909
+ ...(status.next ? [`next: ${shortenCommandRefs(status.next)}`] : []),
2638
2910
  ];
2639
2911
  return lines.join("\n");
2640
2912
  }
2641
2913
  function formatVersion(version) {
2642
- return `${version.id}\t${version.hash.slice(0, 12)}\t${version.message}`;
2914
+ return `${displayRef(version.id)}\t${version.hash.slice(0, 12)}\t${version.message}`;
2643
2915
  }
2644
2916
  function versionSummary(version) {
2645
2917
  return {
@@ -2657,11 +2929,11 @@ function formatAgent(agent) {
2657
2929
  function formatRun(run) {
2658
2930
  const score = run.score === undefined ? "n/a" : run.score.toFixed(3);
2659
2931
  const latency = run.latencyMs === undefined ? "n/a" : `${run.latencyMs}ms`;
2660
- return `${run.id}\t${run.kind}\t${run.status}\tversion=${run.versionId}\tskill=${run.skillName}\tagent=${run.agentName}\tscore=${score}\tlatency=${latency}`;
2932
+ return `${displayRef(run.id)}\t${run.kind}\t${run.status}\tversion=${displayRef(run.versionId)}\tskill=${run.skillName}\tagent=${run.agentName}\tscore=${score}\tlatency=${latency}`;
2661
2933
  }
2662
2934
  function formatImproveResult(result) {
2663
2935
  return [
2664
- `Improved ${result.version.parentIds[0] ?? "current"} -> ${result.version.id}. ${formatRun(result.run)}`,
2936
+ `Improved ${result.version.parentIds[0] ? displayRef(result.version.parentIds[0]) : "current"} -> ${displayRef(result.version.id)}. ${formatRun(result.run)}`,
2665
2937
  result.switched
2666
2938
  ? "Switched to improved version."
2667
2939
  : `Did not switch: ${result.promotionReason}`,
@@ -2670,26 +2942,26 @@ function formatImproveResult(result) {
2670
2942
  function formatJob(job) {
2671
2943
  const score = job.score === undefined ? "n/a" : job.score.toFixed(3);
2672
2944
  const duration = job.durationMs === undefined ? "n/a" : `${job.durationMs}ms`;
2673
- return `${job.id}\trun=${job.runId}\tcase=${job.caseId}\tsample=${job.sample}\t${job.status}\tscore=${score}\tduration=${duration}`;
2945
+ return `${displayRef(job.id)}\trun=${displayRef(job.runId)}\tcase=${job.caseId}\tsample=${job.sample}\t${job.status}\tscore=${score}\tduration=${duration}`;
2674
2946
  }
2675
2947
  function formatComparison(comparison) {
2676
2948
  const lines = ["version\tskill\tagent\tstatus\tscore\tcost\tlatency\trun"];
2677
2949
  for (const cell of comparison.cells) {
2678
2950
  lines.push([
2679
- cell.versionId,
2951
+ displayRef(cell.versionId),
2680
2952
  cell.skillName,
2681
2953
  `${cell.agentName}@${shortObjectId(cell.agentHash)}`,
2682
2954
  cell.status ?? "not-run",
2683
2955
  cell.score === undefined ? "n/a" : cell.score.toFixed(3),
2684
2956
  cell.costUsd === undefined ? "n/a" : `$${cell.costUsd.toFixed(4)}`,
2685
2957
  cell.latencyMs === undefined ? "n/a" : `${cell.latencyMs}ms`,
2686
- cell.runId ?? "n/a",
2958
+ cell.runId ? displayRef(cell.runId) : "n/a",
2687
2959
  ].join("\t"));
2688
2960
  }
2689
2961
  return lines.join("\n");
2690
2962
  }
2691
2963
  function shortObjectId(id) {
2692
- return id.length > 12 ? id.slice(0, 12) : id;
2964
+ return id.length > 8 ? id.slice(0, 8) : id;
2693
2965
  }
2694
2966
  function formatTrace(trace) {
2695
2967
  const result = asRecord(trace.result);
@@ -2698,7 +2970,7 @@ function formatTrace(trace) {
2698
2970
  const error = typeof result?.error === "string" ? result.error.split(/\r?\n/u)[0] : undefined;
2699
2971
  const files = trace.files.slice(0, 5).map((file) => file.path).join(",");
2700
2972
  return [
2701
- `${trace.id}\trun=${trace.runId}\tjob=${trace.jobId ?? "n/a"}\tversion=${trace.versionId}\tskill=${trace.skillName}\tagent=${trace.agentName}`,
2973
+ `${displayRef(trace.id)}\trun=${displayRef(trace.runId)}\tjob=${trace.jobId ? displayRef(trace.jobId) : "n/a"}\tversion=${displayRef(trace.versionId)}\tskill=${trace.skillName}\tagent=${trace.agentName}`,
2702
2974
  status ? `status=${status}` : undefined,
2703
2975
  score ? `score=${score}` : undefined,
2704
2976
  error ? `error=${error}` : undefined,
@@ -2726,7 +2998,7 @@ function formatTraceDetail(detail) {
2726
2998
  return detail.executions.map((execution) => {
2727
2999
  const sessionLabels = execution.sessions.map((session) => session.label).join(",");
2728
3000
  return [
2729
- `${execution.id}\trun=${detail.runId}\tjobs=${execution.jobIds.join(",")}\tstatus=${execution.status}`,
3001
+ `${execution.id}\trun=${displayRef(detail.runId)}\tjobs=${execution.jobIds.map(displayRef).join(",")}\tstatus=${execution.status}`,
2730
3002
  `events=${execution.trace.events.length}`,
2731
3003
  `spans=${execution.trace.spans.length}`,
2732
3004
  `summaries=${execution.trace.summaries.length}`,
@@ -2735,7 +3007,7 @@ function formatTraceDetail(detail) {
2735
3007
  }).join("\n");
2736
3008
  }
2737
3009
  function formatArtifact(artifact) {
2738
- return `${artifact.id}\trun=${artifact.runId}\tjob=${artifact.jobId}\t${artifact.kind}\tfiles=${artifact.files.length}`;
3010
+ return `${displayRef(artifact.id)}\trun=${displayRef(artifact.runId)}\tjob=${displayRef(artifact.jobId)}\t${artifact.kind}\tfiles=${artifact.files.length}`;
2739
3011
  }
2740
3012
  function artifactSummary(artifact) {
2741
3013
  return {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@workbench-ai/workbench",
3
- "version": "0.0.70",
3
+ "version": "0.0.71",
4
4
  "repository": {
5
5
  "type": "git",
6
6
  "url": "git+https://github.com/workbench-ai/workbench.git",
@@ -21,10 +21,10 @@
21
21
  ],
22
22
  "dependencies": {
23
23
  "yaml": "^2.8.2",
24
- "@workbench-ai/workbench-contract": "0.0.70",
25
- "@workbench-ai/workbench-built-in-adapters": "0.0.70",
26
- "@workbench-ai/workbench-protocol": "0.0.70",
27
- "@workbench-ai/workbench-core": "0.0.70"
24
+ "@workbench-ai/workbench-built-in-adapters": "0.0.71",
25
+ "@workbench-ai/workbench-core": "0.0.71",
26
+ "@workbench-ai/workbench-contract": "0.0.71",
27
+ "@workbench-ai/workbench-protocol": "0.0.71"
28
28
  },
29
29
  "devDependencies": {
30
30
  "@tailwindcss/postcss": "^4.2.2",
@@ -35,7 +35,7 @@
35
35
  "react-dom": "^19.2.0",
36
36
  "typescript": "^5.9.2",
37
37
  "vitest": "^3.2.4",
38
- "@workbench-ai/workbench-ui": "0.0.1"
38
+ "@workbench-ai/workbench-ui": "0.0.71"
39
39
  },
40
40
  "scripts": {
41
41
  "build": "rm -rf dist && tsc -p tsconfig.json && chmod 755 dist/workbench.js && node ./scripts/build-dev-open-assets.mjs",