@workbench-ai/workbench 0.0.68 → 0.0.69

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/dist/dev-open/client.css +416 -107
  2. package/dist/dev-open/client.js +272 -231
  3. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-400-normal.woff +0 -0
  4. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-400-normal.woff2 +0 -0
  5. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-500-normal.woff +0 -0
  6. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-500-normal.woff2 +0 -0
  7. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-600-normal.woff +0 -0
  8. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-600-normal.woff2 +0 -0
  9. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-400-normal.woff +0 -0
  10. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-400-normal.woff2 +0 -0
  11. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-500-normal.woff +0 -0
  12. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-500-normal.woff2 +0 -0
  13. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-600-normal.woff +0 -0
  14. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-600-normal.woff2 +0 -0
  15. package/dist/dev-open/fonts/ibm-plex-mono-latin-400-normal.woff +0 -0
  16. package/dist/dev-open/fonts/ibm-plex-mono-latin-400-normal.woff2 +0 -0
  17. package/dist/dev-open/fonts/ibm-plex-mono-latin-500-normal.woff +0 -0
  18. package/dist/dev-open/fonts/ibm-plex-mono-latin-500-normal.woff2 +0 -0
  19. package/dist/dev-open/fonts/ibm-plex-mono-latin-600-normal.woff +0 -0
  20. package/dist/dev-open/fonts/ibm-plex-mono-latin-600-normal.woff2 +0 -0
  21. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-400-normal.woff +0 -0
  22. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-400-normal.woff2 +0 -0
  23. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-500-normal.woff +0 -0
  24. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-500-normal.woff2 +0 -0
  25. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-600-normal.woff +0 -0
  26. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-600-normal.woff2 +0 -0
  27. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-400-normal.woff +0 -0
  28. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-400-normal.woff2 +0 -0
  29. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-500-normal.woff +0 -0
  30. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-500-normal.woff2 +0 -0
  31. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-600-normal.woff +0 -0
  32. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-600-normal.woff2 +0 -0
  33. package/dist/dev-open/fonts/libre-caslon-display-latin-400-normal.woff +0 -0
  34. package/dist/dev-open/fonts/libre-caslon-display-latin-400-normal.woff2 +0 -0
  35. package/dist/dev-open/fonts/libre-caslon-display-latin-ext-400-normal.woff +0 -0
  36. package/dist/dev-open/fonts/libre-caslon-display-latin-ext-400-normal.woff2 +0 -0
  37. package/dist/index.d.ts.map +1 -1
  38. package/dist/index.js +1101 -244
  39. package/dist/install-targets.d.ts +35 -0
  40. package/dist/install-targets.d.ts.map +1 -0
  41. package/dist/install-targets.js +188 -0
  42. package/dist/open-server.d.ts.map +1 -1
  43. package/dist/open-server.js +72 -4
  44. package/dist/output.d.ts +22 -0
  45. package/dist/output.d.ts.map +1 -0
  46. package/dist/output.js +38 -0
  47. package/package.json +4 -4
package/dist/index.js CHANGED
@@ -4,61 +4,72 @@ import { createRequire } from "node:module";
4
4
  import os from "node:os";
5
5
  import path from "node:path";
6
6
  import { gzipSync } from "node:zlib";
7
- import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent, checkWorkbenchSkill, compareWorkbench, createWorkbenchAdapterAuthBundle, createWorkbenchInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, filesForWorkbenchRef, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchCases, listWorkbenchRemotes, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, publishWorkbenchVersion, removeWorkbenchCase, removeWorkbenchAgent, setDefaultWorkbenchAgent, showWorkbenchCase, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchStatus, WorkbenchUserError, } from "@workbench-ai/workbench-core";
7
+ import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent, checkWorkbenchSkill, compareWorkbench, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, filesForWorkbenchRef, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchCases, listWorkbenchRemotes, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, publishWorkbenchVersion, removeWorkbenchCase, removeWorkbenchAgent, removeWorkbenchRemote, setDefaultWorkbenchAgent, showWorkbenchCase, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
8
+ import { emitError, emitResult } from "./output.js";
9
+ import { installSnapshotToTargets, installTargetsToJson, normalizeInstallSnapshotPath, resolveInstallTargets, supportedInstallTargets, } from "./install-targets.js";
8
10
  import { startWorkbenchOpenServer } from "./open-server.js";
9
11
  const require = createRequire(import.meta.url);
10
12
  const HELP = [
11
13
  "Usage:",
12
14
  " workbench <command> [options]",
13
15
  "",
14
- "Skill lifecycle:",
16
+ "Primary loop:",
15
17
  " workbench init [DIR] [--json]",
16
- " workbench status [--dir DIR] [--json]",
17
18
  " workbench check [--dir DIR] [--json]",
19
+ " workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [--samples N] [--rerun] [--json]",
20
+ " workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
21
+ " workbench improve [VERSION] [--skill SKILL] [--agent AGENT] [--budget N] [--samples N] [--json]",
22
+ "",
23
+ "Inspect:",
24
+ " workbench status [--dir DIR] [--json]",
18
25
  " workbench versions [--dir DIR] [--json]",
19
26
  " workbench switch VERSION [--dir DIR] [--json]",
20
27
  " workbench diff [A..B] [--dir DIR] [--json]",
21
- " workbench sync [REMOTE] [--dir DIR] [--json]",
22
- "",
23
- "Evaluate and improve:",
24
- " workbench eval [VERSION] [--skill SKILL|all] [--agent AGENT|all] [--samples N] [--rerun] [--json]",
25
- " workbench improve [VERSION] [--skill primary] [--agent AGENT] [--budget N] [--samples N] [--json]",
26
- " workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
27
- " workbench retry RUN_ID [--json]",
28
- "",
29
- "Evidence:",
30
28
  " workbench show REF[:PATH] [--json]",
31
29
  " workbench files REF [--json]",
32
- " workbench list runs|jobs|traces|artifacts|sessions|remotes [--json]",
30
+ " workbench list runs|jobs|traces|artifacts|sessions [--json]",
33
31
  " workbench trace RUN_ID|JOB_ID|TRACE_ID [--json]",
32
+ " workbench open [--host HOST] [--port PORT] [--no-open] [--json]",
34
33
  "",
35
- "Configuration and sync:",
34
+ "Configure:",
36
35
  " workbench agent list|add|show|default|remove ...",
37
36
  " workbench skills list",
38
37
  " workbench case list|add|show|remove ...",
39
- " workbench remote add origin URL",
40
- " workbench remote list",
38
+ "",
39
+ "Share and auth:",
40
+ " workbench remote add --name NAME --url URL [--replace] [--dry-run] [--dir DIR] [--json]",
41
+ " workbench remote list [--dir DIR] [--json]",
42
+ " workbench remote remove NAME [--dir DIR] [--json]",
43
+ " workbench sync [REMOTE] [--dry-run] [--dir DIR] [--json]",
44
+ " workbench publish [VERSION] [--remote REMOTE] [--visibility private|internal|public] [--dry-run] [--dir DIR] [--json]",
45
+ " workbench install --source SOURCE [--agent codex|claude]... [--local] [--yes] [--list] [--dry-run] [--json]",
41
46
  " workbench auth status [ADAPTER[/SLOT]] [--profile PROFILE] [--json]",
42
47
  " workbench auth connect ADAPTER[/SLOT] [--method METHOD] [--profile PROFILE] [--profile-root DIR] [--local-only] [--json]",
43
48
  " workbench auth disconnect ADAPTER[/SLOT] [--profile PROFILE] [--local-only] [--json]",
44
- " workbench login [--base-url URL] [--no-open] [--json]",
49
+ " workbench login [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--json]",
45
50
  " workbench logout [--json]",
46
- " workbench publish [VERSION] [--visibility private|public] [--json]",
47
- " workbench open [--host HOST] [--port PORT] [--no-open] [--json]",
51
+ "",
52
+ "Remote URLs:",
53
+ " https://HOST/skills/OWNER/SKILL Workbench Cloud skill remote",
54
+ " file:///absolute/path local file remote",
48
55
  "",
49
56
  "Examples:",
50
57
  " workbench init ./earnings-prep",
51
- " workbench eval --agent default --samples 1",
52
- " workbench versions",
53
- " workbench switch v001",
54
- " workbench retry run_000002 --json",
55
- " workbench show trace_job_000002:stderr.log",
56
- " workbench auth connect codex --method api-key",
57
- " workbench publish --visibility public",
58
+ " workbench check --dir ./earnings-prep",
59
+ " workbench eval --agents default --samples 1",
60
+ " workbench compare",
61
+ " workbench status --json",
62
+ " workbench remote add --name origin --url https://v2.workbench.ai/skills/acme/earnings-prep",
63
+ " workbench publish --remote origin --visibility public --json",
64
+ " workbench install --source https://v2.workbench.ai/skills/acme/earnings-prep --agent codex --yes",
58
65
  "",
59
66
  "Environment:",
60
67
  " CODEX_HOME and CLAUDE_HOME override read-only session discovery roots.",
61
68
  " WORKBENCH_API_URL selects a Workbench Cloud API base URL for login, auth, and HTTP remotes.",
69
+ " WORKBENCH_API_TOKEN supplies a Workbench Cloud token without a login (WORKBENCH_SMOKE_BEARER_TOKEN is a fallback).",
70
+ " WORKBENCH_CONFIG overrides the CLI config path (default ~/.workbench/config.json).",
71
+ " WORKBENCH_DEVICE_AUTH overrides the pending device login record path.",
72
+ " WORKBENCH_ADAPTER_AUTH_STORE overrides the local adapter auth store directory.",
62
73
  ].join("\n");
63
74
  const COMMAND_HELP = {
64
75
  auth: [
@@ -68,24 +79,62 @@ const COMMAND_HELP = {
68
79
  " workbench auth disconnect ADAPTER[/SLOT] [--profile PROFILE] [--local-only] [--json]",
69
80
  "",
70
81
  "Stores adapter credentials locally and uploads them to Workbench Cloud when logged in unless --local-only is passed. Codex supports oauth and api-key. Claude supports oauth, api-key, and bedrock.",
82
+ "",
83
+ "Examples:",
84
+ " workbench auth status --json",
85
+ " workbench auth connect codex --method api-key",
86
+ " workbench auth disconnect codex --json",
71
87
  ].join("\n"),
72
88
  eval: [
73
89
  "Usage:",
74
- " workbench eval [VERSION] [--skill SKILL|all] [--agent AGENT|all] [--samples N] [--rerun] [--json]",
90
+ " workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [--samples N] [--rerun] [--json]",
75
91
  "",
76
- "Runs local eval jobs for the selected version, skill, and agent.",
92
+ "Runs eval jobs for the selected version, measured skills, and agents. Omitted selectors use manifest defaults.",
77
93
  ].join("\n"),
78
94
  improve: [
79
95
  "Usage:",
80
- " workbench improve [VERSION] [--agent AGENT] [--budget N] [--samples N] [--json]",
96
+ " workbench improve [VERSION] [--skill SKILL] [--agent AGENT] [--budget N] [--samples N] [--json]",
81
97
  "",
82
- "Creates an improved child version from evidence and switches to it when it beats the incumbent.",
98
+ "Creates one improved child version from evidence. Pass singular --skill and --agent when defaults expand to multiple entries.",
83
99
  ].join("\n"),
84
- retry: [
100
+ install: [
85
101
  "Usage:",
86
- " workbench retry RUN_ID [--json]",
102
+ " workbench install --source SOURCE [--agent codex|claude]... [--local] [--yes] [--list] [--dry-run] [--json]",
87
103
  "",
88
- "Retries failed jobs from a prior run by replaying only their case/sample pairs locally.",
104
+ "Installs published Workbench Cloud source into explicit local agent targets.",
105
+ "",
106
+ "Example:",
107
+ " workbench install --source https://v2.workbench.ai/skills/acme/earnings-prep --agent codex --yes",
108
+ ].join("\n"),
109
+ remote: [
110
+ "Usage:",
111
+ " workbench remote add --name NAME --url URL [--replace] [--dry-run] [--dir DIR] [--json]",
112
+ " workbench remote list [--dir DIR] [--json]",
113
+ " workbench remote remove NAME [--dir DIR] [--json]",
114
+ "",
115
+ "Remotes exchange Workbench object packs. Only Workbench Cloud remotes can publish installable source.",
116
+ "",
117
+ "Examples:",
118
+ " workbench remote add --name origin --url https://v2.workbench.ai/skills/acme/earnings-prep",
119
+ " workbench remote add --name scratch --url file:///tmp/earnings-prep-remote --replace",
120
+ ].join("\n"),
121
+ status: [
122
+ "Usage:",
123
+ " workbench status [--dir DIR] [--json]",
124
+ "",
125
+ "Reports project, worktree, run, per-remote sync/publication, and auth state. --json emits the workbench.status.v1 dashboard.",
126
+ "",
127
+ "Example:",
128
+ " workbench status --json",
129
+ ].join("\n"),
130
+ logout: [
131
+ "Usage:",
132
+ " workbench logout [--json]",
133
+ "",
134
+ "Revokes and removes the local Workbench Cloud token. Reports whether the token was revoked and whether local adapter auth records remain.",
135
+ "",
136
+ "Example:",
137
+ " workbench logout --json",
89
138
  ].join("\n"),
90
139
  show: [
91
140
  "Usage:",
@@ -96,9 +145,9 @@ const COMMAND_HELP = {
96
145
  ].join("\n"),
97
146
  list: [
98
147
  "Usage:",
99
- " workbench list runs|jobs|traces|artifacts|sessions|remotes [--json]",
148
+ " workbench list runs|jobs|traces|artifacts|sessions [--json]",
100
149
  "",
101
- "Lists Workbench evidence, remotes, or read-only native Codex/Claude session files.",
150
+ "Lists Workbench evidence or read-only native Codex/Claude session files.",
102
151
  ].join("\n"),
103
152
  versions: [
104
153
  "Usage:",
@@ -114,30 +163,49 @@ const COMMAND_HELP = {
114
163
  ].join("\n"),
115
164
  sync: [
116
165
  "Usage:",
117
- " workbench sync [REMOTE] [--json]",
166
+ " workbench sync [REMOTE] [--dry-run] [--dir DIR] [--json]",
167
+ "",
168
+ "Synchronizes local evidence and version objects with a Workbench remote. --dry-run reports what would be exchanged.",
118
169
  "",
119
- "Synchronizes local evidence and version objects with a Workbench remote.",
170
+ "Examples:",
171
+ " workbench sync origin --json",
172
+ " workbench sync origin --dry-run --json",
120
173
  ].join("\n"),
121
174
  publish: [
122
175
  "Usage:",
123
- " workbench publish [VERSION] [--visibility private|public] [--json]",
176
+ " workbench publish [VERSION] [--remote REMOTE] [--visibility private|internal|public] [--dry-run] [--dir DIR] [--json]",
124
177
  "",
125
- "Publishes installable skill source from the selected version to a Workbench source remote.",
178
+ "Publishes installable skill source from the selected version to a Workbench Cloud remote.",
179
+ "",
180
+ "Examples:",
181
+ " workbench publish --remote origin --visibility private --json",
182
+ " workbench publish <version-id> --remote origin --dry-run --json",
126
183
  ].join("\n"),
127
184
  login: [
128
185
  "Usage:",
129
- " workbench login [--base-url URL] [--no-open] [--json]",
186
+ " workbench login [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--json]",
130
187
  " workbench logout [--json]",
131
188
  "",
132
189
  "Connects the CLI to Workbench Cloud with the device login flow.",
190
+ "",
191
+ "Examples:",
192
+ " workbench login --start-only --json",
193
+ " workbench login --wait --timeout 120 --json",
133
194
  ].join("\n"),
134
195
  };
135
196
  const BOOLEAN_FLAGS = new Set([
136
197
  "help",
198
+ "dry-run",
137
199
  "json",
200
+ "local",
138
201
  "local-only",
202
+ "list",
139
203
  "no-open",
204
+ "start-only",
205
+ "replace",
140
206
  "rerun",
207
+ "wait",
208
+ "yes",
141
209
  ]);
142
210
  const FLAG_DEFINITIONS = {
143
211
  adapter: "string",
@@ -145,18 +213,26 @@ const FLAG_DEFINITIONS = {
145
213
  budget: "positive-integer",
146
214
  dir: "string",
147
215
  from: "string",
216
+ "dry-run": "boolean",
148
217
  help: "boolean",
149
218
  host: "string",
150
219
  json: "boolean",
220
+ local: "boolean",
151
221
  "local-only": "boolean",
222
+ list: "boolean",
152
223
  method: "string",
153
224
  model: "string",
225
+ name: "string",
154
226
  "no-open": "boolean",
155
227
  port: "positive-integer",
156
228
  profile: "string",
157
229
  "profile-root": "string",
230
+ remote: "string",
231
+ replace: "boolean",
158
232
  rerun: "boolean",
159
233
  samples: "positive-integer",
234
+ source: "string",
235
+ "start-only": "boolean",
160
236
  agent: "string",
161
237
  agents: "string",
162
238
  skill: "string",
@@ -164,26 +240,30 @@ const FLAG_DEFINITIONS = {
164
240
  version: "boolean",
165
241
  versions: "string",
166
242
  visibility: "string",
243
+ timeout: "positive-integer",
244
+ url: "string",
245
+ wait: "boolean",
167
246
  with: "repeat-string",
247
+ yes: "boolean",
168
248
  };
169
249
  const COMMAND_FLAGS = {
170
250
  check: ["dir", "json"],
171
251
  compare: ["agents", "dir", "json", "skills", "versions"],
172
252
  diff: ["dir", "json"],
173
- eval: ["agent", "dir", "json", "rerun", "samples", "skill"],
253
+ eval: ["agents", "dir", "json", "rerun", "samples", "skills"],
174
254
  files: ["dir", "json"],
175
255
  improve: ["agent", "budget", "dir", "json", "samples", "skill"],
176
256
  init: ["dir", "json"],
257
+ install: ["agent", "dry-run", "json", "list", "local", "source", "yes"],
177
258
  list: ["dir", "json"],
178
- login: ["base-url", "json", "no-open"],
259
+ login: ["base-url", "json", "no-open", "start-only", "timeout", "wait"],
179
260
  logout: ["json"],
180
261
  open: ["dir", "host", "json", "no-open", "port"],
181
- publish: ["dir", "json", "visibility"],
182
- retry: ["dir", "json"],
262
+ publish: ["dir", "dry-run", "json", "remote", "visibility"],
183
263
  show: ["dir", "json"],
184
264
  status: ["dir", "json"],
185
265
  switch: ["dir", "json"],
186
- sync: ["dir", "json"],
266
+ sync: ["dir", "dry-run", "json"],
187
267
  trace: ["dir", "json"],
188
268
  versions: ["dir", "json"],
189
269
  };
@@ -206,8 +286,9 @@ const SUBCOMMAND_FLAGS = {
206
286
  },
207
287
  remote: {
208
288
  flags: {
209
- add: ["dir", "json"],
289
+ add: ["dir", "dry-run", "json", "name", "replace", "url"],
210
290
  list: ["dir", "json"],
291
+ remove: ["dir", "json"],
211
292
  },
212
293
  },
213
294
  skills: {
@@ -246,20 +327,31 @@ export async function runCli(argv, io = {
246
327
  return 0;
247
328
  }
248
329
  validateCommandFlags(parsed, command);
249
- const core = await coreOptions(parsed);
250
330
  if (command === "login") {
251
331
  return await handleLogin(parsed, io);
252
332
  }
253
333
  if (command === "logout") {
254
334
  return await handleLogout(parsed, io);
255
335
  }
336
+ if (command === "install") {
337
+ return await handleInstall(parsed, io);
338
+ }
339
+ const core = await coreOptions(parsed);
256
340
  if (command === "init") {
257
341
  const status = await initWorkbenchSkill({ dir: parsed.positionals[1] ?? dirFlag(parsed) });
258
342
  return output(status, parsed, io, () => `Initialized Workbench skill at ${status.root}.`);
259
343
  }
260
344
  if (command === "status") {
261
- const status = await workbenchStatus(core);
262
- return output(status, parsed, io, () => formatStatus(status));
345
+ const status = await workbenchStatusSnapshot(core);
346
+ const auth = await workbenchCliAuthStatus();
347
+ return emitResult("workbench.status.v1", {
348
+ project: status.project,
349
+ worktree: status.worktree,
350
+ runs: status.runs,
351
+ remotes: status.remotes,
352
+ auth: auth,
353
+ next: status.next,
354
+ }, parsed, io, () => formatStatusSnapshot({ ...status, auth }));
263
355
  }
264
356
  if (command === "check") {
265
357
  const result = await checkWorkbenchSkill(core);
@@ -269,13 +361,17 @@ export async function runCli(argv, io = {
269
361
  const runs = await evalWorkbenchSkill({
270
362
  ...core,
271
363
  version: optionalPositional(parsed, 1),
272
- skill: stringFlag(parsed, "skill"),
273
- agent: stringFlag(parsed, "agent"),
364
+ skill: stringFlag(parsed, "skills"),
365
+ agent: stringFlag(parsed, "agents"),
274
366
  samples: intFlag(parsed, "samples"),
275
367
  rerun: parsed.flags.rerun === true,
276
368
  });
277
- const code = output(runs, parsed, io, () => runs.map(formatRun).join("\n"));
278
- return runs.some((run) => run.status === "failed" || run.status === "canceled") ? 1 : code;
369
+ const artifactIds = await artifactIdsByRunId(core, runs);
370
+ const failedRuns = runs.filter((run) => run.status === "failed" || run.status === "canceled");
371
+ if (failedRuns.length > 0) {
372
+ return emitEvalFailure(runs, failedRuns, artifactIds, parsed, io);
373
+ }
374
+ return output(runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])), parsed, io, () => runs.map(formatRun).join("\n"));
279
375
  }
280
376
  if (command === "improve") {
281
377
  const result = await improveWorkbenchSkill({
@@ -286,7 +382,10 @@ export async function runCli(argv, io = {
286
382
  budget: intFlag(parsed, "budget"),
287
383
  samples: intFlag(parsed, "samples"),
288
384
  });
289
- return output(result, parsed, io, () => formatImproveResult(result));
385
+ return output({
386
+ ...result,
387
+ version: versionSummary(result.version),
388
+ }, parsed, io, () => formatImproveResult(result));
290
389
  }
291
390
  if (command === "compare") {
292
391
  const comparison = await compareWorkbench({
@@ -297,35 +396,14 @@ export async function runCli(argv, io = {
297
396
  });
298
397
  return output(comparison, parsed, io, () => formatComparison(comparison));
299
398
  }
300
- if (command === "retry") {
301
- const runId = requiredPositional(parsed, 1, "workbench retry requires RUN_ID.");
302
- const snapshot = await createWorkbenchInspectionSnapshot(core);
303
- const run = snapshot.runs.find((entry) => entry.id === runId);
304
- if (!run) {
305
- throw new WorkbenchUserError(`Run not found: ${runId}`);
306
- }
307
- const retrySelection = retrySamplesForFailedJobs(snapshot.jobs, run);
308
- const retry = await evalWorkbenchSkill({
309
- ...core,
310
- version: run.versionId,
311
- skill: run.skillName,
312
- agent: run.agentName,
313
- kind: "retry",
314
- parentRunId: run.id,
315
- samples: retrySelection.samples,
316
- selectedSamples: retrySelection.selectedSamples,
317
- });
318
- const code = output(retry, parsed, io, () => retry.map(formatRun).join("\n"));
319
- return retry.some((entry) => entry.status === "failed" || entry.status === "canceled") ? 1 : code;
320
- }
321
399
  if (command === "versions") {
322
400
  const versions = await listWorkbenchVersions(core);
323
- return output(versions, parsed, io, () => versions.map(formatVersion).join("\n") || "No versions.");
401
+ return output(versions.map(versionSummary), parsed, io, () => versions.map(formatVersion).join("\n") || "No versions.");
324
402
  }
325
403
  if (command === "switch") {
326
404
  const versionRef = requiredPositional(parsed, 1, "workbench switch requires VERSION.");
327
405
  const version = await switchWorkbenchVersion(versionRef, core);
328
- return output(version, parsed, io, () => `Switched to ${version.id}.`);
406
+ return output(versionSummary(version), parsed, io, () => `Switched to ${version.id}.`);
329
407
  }
330
408
  if (command === "diff") {
331
409
  const range = requiredPositional(parsed, 1, "workbench diff requires A..B.");
@@ -344,14 +422,20 @@ export async function runCli(argv, io = {
344
422
  if (command === "files") {
345
423
  const ref = requiredPositional(parsed, 1, "workbench files requires REF.");
346
424
  const files = await filesForWorkbenchRef(ref, core);
347
- return output(files, parsed, io, () => files.map((file) => file.path).join("\n") || "No files.");
425
+ return output(files.map(fileSummary), parsed, io, () => files.map((file) => file.path).join("\n") || "No files.");
348
426
  }
349
427
  if (command === "list") {
350
428
  return await handleList(parsed, io);
351
429
  }
352
430
  if (command === "trace") {
353
- const ref = requiredPositional(parsed, 1, "workbench trace requires RUN_ID or TRACE_ID.");
354
- const snapshot = await createWorkbenchInspectionSnapshot(core);
431
+ const ref = optionalPositional(parsed, 1);
432
+ if (!ref) {
433
+ throw new WorkbenchCodedError("usage", "workbench trace requires RUN_ID, JOB_ID, or TRACE_ID.", {
434
+ remediation: "Run workbench list runs --json or workbench list traces --json.",
435
+ exitCode: 2,
436
+ });
437
+ }
438
+ const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
355
439
  const run = snapshot.runs.find((entry) => entry.id === ref);
356
440
  const job = snapshot.jobs.find((entry) => entry.id === ref);
357
441
  const traces = run
@@ -360,7 +444,27 @@ export async function runCli(argv, io = {
360
444
  ? snapshot.traces.filter((trace) => job.traceIds.includes(trace.id))
361
445
  : snapshot.traces.filter((trace) => trace.id === ref);
362
446
  if (traces.length === 0) {
363
- throw new WorkbenchUserError(`Trace not found: ${ref}`);
447
+ const jobs = run
448
+ ? snapshot.jobs.filter((entry) => entry.runId === run.id)
449
+ : job ? [job] : [];
450
+ const details = jobs.flatMap((entry) => {
451
+ const detail = workbenchJobEvidenceForSnapshot(snapshot, {
452
+ runId: entry.runId,
453
+ jobId: entry.id,
454
+ });
455
+ return detail ? [detail] : [];
456
+ }).filter((detail) => detail.executions.some((execution) => execution.sessions.length > 0 ||
457
+ execution.trace.spans.length > 0 ||
458
+ execution.trace.events.length > 0 ||
459
+ execution.trace.summaries.length > 0));
460
+ if (details.length > 0) {
461
+ return output(details, parsed, io, () => details.map(formatTraceDetail).join("\n"));
462
+ }
463
+ throw new WorkbenchCodedError("ref_not_found", `Trace not found: ${ref}`, {
464
+ remediation: "Run workbench list runs --json, workbench list jobs --json, or workbench list traces --json.",
465
+ subject: { ref },
466
+ exitCode: 1,
467
+ });
364
468
  }
365
469
  return output(traces, parsed, io, () => traces.map(formatTrace).join("\n"));
366
470
  }
@@ -380,56 +484,74 @@ export async function runCli(argv, io = {
380
484
  const result = await syncWorkbenchRemote({
381
485
  ...core,
382
486
  remote: optionalPositional(parsed, 1),
487
+ dryRun: parsed.flags["dry-run"] === true,
383
488
  });
384
- return output(result, parsed, io, () => `Synced ${result.remote.name}: pushed ${result.pushed}, pulled ${result.pulled}.`);
489
+ return emitResult("workbench.cli.sync.v1", {
490
+ remote: result.remote,
491
+ pushed: result.pushed,
492
+ pulled: result.pulled,
493
+ upToDate: result.upToDate,
494
+ publication: result.publication,
495
+ ...(result.dryRun ? { dryRun: true } : {}),
496
+ }, parsed, io, () => `${result.dryRun ? "Would sync" : "Synced"} ${result.remote.name}: pushed ${result.pushed}, pulled ${result.pulled}${result.upToDate ? " (up to date)" : ""}.`);
385
497
  }
386
498
  if (command === "publish") {
387
499
  const result = await publishWorkbenchVersion({
388
500
  ...core,
389
501
  version: optionalPositional(parsed, 1),
502
+ remote: stringFlag(parsed, "remote"),
503
+ dryRun: parsed.flags["dry-run"] === true,
390
504
  visibility: parsePublishVisibility(stringFlag(parsed, "visibility")),
391
505
  });
392
- return output(result, parsed, io, () => `Published ${result.version.id} to ${result.installUrl}.`);
506
+ return emitResult("workbench.cli.publish.v1", {
507
+ remote: result.remote,
508
+ version: versionSummary(result.version),
509
+ visibility: result.visibility,
510
+ installUrl: result.installUrl,
511
+ pinnedInstallUrl: result.pinnedInstallUrl,
512
+ ...(result.dryRun ? { dryRun: true } : {}),
513
+ }, parsed, io, () => [
514
+ `${result.dryRun ? "Would publish" : "Published"} ${result.version.id} to remote ${result.remote.name}.`,
515
+ `Visibility: ${result.visibility}`,
516
+ `Install: ${result.installUrl}`,
517
+ `Pinned: ${result.pinnedInstallUrl}`,
518
+ ].join("\n"));
393
519
  }
394
520
  if (command === "auth") {
395
521
  return await handleAuth(parsed, io);
396
522
  }
397
523
  if (command === "open") {
398
- const snapshot = await createWorkbenchInspectionSnapshot(core);
399
- if (parsed.flags.json !== true) {
400
- const server = await startWorkbenchOpenServer({
401
- dir: dirFlag(parsed),
402
- authToken: core.authToken,
403
- host: stringFlag(parsed, "host"),
404
- port: intFlag(parsed, "port"),
405
- });
406
- io.stdout.write(`Workbench: ${server.url}\n`);
407
- if (parsed.flags["no-open"] !== true) {
408
- await openBrowser(server.url).catch(() => undefined);
409
- }
410
- await new Promise(() => { });
524
+ if (parsed.flags.json === true) {
525
+ const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
526
+ return output(snapshot, parsed, io, () => "Read-only Workbench inspection data is available with --json.");
527
+ }
528
+ // The browser server serves committed object state through a read-only
529
+ // snapshot path, so long-running commands do not block page loads.
530
+ const server = await startWorkbenchOpenServer({
531
+ dir: dirFlag(parsed),
532
+ authToken: core.authToken,
533
+ host: stringFlag(parsed, "host"),
534
+ port: intFlag(parsed, "port"),
535
+ });
536
+ io.stdout.write(`Workbench: ${server.url}\n`);
537
+ if (parsed.flags["no-open"] !== true) {
538
+ await openBrowser(server.url).catch(() => undefined);
411
539
  }
412
- return output(snapshot, parsed, io, () => "Read-only Workbench inspection data is available with --json.");
540
+ return await new Promise(() => { });
413
541
  }
414
542
  throw new WorkbenchUserError(`Unknown command: ${command}\n\n${HELP}`);
415
543
  }
416
544
  catch (error) {
417
- const message = error instanceof Error ? error.message : String(error);
418
- if (parsed.flags.json === true) {
419
- io.stdout.write(`${JSON.stringify({ ok: false, error: message }, null, 2)}\n`);
420
- return error instanceof WorkbenchUserError ? 2 : 1;
421
- }
422
- io.stderr.write(`${message}\n`);
423
- return error instanceof WorkbenchUserError ? 2 : 1;
545
+ return emitError(error, parsed, io);
424
546
  }
425
547
  }
426
548
  async function handleList(parsed, io) {
427
- const kind = requiredPositional(parsed, 1, "workbench list requires runs|jobs|traces|artifacts|sessions|remotes.");
549
+ const kind = requiredPositional(parsed, 1, "workbench list requires runs|jobs|traces|artifacts|sessions.");
428
550
  if (kind === "sessions") {
429
551
  const sessions = await listLocalAgentSessions();
430
552
  return output(sessions, parsed, io, () => sessions.map(formatSession).join("\n") || "No local sessions.");
431
553
  }
432
- const snapshot = await createWorkbenchInspectionSnapshot(await coreOptions(parsed));
554
+ const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(await coreOptions(parsed));
433
555
  if (kind === "runs") {
434
556
  return output(snapshot.runs, parsed, io, () => snapshot.runs.map(formatRun).join("\n") || "No runs.");
435
557
  }
@@ -437,13 +559,10 @@ async function handleList(parsed, io) {
437
559
  return output(snapshot.jobs, parsed, io, () => snapshot.jobs.map(formatJob).join("\n") || "No jobs.");
438
560
  }
439
561
  if (kind === "traces") {
440
- return output(snapshot.traces, parsed, io, () => snapshot.traces.map(formatTrace).join("\n") || "No traces.");
562
+ return output(snapshot.traces.map(traceSummary), parsed, io, () => snapshot.traces.map(formatTrace).join("\n") || "No traces.");
441
563
  }
442
564
  if (kind === "artifacts") {
443
- return output(snapshot.artifacts, parsed, io, () => snapshot.artifacts.map(formatArtifact).join("\n") || "No artifacts.");
444
- }
445
- if (kind === "remotes") {
446
- return output(snapshot.remotes, parsed, io, () => snapshot.remotes.map((remote) => `${remote.name}\t${remote.url}`).join("\n") || "No remotes.");
565
+ return output(snapshot.artifacts.map(artifactSummary), parsed, io, () => snapshot.artifacts.map(formatArtifact).join("\n") || "No artifacts.");
447
566
  }
448
567
  throw new WorkbenchUserError(`Unsupported list target: ${kind}`);
449
568
  }
@@ -472,13 +591,17 @@ async function handleAgent(parsed, io) {
472
591
  const name = requiredPositional(parsed, 2, "workbench agent show requires NAME.");
473
592
  const agent = (await listWorkbenchAgents(await coreOptions(parsed))).find((entry) => entry.name === name);
474
593
  if (!agent) {
475
- throw new WorkbenchUserError(`Agent not found: ${name}`);
594
+ throw new WorkbenchCodedError("ref_not_found", `Agent not found: ${name}`, {
595
+ remediation: "Run workbench agent list.",
596
+ subject: { agent: name },
597
+ exitCode: 1,
598
+ });
476
599
  }
477
600
  return output(agent, parsed, io, () => formatAgent(agent));
478
601
  }
479
602
  if (subcommand === "default") {
480
- const agent = await setDefaultWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent default requires NAME."), await coreOptions(parsed));
481
- return output(agent, parsed, io, () => `Default agent: ${agent.name}`);
603
+ const result = await setDefaultWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent default requires NAME."), await coreOptions(parsed));
604
+ return output(result, parsed, io, () => `Default agent: ${result.defaultAgent}`);
482
605
  }
483
606
  if (subcommand === "remove") {
484
607
  const result = await removeWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent remove requires NAME."), await coreOptions(parsed));
@@ -491,9 +614,13 @@ async function handleSkills(parsed, io) {
491
614
  if (subcommand !== "list") {
492
615
  throw new WorkbenchUserError(`Unsupported skills command: ${subcommand}`);
493
616
  }
494
- const snapshot = await createWorkbenchInspectionSnapshot(await coreOptions(parsed));
617
+ const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(await coreOptions(parsed));
495
618
  return output(snapshot.skillSources, parsed, io, () => snapshot.skillSources.map((source) => {
496
- const where = source.kind === "remote" ? `${source.from}${source.ref ? `#${source.ref}` : ""}` : source.path;
619
+ const where = source.kind === "remote"
620
+ ? `${source.from}${source.ref ? `#${source.ref}` : ""}`
621
+ : source.kind === "none"
622
+ ? "baseline:none"
623
+ : source.path;
497
624
  return `${source.name}\t${source.kind}\t${where}\tincludes=${source.includes?.length ?? 0}`;
498
625
  }).join("\n") || "No skills.");
499
626
  }
@@ -518,14 +645,46 @@ async function handleCase(parsed, io) {
518
645
  throw new WorkbenchUserError(`Unsupported case command: ${subcommand}`);
519
646
  }
520
647
  async function handleRemote(parsed, io) {
521
- const subcommand = requiredPositional(parsed, 1, "workbench remote requires add|list.");
648
+ const subcommand = requiredPositional(parsed, 1, "workbench remote requires add|list|remove.");
522
649
  if (subcommand === "add") {
523
- const remote = await addWorkbenchRemote(requiredPositional(parsed, 2, "workbench remote add requires NAME."), requiredPositional(parsed, 3, "workbench remote add requires URL."), await coreOptions(parsed));
524
- return output(remote, parsed, io, () => `Added remote ${remote.name}\t${remote.url}`);
650
+ const name = requiredFlag(parsed, {
651
+ flag: "name",
652
+ usage: "workbench remote add requires --name NAME.",
653
+ remediation: "Run workbench remote add --name origin --url https://HOST/skills/OWNER/SKILL.",
654
+ });
655
+ const url = requiredFlag(parsed, {
656
+ flag: "url",
657
+ usage: "workbench remote add requires --url URL.",
658
+ remediation: `Run workbench remote add --name ${name} --url https://HOST/skills/OWNER/SKILL.`,
659
+ });
660
+ rejectExtraInput(parsed, {
661
+ maxPositionals: 2,
662
+ message: "workbench remote add accepts --name NAME and --url URL, not positional NAME or URL.",
663
+ remediation: "Run workbench remote add --name origin --url https://HOST/skills/OWNER/SKILL.",
664
+ });
665
+ const result = await addWorkbenchRemote(name, url, {
666
+ ...(await coreOptions(parsed)),
667
+ replace: parsed.flags.replace === true,
668
+ dryRun: parsed.flags["dry-run"] === true,
669
+ });
670
+ return emitResult("workbench.cli.remote-add.v1", {
671
+ remote: result.remote,
672
+ operation: result.operation,
673
+ ...(result.dryRun ? { dryRun: true } : {}),
674
+ }, parsed, io, () => `${result.dryRun ? "Would update" : "Remote"} ${result.remote.name}: ${result.operation}\t${result.remote.kind}\t${result.remote.url}`);
525
675
  }
526
676
  if (subcommand === "list") {
527
677
  const remotes = await listWorkbenchRemotes(await coreOptions(parsed));
528
- return output(remotes, parsed, io, () => remotes.map((remote) => `${remote.name}\t${remote.url}`).join("\n") || "No remotes.");
678
+ return emitResult("workbench.cli.remote-list.v1", {
679
+ remotes: remotes,
680
+ }, parsed, io, () => remotes.map((remote) => `${remote.name}\t${remote.kind}\t${remote.url}`).join("\n") || "No remotes.");
681
+ }
682
+ if (subcommand === "remove") {
683
+ const result = await removeWorkbenchRemote(requiredPositional(parsed, 2, "workbench remote remove requires NAME."), await coreOptions(parsed));
684
+ return emitResult("workbench.cli.remote-remove.v1", {
685
+ remote: result.remote,
686
+ removed: result.removed,
687
+ }, parsed, io, () => result.removed ? `Removed remote ${result.remote}.` : `Remote ${result.remote} was not configured.`);
529
688
  }
530
689
  throw new WorkbenchUserError(`Unsupported remote command: ${subcommand}`);
531
690
  }
@@ -535,13 +694,25 @@ async function handleAuth(parsed, io) {
535
694
  const targetRaw = optionalPositional(parsed, 2);
536
695
  const profile = authProfileFlag(parsed);
537
696
  const store = localWorkbenchAdapterAuthStore(adapterAuthStoreRoot());
697
+ const cliAuth = await workbenchCliAuthStatus();
538
698
  if (targetRaw) {
539
699
  const status = await store.status(parseAuthTarget(targetRaw, profile));
540
- return output({ ok: true, command: "status", status }, parsed, io, () => formatAuthStatusRecord(status));
700
+ return emitResult("workbench.cli.auth-status.v1", {
701
+ workbenchCloud: cliAuth.workbenchCloud,
702
+ adapters: [authStatusRecordToJson(status)],
703
+ }, parsed, io, () => [
704
+ formatWorkbenchCloudAuthStatus(cliAuth.workbenchCloud),
705
+ "Adapter auth:",
706
+ formatAuthStatusRecord(status),
707
+ ].join("\n"));
541
708
  }
542
709
  const statuses = await store.listStatus();
543
710
  const required = await requiredAgentAuthStatuses(parsed, statuses);
544
- return output({ ok: true, command: "status", adapterStatuses: statuses, required }, parsed, io, () => formatAuthStatusList(statuses, required));
711
+ return emitResult("workbench.cli.auth-status.v1", {
712
+ workbenchCloud: cliAuth.workbenchCloud,
713
+ adapters: cliAuth.adapters,
714
+ required: required,
715
+ }, parsed, io, () => formatAuthStatusList(cliAuth.workbenchCloud, statuses, required));
545
716
  }
546
717
  if (subcommand === "connect") {
547
718
  const targetRaw = requiredPositional(parsed, 2, "workbench auth connect requires ADAPTER[/SLOT].");
@@ -554,33 +725,33 @@ async function handleAuth(parsed, io) {
554
725
  });
555
726
  const saved = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).put(bundle);
556
727
  const remote = await uploadAdapterConnection(saved, parsed);
557
- return output({
558
- ok: true,
559
- command: "connect",
560
- adapter: saved.adapterId,
561
- ...(saved.slot ? { slot: saved.slot } : {}),
562
- profile: saved.profile,
563
- method: saved.method,
564
- status: saved.status,
565
- version: saved.version,
566
- updatedAt: saved.updatedAt,
567
- remote,
568
- }, parsed, io, () => `Connected ${formatAuthTarget(saved)} ${saved.method} auth v${saved.version}; remote: ${remote.status}${remote.reason ? ` (${remote.reason})` : ""}.`);
728
+ return emitResult("workbench.cli.auth-connect.v1", {
729
+ localAdapter: {
730
+ adapter: saved.adapterId,
731
+ ...(saved.slot ? { slot: saved.slot } : {}),
732
+ profile: saved.profile,
733
+ method: saved.method,
734
+ status: saved.status,
735
+ version: saved.version,
736
+ updatedAt: saved.updatedAt,
737
+ },
738
+ workbenchCloud: remote,
739
+ }, parsed, io, () => `Connected ${formatAuthTarget(saved)} ${saved.method} auth v${saved.version}; Workbench Cloud: ${remote.sync}${remote.reason ? ` (${remote.reason})` : ""}.`);
569
740
  }
570
741
  if (subcommand === "disconnect") {
571
742
  const targetRaw = requiredPositional(parsed, 2, "workbench auth disconnect requires ADAPTER[/SLOT].");
572
743
  const target = parseAuthTarget(targetRaw, authProfileFlag(parsed));
573
744
  await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).disconnect(target);
574
745
  const remote = await deleteAdapterConnectionRemote(target, parsed);
575
- return output({
576
- ok: true,
577
- command: "disconnect",
578
- adapter: target.adapterId,
579
- ...(target.slot ? { slot: target.slot } : {}),
580
- profile: target.profile,
581
- status: "disconnected",
582
- remote,
583
- }, parsed, io, () => `Disconnected ${formatAuthTarget(target)}; remote: ${remote.status}${remote.reason ? ` (${remote.reason})` : ""}.`);
746
+ return emitResult("workbench.cli.auth-disconnect.v1", {
747
+ localAdapter: {
748
+ adapter: target.adapterId,
749
+ ...(target.slot ? { slot: target.slot } : {}),
750
+ profile: target.profile,
751
+ status: "disconnected",
752
+ },
753
+ workbenchCloud: remote,
754
+ }, parsed, io, () => `Disconnected ${formatAuthTarget(target)}; Workbench Cloud: ${remote.sync}${remote.reason ? ` (${remote.reason})` : ""}.`);
584
755
  }
585
756
  throw new WorkbenchUserError(`Unsupported auth command: ${subcommand}`);
586
757
  }
@@ -604,7 +775,7 @@ function validateCommandFlags(parsed, command) {
604
775
  if (!allowedSet.has(name) && name !== "help" && name !== "version") {
605
776
  throw new WorkbenchUserError(`Unsupported flag --${name} for workbench ${command}.`);
606
777
  }
607
- validateFlagValue(name, value);
778
+ validateFlagValue(name, value, command === "install" && (name === "agent" || name === "skill"));
608
779
  }
609
780
  }
610
781
  function allowedFlagsForCommand(parsed, command) {
@@ -615,11 +786,23 @@ function allowedFlagsForCommand(parsed, command) {
615
786
  const subcommand = parsed.positionals[1] ?? subcommands.defaultSubcommand;
616
787
  return subcommand ? subcommands.flags[subcommand] ?? ["json"] : ["json"];
617
788
  }
618
- function validateFlagValue(name, value) {
789
+ function validateFlagValue(name, value, repeatString = false) {
619
790
  const kind = FLAG_DEFINITIONS[name];
620
791
  if (!kind) {
621
792
  return;
622
793
  }
794
+ if (repeatString) {
795
+ if (Array.isArray(value)) {
796
+ if (value.some((entry) => !entry.trim())) {
797
+ throw new WorkbenchUserError(`--${name} requires a non-empty value.`);
798
+ }
799
+ return;
800
+ }
801
+ if (typeof value === "string" && value.trim()) {
802
+ return;
803
+ }
804
+ throw new WorkbenchUserError(`--${name} requires a non-empty value.`);
805
+ }
623
806
  if (kind === "boolean") {
624
807
  if (value !== true) {
625
808
  throw new WorkbenchUserError(`--${name} does not accept a value.`);
@@ -649,30 +832,83 @@ async function handleLogin(parsed, io) {
649
832
  if (parsed.positionals.length > 1) {
650
833
  throw new WorkbenchUserError("workbench login accepts no positional arguments.");
651
834
  }
835
+ if (parsed.flags["start-only"] === true && parsed.flags.wait === true) {
836
+ throw new WorkbenchCodedError("usage", "workbench login accepts only one of --start-only or --wait.", {
837
+ remediation: "Run workbench login --start-only or workbench login --wait --timeout 120.",
838
+ exitCode: 2,
839
+ });
840
+ }
841
+ const startOnly = parsed.flags["start-only"] === true;
842
+ const waitOnly = parsed.flags.wait === true;
843
+ const timeoutSeconds = intFlag(parsed, "timeout");
844
+ if (startOnly && timeoutSeconds !== undefined) {
845
+ throw new WorkbenchCodedError("usage", "workbench login --timeout only applies with --wait.", {
846
+ remediation: "Run workbench login --start-only, then workbench login --wait --timeout 120.",
847
+ exitCode: 2,
848
+ });
849
+ }
850
+ if (waitOnly && timeoutSeconds === undefined) {
851
+ throw new WorkbenchCodedError("usage", "workbench login --wait requires --timeout N.", {
852
+ remediation: "Run workbench login --wait --timeout 120.",
853
+ exitCode: 2,
854
+ });
855
+ }
652
856
  const config = await loadConfig();
653
857
  const baseUrl = selectWorkbenchBaseUrl({
654
858
  explicitBaseUrl: stringFlag(parsed, "base-url"),
655
859
  configBaseUrl: config.baseUrl,
656
860
  });
657
- const authorization = await requestDeviceAuthorization(baseUrl);
658
- if (parsed.flags.json === true) {
659
- io.stdout.write(`${JSON.stringify({ ok: true, status: "authorization_pending", ...authorization }, null, 2)}\n`);
660
- }
661
- else {
662
- io.stdout.write(`Open ${authorization.verification_uri_complete}\nCode: ${authorization.user_code}\n`);
663
- }
664
- if (parsed.flags["no-open"] !== true) {
665
- await openBrowser(authorization.verification_uri_complete).catch(() => undefined);
666
- }
667
- const token = await pollDeviceToken(baseUrl, authorization);
668
- await writeConfig({ schema: CONFIG_SCHEMA, baseUrl, accessToken: token.access_token });
669
- if (parsed.flags.json === true) {
670
- io.stdout.write(`${JSON.stringify({ ok: true, baseUrl, expiresIn: token.expires_in ?? null }, null, 2)}\n`);
861
+ const pending = waitOnly ? await readPendingDeviceAuthorization(baseUrl) : null;
862
+ const record = pending ?? await startDeviceAuthorization(baseUrl);
863
+ const freshAuthorization = pending === null;
864
+ if (startOnly) {
865
+ await writePendingDeviceAuthorization(record);
866
+ if (parsed.flags["no-open"] !== true) {
867
+ await openBrowser(record.verification_uri_complete).catch(() => undefined);
868
+ }
869
+ return emitResult("workbench.cli.login.v1", {
870
+ status: "authorization_pending",
871
+ baseUrl,
872
+ verificationUri: record.verification_uri,
873
+ verificationUriComplete: record.verification_uri_complete,
874
+ userCode: record.user_code,
875
+ expiresAt: record.expiresAt,
876
+ resume: "workbench login --wait --timeout 120",
877
+ }, parsed, io, () => `Open ${record.verification_uri_complete}\nCode: ${record.user_code}\nResume: workbench login --wait --timeout 120`);
878
+ }
879
+ await writePendingDeviceAuthorization(record);
880
+ if (freshAuthorization && !parsed.flags.json) {
881
+ io.stdout.write(`Open ${record.verification_uri_complete}\nCode: ${record.user_code}\n`);
882
+ }
883
+ if (!waitOnly && parsed.flags["no-open"] !== true) {
884
+ await openBrowser(record.verification_uri_complete).catch(() => undefined);
885
+ }
886
+ let token;
887
+ try {
888
+ token = await pollDeviceToken(baseUrl, record, timeoutSeconds);
671
889
  }
672
- else {
673
- io.stdout.write(`Workbench API: ${baseUrl}\n`);
890
+ catch (error) {
891
+ const denied = error instanceof WorkbenchCodedError && error.code === "login_denied";
892
+ const expired = Date.parse(record.expiresAt) <= Date.now();
893
+ if (denied || expired) {
894
+ await clearPendingDeviceAuthorization();
895
+ }
896
+ throw error;
674
897
  }
675
- return 0;
898
+ const username = await fetchWorkbenchUsername(baseUrl, token.access_token).catch(() => undefined);
899
+ await writeConfig({
900
+ schema: CONFIG_SCHEMA,
901
+ baseUrl,
902
+ accessToken: token.access_token,
903
+ ...(username ? { username } : {}),
904
+ });
905
+ await clearPendingDeviceAuthorization();
906
+ return emitResult("workbench.cli.login.v1", {
907
+ status: "authenticated",
908
+ baseUrl,
909
+ ...(username ? { username } : {}),
910
+ ...(token.expires_in !== undefined ? { expiresIn: token.expires_in } : {}),
911
+ }, parsed, io, () => `Workbench Cloud: authenticated${username ? ` as ${username}` : ""}\nWorkbench API: ${baseUrl}`);
676
912
  }
677
913
  async function handleLogout(parsed, io) {
678
914
  if (parsed.positionals.length > 1) {
@@ -680,18 +916,257 @@ async function handleLogout(parsed, io) {
680
916
  }
681
917
  const config = await loadConfig();
682
918
  const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
683
- if (config.accessToken && !baseUrl) {
919
+ const tokenPresent = Boolean(config.accessToken);
920
+ if (tokenPresent && !baseUrl) {
684
921
  throw new WorkbenchUserError("Missing Workbench API URL. Set WORKBENCH_API_URL or run `workbench login --base-url URL`.");
685
922
  }
923
+ let revoke = "skipped";
686
924
  if (config.accessToken && baseUrl) {
687
- await fetch(`${baseUrl}/api/oauth/revoke`, {
688
- method: "POST",
689
- headers: { "content-type": "application/json" },
690
- body: JSON.stringify({ token: config.accessToken }),
691
- }).catch(() => undefined);
925
+ try {
926
+ const response = await fetch(`${baseUrl}/api/oauth/revoke`, {
927
+ method: "POST",
928
+ headers: { "content-type": "application/json" },
929
+ body: JSON.stringify({ token: config.accessToken }),
930
+ });
931
+ revoke = response.ok ? "revoked" : "failed";
932
+ }
933
+ catch {
934
+ revoke = "failed";
935
+ }
936
+ }
937
+ const configRemoved = tokenPresent;
938
+ if (tokenPresent) {
939
+ await writeConfig({ schema: CONFIG_SCHEMA, ...(baseUrl ? { baseUrl } : {}) });
940
+ }
941
+ const adapterStatuses = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).listStatus().catch(() => []);
942
+ const adapterAuthRetained = adapterStatuses.length > 0;
943
+ return emitResult("workbench.cli.logout.v1", {
944
+ ...(baseUrl ? { baseUrl } : {}),
945
+ tokenPresent,
946
+ revoke,
947
+ configRemoved,
948
+ adapterAuthRetained,
949
+ }, parsed, io, () => [
950
+ `Logged out of Workbench${baseUrl ? ` (${baseUrl})` : ""}.`,
951
+ `Token: ${tokenPresent ? "present" : "absent"}; revoke ${revoke}; config ${configRemoved ? "removed" : "unchanged"}.`,
952
+ adapterAuthRetained
953
+ ? "Local adapter auth records were retained; run workbench auth disconnect ADAPTER to remove them."
954
+ : "No local adapter auth records remain.",
955
+ ].join("\n"));
956
+ }
957
// Implements `workbench install --source SOURCE` with one of --agent NAME,
// --local or --list.
// Flow: validate flags -> parse the source URL -> download the snapshot ->
// either list the available skill/targets (--list) or install the snapshot into
// the resolved targets (honouring --yes for overwrite and --dry-run).
// Both outcomes are emitted under the "workbench.cli.install.v1" schema.
async function handleInstall(parsed, io) {
    const exampleCommand = "Run workbench install --source https://HOST/skills/OWNER/SKILL --agent codex.";
    const source = requiredFlag(parsed, {
        flag: "source",
        usage: "workbench install requires --source SOURCE.",
        remediation: exampleCommand,
    });
    rejectExtraInput(parsed, {
        maxPositionals: 1,
        message: "workbench install accepts --source SOURCE, not positional SOURCE.",
        remediation: exampleCommand,
    });
    const listOnly = parsed.flags.list === true;
    const dryRun = parsed.flags["dry-run"] === true;
    // --list needs no destination; every other invocation must name one.
    const hasTarget = listOnly || stringsFlag(parsed, "agent").length > 0 || parsed.flags.local === true;
    if (!hasTarget) {
        throw new WorkbenchCodedError("install_target_required", "workbench install requires an explicit target.", {
            remediation: "Run workbench install --source SOURCE --agent codex, workbench install --source SOURCE --agent claude, or workbench install --source SOURCE --local.",
            exitCode: 2,
        });
    }
    const workbenchSource = parseWorkbenchInstallSource(source);
    if (!workbenchSource) {
        throw new WorkbenchCodedError("usage", "workbench install requires a Workbench Cloud source URL.", {
            remediation: exampleCommand,
            exitCode: 2,
        });
    }
    const snapshot = await fetchWorkbenchInstallSourceSnapshot(workbenchSource, source);
    const sourceSummary = workbenchInstallSourceSummary(workbenchSource, snapshot);
    if (listOnly) {
        const describeListing = () => {
            const targetRows = supportedInstallTargets().map((target) => ` ${target.agent}\t${target.destination}`);
            return [
                `${snapshot.name}\t${snapshot.versionId}\tfiles=${snapshot.files.length}`,
                "Targets:",
                ...targetRows,
            ].join("\n");
        };
        return emitResult("workbench.cli.install.v1", {
            source: sourceSummary,
            skills: [snapshot.name],
            fileCount: snapshot.files.length,
            targets: installTargetsToJson(supportedInstallTargets()),
        }, parsed, io, describeListing);
    }
    const targets = resolveInstallTargets({
        agents: stringsFlag(parsed, "agent"),
        local: parsed.flags.local === true,
        skillName: snapshot.name,
    });
    const result = await installSnapshotToTargets({
        snapshot,
        targets,
        overwrite: parsed.flags.yes === true,
        dryRun,
    });
    const payload = {
        source: sourceSummary,
        result: result.result,
        targets: result.targets,
        filesCopied: result.filesCopied,
    };
    if (dryRun) {
        payload.dryRun = true;
    }
    const describeInstall = () => {
        const headline = dryRun
            ? `Would install ${snapshot.name}: filesCopied=${result.filesCopied}`
            : `Installed ${snapshot.name}: ${result.result}`;
        const targetRows = result.targets.map((target) => ` ${target.agent}\t${target.previous}\t${target.destination}`);
        return [headline, ...targetRows].join("\n");
    };
    return emitResult("workbench.cli.install.v1", payload, parsed, io, describeInstall);
}
1019
// Builds the JSON-friendly description of an install source.
// The URLs are derived from the *requested* source (base URL, owner, skill),
// while owner / skill / versionId report what the downloaded snapshot says.
// Every path component is percent-encoded with encodeURIComponent.
function workbenchInstallSourceSummary(source, snapshot) {
    const { owner, name: skill, versionId } = snapshot;
    const encodedSkillPath = [source.owner, source.skill]
        .map((part) => encodeURIComponent(part))
        .join("/");
    const installUrl = `${source.baseUrl}/skills/${encodedSkillPath}`;
    const pinnedInstallUrl = `${installUrl}/releases/${encodeURIComponent(versionId)}`;
    return {
        kind: "workbench-cloud",
        owner,
        skill,
        versionId,
        installUrl,
        pinnedInstallUrl,
    };
}
1030
// Parses a Workbench Cloud skill URL of the form
//   http(s)://HOST/skills/OWNER/SKILL
//   http(s)://HOST/skills/OWNER/SKILL/releases/VERSION
// Returns { baseUrl, owner, skill } (plus `version` for the pinned form) with
// the path segments percent-decoded and baseUrl set to the URL origin.
// Returns undefined when `source` cannot be interpreted as such a URL at all:
// it is not a parseable URL, is not http/https, contains a malformed
// percent-escape, or its first path segment is not "skills".
// Throws WorkbenchUserError when the path starts with "skills" but the owner or
// skill is missing or the remaining segments have an unsupported shape.
function parseWorkbenchInstallSource(source) {
    let url;
    try {
        url = new URL(source);
    }
    catch {
        return undefined;
    }
    if (url.protocol !== "http:" && url.protocol !== "https:") {
        return undefined;
    }
    let segments;
    try {
        segments = url.pathname
            .split("/")
            .filter(Boolean)
            .map((segment) => decodeURIComponent(segment));
    }
    catch {
        // The URL parser accepts malformed escapes such as "%E0%A4%A", but
        // decodeURIComponent throws URIError on them. Treat that like any other
        // unparseable source instead of leaking a raw URIError to the caller.
        return undefined;
    }
    if (segments[0] !== "skills") {
        return undefined;
    }
    const [, owner, skill] = segments;
    if (!owner || !skill) {
        throw new WorkbenchUserError(`Invalid Workbench skill URL: ${source}`);
    }
    if (segments.length === 3) {
        return {
            baseUrl: url.origin,
            owner,
            skill,
        };
    }
    if (segments.length === 5 && segments[3] === "releases" && segments[4]) {
        return {
            baseUrl: url.origin,
            owner,
            skill,
            version: segments[4],
        };
    }
    throw new WorkbenchUserError(`Invalid Workbench skill URL: ${source}`);
}
1068
// Downloads the source snapshot for a parsed Workbench Cloud skill reference.
//   source        - { baseUrl, owner, skill, version? } as produced by
//                   parseWorkbenchInstallSource.
//   displaySource - the user-supplied source string, used only in messages and
//                   error subjects.
// A bearer token from workbenchCloudToken is attached when one is available.
// Failure handling, in order: a structured cloud error body (exit code 2 for
// HTTP 400, otherwise 1), HTTP 401 ("auth_required"), any other non-OK status,
// a non-JSON body, and finally a pinned release that resolved to a different
// versionId (all "install_failed"). Returns the validated snapshot.
async function fetchWorkbenchInstallSourceSnapshot(source, displaySource) {
    const token = await workbenchCloudToken({ baseUrl: source.baseUrl });
    const skillPath = `/api/workbench/source/skills/${encodeURIComponent(source.owner)}/${encodeURIComponent(source.skill)}`;
    const apiPath = source.version
        ? `${skillPath}/releases/${encodeURIComponent(source.version)}/source`
        : `${skillPath}/source`;
    const headers = token ? { authorization: `Bearer ${token}` } : {};
    const response = await fetch(`${source.baseUrl}${apiPath}`, { headers });
    const body = await response.text();

    // A structured error body wins over the plain HTTP status checks below.
    const cloudError = parseWorkbenchCloudErrorBody(body);
    if (cloudError) {
        const details = { retryable: cloudError.retryable };
        if (cloudError.remediation) {
            details.remediation = cloudError.remediation;
        }
        if (cloudError.subject) {
            details.subject = cloudError.subject;
        }
        details.exitCode = response.status === 400 ? 2 : 1;
        throw new WorkbenchCodedError(cloudError.code, cloudError.message, details);
    }
    if (response.status === 401) {
        const message = token
            ? `Workbench Cloud rejected the provided token while installing ${displaySource}.`
            : `Authentication is required to install ${displaySource}.`;
        throw new WorkbenchCodedError("auth_required", message, {
            remediation: `Run workbench login --base-url ${source.baseUrl}.`,
            exitCode: 1,
        });
    }
    if (!response.ok) {
        throw new WorkbenchCodedError("install_failed", `Unable to download Workbench source ${displaySource}: ${response.status} ${readResponseError(body) ?? response.statusText}`, {
            subject: { source: displaySource, status: response.status },
            exitCode: 1,
        });
    }
    let payload;
    try {
        // An empty body is passed on as null and rejected by the snapshot parser.
        payload = body ? JSON.parse(body) : null;
    }
    catch {
        throw new WorkbenchCodedError("install_failed", `Workbench source ${displaySource} did not return JSON.`, {
            subject: { source: displaySource },
            exitCode: 1,
        });
    }
    const snapshot = parseWorkbenchInstallSourceSnapshot(payload, displaySource);
    const requestedVersion = source.version;
    if (requestedVersion && snapshot.versionId !== requestedVersion) {
        throw new WorkbenchCodedError("install_failed", `Workbench source ${displaySource} resolved ${snapshot.versionId} instead of requested release ${requestedVersion}.`, {
            subject: { source: displaySource, resolvedVersionId: snapshot.versionId, requestedVersionId: requestedVersion },
            exitCode: 1,
        });
    }
    return snapshot;
}
1121
// Validates a decoded "workbench.source.snapshot.v1" payload and returns a
// normalised copy containing only schema, owner, name, versionId and files.
// Throws WorkbenchCodedError("install_failed") when the schema marker is wrong
// or when owner, name, versionId or the file list is missing/empty. File
// entries are parsed (and may throw) before the completeness check runs.
function parseWorkbenchInstallSourceSnapshot(value, displaySource) {
    const SNAPSHOT_SCHEMA = "workbench.source.snapshot.v1";
    const installFailure = (message) => new WorkbenchCodedError("install_failed", message, {
        subject: { source: displaySource },
        exitCode: 1,
    });
    const stringOrEmpty = (candidate) => (typeof candidate === "string" ? candidate : "");

    const record = asRecord(value);
    if (record?.schema !== SNAPSHOT_SCHEMA) {
        throw installFailure(`Workbench source ${displaySource} did not return a source snapshot.`);
    }
    const owner = stringOrEmpty(record.owner);
    const name = stringOrEmpty(record.name);
    const versionId = stringOrEmpty(record.versionId);
    const files = [];
    if (Array.isArray(record.files)) {
        for (const entry of record.files) {
            files.push(parseWorkbenchInstallSourceFile(entry, displaySource));
        }
    }
    const complete = Boolean(owner) && Boolean(name) && Boolean(versionId) && files.length > 0;
    if (!complete) {
        throw installFailure(`Workbench source ${displaySource} returned an incomplete source snapshot.`);
    }
    return { schema: SNAPSHOT_SCHEMA, owner, name, versionId, files };
}
1147
// Validates one file entry of a source snapshot.
// Requires a non-empty string `path` and a string `content` (which may be
// empty); anything else throws WorkbenchCodedError("install_failed").
// The returned entry carries the normalised path, `kind` only when it is
// "text" or "binary", `encoding` ("base64" if declared, otherwise "utf8"),
// `executable` (true only when the entry says exactly true) and the content.
function parseWorkbenchInstallSourceFile(value, displaySource) {
    const record = asRecord(value);
    const filePath = typeof record?.path === "string" ? record.path : "";
    const content = typeof record?.content === "string" ? record.content : undefined;
    // A non-record value also ends up here because filePath stays empty.
    if (!record || !filePath || content === undefined) {
        throw new WorkbenchCodedError("install_failed", `Workbench source ${displaySource} returned an invalid file entry.`, {
            subject: { source: displaySource },
            exitCode: 1,
        });
    }
    const entry = { path: normalizeInstallSnapshotPath(filePath) };
    if (record.kind === "text" || record.kind === "binary") {
        entry.kind = record.kind;
    }
    entry.encoding = record.encoding === "base64" ? "base64" : "utf8";
    entry.executable = record.executable === true;
    entry.content = content;
    return entry;
}
696
1171
  async function loadConfig() {
697
1172
  const parsed = await readConfigJson(configPath()) ?? {};
@@ -699,11 +1174,23 @@ async function loadConfig() {
699
1174
  schema: CONFIG_SCHEMA,
700
1175
  ...(typeof parsed.baseUrl === "string" ? { baseUrl: normalizeBaseUrl(parsed.baseUrl) } : {}),
701
1176
  ...(typeof parsed.accessToken === "string" ? { accessToken: parsed.accessToken } : {}),
1177
+ ...(typeof parsed.username === "string" ? { username: parsed.username } : {}),
702
1178
  };
703
1179
  }
704
- async function workbenchRemoteAuthToken() {
1180
// Resolves the Workbench Cloud bearer token for an authenticated request.
// Precedence: the config file's accessToken, then the environment tokens
// returned by workbenchCloudEnvToken (WORKBENCH_API_TOKEN, then
// WORKBENCH_SMOKE_BEARER_TOKEN).
// When options.baseUrl is given and the config also records a base URL, the
// config token is used only if the two normalise to the same value, so a token
// stored for one host is not sent to another.
async function workbenchCloudToken(options = {}) {
    const { accessToken, baseUrl: configBaseUrl } = await loadConfig();
    const targetBaseUrl = options.baseUrl;
    if (accessToken) {
        // With either side unknown there is nothing to compare.
        const nothingToCompare = !targetBaseUrl || !configBaseUrl;
        if (nothingToCompare || normalizeBaseUrl(configBaseUrl) === normalizeBaseUrl(targetBaseUrl)) {
            return accessToken;
        }
    }
    return workbenchCloudEnvToken();
}
1192
// Returns the first non-blank token found in WORKBENCH_API_TOKEN and then
// WORKBENCH_SMOKE_BEARER_TOKEN (surrounding whitespace trimmed), or undefined
// when both are unset or blank.
function workbenchCloudEnvToken() {
    const candidates = ["WORKBENCH_API_TOKEN", "WORKBENCH_SMOKE_BEARER_TOKEN"];
    for (const variable of candidates) {
        const token = process.env[variable]?.trim();
        if (token) {
            return token;
        }
    }
    return undefined;
}
708
1195
  async function readConfigJson(filePath) {
709
1196
  try {
@@ -723,6 +1210,9 @@ async function writeConfig(config) {
723
1210
  function configPath() {
724
1211
  return process.env.WORKBENCH_CONFIG?.trim() || path.join(os.homedir(), ".workbench", "config.json");
725
1212
  }
1213
// Location of the pending device-authorization record: the trimmed
// WORKBENCH_DEVICE_AUTH environment variable when it is non-blank, otherwise
// "device-auth.json" in the same directory as the config file.
function deviceAuthPath() {
    const override = process.env.WORKBENCH_DEVICE_AUTH?.trim();
    if (override) {
        return override;
    }
    const configDirectory = path.dirname(configPath());
    return path.join(configDirectory, "device-auth.json");
}
726
1216
  function selectWorkbenchBaseUrl(input = {}) {
727
1217
  const baseUrl = optionalWorkbenchBaseUrl(input);
728
1218
  if (!baseUrl) {
@@ -742,13 +1232,41 @@ function normalizeBaseUrl(value) {
742
1232
  }
743
1233
  async function requestDeviceAuthorization(baseUrl) {
744
1234
  const response = await fetch(`${baseUrl}/api/oauth/device/code`, { method: "POST" });
1235
+ const text = await response.text();
1236
+ const cloudError = parseWorkbenchCloudErrorBody(text);
1237
+ if (cloudError) {
1238
+ throw new WorkbenchCodedError(cloudError.code, cloudError.message, {
1239
+ retryable: cloudError.retryable,
1240
+ ...(cloudError.remediation ? { remediation: cloudError.remediation } : {}),
1241
+ ...(cloudError.subject ? { subject: cloudError.subject } : {}),
1242
+ exitCode: 1,
1243
+ });
1244
+ }
745
1245
  if (!response.ok) {
746
- throw new WorkbenchUserError(`Device login failed: ${readResponseError(await response.text()) ?? response.statusText}`);
1246
+ throw new WorkbenchCodedError("login_denied", `Device login failed: ${readResponseError(text) ?? response.statusText}`, {
1247
+ exitCode: 1,
1248
+ });
747
1249
  }
748
- return await response.json();
1250
+ return JSON.parse(text);
749
1251
  }
750
- async function pollDeviceToken(baseUrl, authorization) {
751
- const deadline = Date.now() + Math.max(1, authorization.expires_in) * 1000;
1252
// Requests a fresh device authorization from `baseUrl` and converts the
// response into the persisted "workbench.cli.device-auth.v1" record.
// The relative `expires_in` (seconds, floored at 1) is turned into an absolute
// ISO-8601 `expiresAt`; `interval` is copied only when the server sent one.
// If `expires_in` is missing or not numeric the record falls back to a
// 15-minute lifetime (the same fallback pollDeviceToken applies to an
// unparseable expiresAt) instead of letting `new Date(NaN).toISOString()`
// throw a RangeError.
async function startDeviceAuthorization(baseUrl) {
    const FALLBACK_LIFETIME_SECONDS = 15 * 60;
    const authorization = await requestDeviceAuthorization(baseUrl);
    const requestedSeconds = Number(authorization.expires_in);
    const lifetimeSeconds = Number.isFinite(requestedSeconds)
        ? Math.max(1, requestedSeconds)
        : FALLBACK_LIFETIME_SECONDS;
    return {
        schema: "workbench.cli.device-auth.v1",
        baseUrl,
        device_code: authorization.device_code,
        user_code: authorization.user_code,
        verification_uri: authorization.verification_uri,
        verification_uri_complete: authorization.verification_uri_complete,
        expiresAt: new Date(Date.now() + lifetimeSeconds * 1000).toISOString(),
        ...(authorization.interval !== undefined ? { interval: authorization.interval } : {}),
    };
}
1265
+ async function pollDeviceToken(baseUrl, authorization, timeoutSeconds) {
1266
+ const expiresAtMs = Date.parse(authorization.expiresAt);
1267
+ const expiryDeadline = Number.isFinite(expiresAtMs) ? expiresAtMs : Date.now() + 15 * 60 * 1000;
1268
+ const timeoutDeadline = timeoutSeconds ? Date.now() + timeoutSeconds * 1000 : Number.POSITIVE_INFINITY;
1269
+ const deadline = Math.min(expiryDeadline, timeoutDeadline);
752
1270
  let intervalMs = Math.max(1, authorization.interval ?? 5) * 1000;
753
1271
  while (Date.now() < deadline) {
754
1272
  const response = await fetch(`${baseUrl}/api/oauth/token`, {
@@ -768,17 +1286,87 @@ async function pollDeviceToken(baseUrl, authorization) {
768
1286
  intervalMs += 5000;
769
1287
  }
770
1288
  else if (error !== "authorization_pending") {
771
- throw new WorkbenchUserError(`Device login failed: ${error}`);
1289
+ throw new WorkbenchCodedError("login_denied", `Device login failed: ${error}`, {
1290
+ exitCode: 1,
1291
+ });
772
1292
  }
773
1293
  await sleep(intervalMs);
774
1294
  }
775
- throw new WorkbenchUserError("Device login timed out before authorization completed.");
1295
+ throw new WorkbenchCodedError("login_pending", "Device login is still waiting for browser authorization.", {
1296
+ retryable: true,
1297
+ remediation: "Authorize the device in the browser, then run workbench login --wait --timeout 120.",
1298
+ subject: {
1299
+ retryAfterSeconds: Math.max(1, Math.ceil(intervalMs / 1000)),
1300
+ verificationUri: authorization.verification_uri,
1301
+ verificationUriComplete: authorization.verification_uri_complete,
1302
+ userCode: authorization.user_code,
1303
+ expiresAt: authorization.expiresAt,
1304
+ },
1305
+ exitCode: 1,
1306
+ });
1307
+ }
1308
/**
 * Fetches `${baseUrl}/api/workbench/profile` with a bearer token and extracts
 * `profile.username` from the JSON body.
 *
 * @param {string} baseUrl - Prefix for the profile endpoint.
 * @param {string} accessToken - Sent as `Bearer <token>` in the authorization header.
 * @returns {Promise<string|undefined>} The username, or undefined when the
 *   response is not ok or the username is not a string.
 */
async function fetchWorkbenchUsername(baseUrl, accessToken) {
  const headers = { authorization: `Bearer ${accessToken}` };
  const response = await fetch(`${baseUrl}/api/workbench/profile`, { headers });
  if (!response.ok) {
    return undefined;
  }
  const body = asRecord(await response.json());
  const username = asRecord(body?.profile)?.username;
  if (typeof username !== "string") {
    return undefined;
  }
  return username;
}
1319
/**
 * Loads the stored device-authorization record and returns it only when it
 * belongs to `baseUrl` and its `expiresAt` is still in the future.
 *
 * @param {string} baseUrl - Base URL the stored record must match exactly.
 * @returns {Promise<object|null>} The stored record, or null when it is
 *   missing, is for another base URL, or has expired.
 */
async function readPendingDeviceAuthorization(baseUrl) {
  const pending = await readDeviceAuthorizationJson(deviceAuthPath());
  if (!pending) {
    return null;
  }
  if (pending.baseUrl !== baseUrl) {
    return null;
  }
  const expired = Date.parse(pending.expiresAt) <= Date.now();
  return expired ? null : pending;
}
1326
/**
 * Persists a pending device-authorization record as pretty-printed JSON at
 * deviceAuthPath(), creating the parent directory when needed.
 *
 * @param {object} record - Record to serialize; written with a trailing newline.
 * @returns {Promise<void>}
 */
async function writePendingDeviceAuthorization(record) {
  const filePath = deviceAuthPath();
  await fs.mkdir(path.dirname(filePath), { recursive: true });
  // The record carries the device_code, so create the file readable by the
  // owner only. The mode applies when the file is created; an existing file
  // keeps its current permissions.
  await fs.writeFile(filePath, `${JSON.stringify(record, null, 2)}\n`, { mode: 0o600 });
}
1330
/**
 * Deletes the file at deviceAuthPath(). `force: true` makes a missing file a
 * no-op instead of an error.
 *
 * @returns {Promise<void>}
 */
async function clearPendingDeviceAuthorization() {
  const target = deviceAuthPath();
  const removal = { force: true };
  await fs.rm(target, removal);
}
1333
/**
 * Reads and validates a stored device-authorization record.
 *
 * @param {string} filePath - File to read as UTF-8 JSON.
 * @returns {Promise<object|null>} A normalized "workbench.cli.device-auth.v1"
 *   record, or null when the file is missing, is not valid JSON, or fails the
 *   field checks below.
 * @throws Any read error other than ENOENT, and any non-SyntaxError raised
 *   while decoding.
 */
async function readDeviceAuthorizationJson(filePath) {
  let text;
  try {
    text = await fs.readFile(filePath, "utf8");
  }
  catch (error) {
    if (error?.code === "ENOENT") {
      return null;
    }
    throw error;
  }
  let record;
  try {
    record = asRecord(JSON.parse(text));
  }
  catch (error) {
    // A truncated or corrupted file is treated like a schema mismatch: the
    // pending authorization is reported as absent rather than failing.
    if (error instanceof SyntaxError) {
      return null;
    }
    throw error;
  }
  if (record?.schema !== "workbench.cli.device-auth.v1" ||
    typeof record.baseUrl !== "string" ||
    typeof record.device_code !== "string" ||
    typeof record.user_code !== "string" ||
    typeof record.verification_uri !== "string" ||
    typeof record.verification_uri_complete !== "string" ||
    typeof record.expiresAt !== "string" ||
    !Number.isFinite(Date.parse(record.expiresAt))) {
    return null;
  }
  // Copy only the known fields so unexpected keys in the file are dropped.
  return {
    schema: "workbench.cli.device-auth.v1",
    baseUrl: record.baseUrl,
    device_code: record.device_code,
    user_code: record.user_code,
    verification_uri: record.verification_uri,
    verification_uri_complete: record.verification_uri_complete,
    expiresAt: record.expiresAt,
    ...(typeof record.interval === "number" ? { interval: record.interval } : {}),
  };
}
777
1364
  async function apiRequest(apiPath, options = {}, baseUrlOverride) {
778
1365
  const config = await loadConfig();
779
1366
  const baseUrl = baseUrlOverride !== undefined
780
1367
  ? normalizeBaseUrl(baseUrlOverride)
781
1368
  : selectWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
1369
+ const token = await workbenchCloudToken({ baseUrl });
782
1370
  const method = options.method ?? "GET";
783
1371
  const canRetry = method === "GET";
784
1372
  const requestBody = encodeJsonRequestBody(options.body);
@@ -790,7 +1378,7 @@ async function apiRequest(apiPath, options = {}, baseUrlOverride) {
790
1378
  method,
791
1379
  headers: {
792
1380
  ...requestBody.headers,
793
- ...(config.accessToken ? { authorization: `Bearer ${config.accessToken}` } : {}),
1381
+ ...(token ? { authorization: `Bearer ${token}` } : {}),
794
1382
  },
795
1383
  body: requestBody.body,
796
1384
  });
@@ -805,6 +1393,21 @@ async function apiRequest(apiPath, options = {}, baseUrlOverride) {
805
1393
  }
806
1394
  if (!response.ok) {
807
1395
  const text = await response.text();
1396
+ const cloudError = parseWorkbenchCloudErrorBody(text);
1397
+ if (cloudError) {
1398
+ const requestError = new WorkbenchCodedError(cloudError.code, cloudError.message, {
1399
+ retryable: cloudError.retryable,
1400
+ ...(cloudError.remediation ? { remediation: cloudError.remediation } : {}),
1401
+ ...(cloudError.subject ? { subject: cloudError.subject } : {}),
1402
+ exitCode: response.status === 400 ? 2 : 1,
1403
+ });
1404
+ lastError = requestError;
1405
+ if (canRetry && attempt < API_REQUEST_MAX_ATTEMPTS && cloudError.retryable) {
1406
+ await sleep(250 * attempt);
1407
+ continue;
1408
+ }
1409
+ throw requestError;
1410
+ }
808
1411
  const requestError = new WorkbenchApiRequestError(response.status, readResponseError(text) ?? `Request failed with status ${response.status}${response.statusText ? ` ${response.statusText}` : ""}.`, text);
809
1412
  lastError = requestError;
810
1413
  if (canRetry && attempt < API_REQUEST_MAX_ATTEMPTS && isTransientApiRequestError(requestError)) {
@@ -834,26 +1437,44 @@ function encodeJsonRequestBody(body) {
834
1437
  };
835
1438
  }
836
1439
  async function uploadAdapterConnection(bundle, parsed) {
1440
+ const token = await workbenchCloudToken();
837
1441
  if (parsed.flags["local-only"] === true) {
838
- return { status: "skipped", reason: "local_only" };
1442
+ return {
1443
+ status: token ? "authenticated" : "not_authenticated",
1444
+ sync: "skipped",
1445
+ reason: "local_only",
1446
+ };
839
1447
  }
840
- const config = await loadConfig();
841
- if (!config.accessToken) {
842
- return { status: "skipped", reason: "not_authenticated" };
1448
+ if (!token) {
1449
+ return {
1450
+ status: "not_authenticated",
1451
+ sync: "skipped",
1452
+ reason: "not_authenticated",
1453
+ remediation: "Run workbench login.",
1454
+ };
843
1455
  }
844
1456
  await apiRequest(adapterConnectionApiPath(bundle), { method: "PUT", body: { bundle } });
845
- return { status: "connected" };
1457
+ return { status: "authenticated", sync: "uploaded" };
846
1458
  }
847
1459
  async function deleteAdapterConnectionRemote(target, parsed) {
1460
+ const token = await workbenchCloudToken();
848
1461
  if (parsed.flags["local-only"] === true) {
849
- return { status: "skipped", reason: "local_only" };
1462
+ return {
1463
+ status: token ? "authenticated" : "not_authenticated",
1464
+ sync: "skipped",
1465
+ reason: "local_only",
1466
+ };
850
1467
  }
851
- const config = await loadConfig();
852
- if (!config.accessToken) {
853
- return { status: "skipped", reason: "not_authenticated" };
1468
+ if (!token) {
1469
+ return {
1470
+ status: "not_authenticated",
1471
+ sync: "skipped",
1472
+ reason: "not_authenticated",
1473
+ remediation: "Run workbench login.",
1474
+ };
854
1475
  }
855
1476
  await apiRequest(adapterConnectionApiPath(target), { method: "DELETE" });
856
- return { status: "disconnected" };
1477
+ return { status: "authenticated", sync: "deleted" };
857
1478
  }
858
1479
  function adapterConnectionApiPath(target) {
859
1480
  const params = new URLSearchParams({ profile: target.profile });
@@ -883,6 +1504,25 @@ function readResponseError(text) {
883
1504
  return text.trim() || null;
884
1505
  }
885
1506
  }
1507
/**
 * Decodes a response body that follows the "workbench.cloud.error.v1" schema.
 *
 * @param {string} text - Raw response body.
 * @returns {object|null} `{ code, message, retryable }` plus `remediation`
 *   (when a string) and `subject` (when asRecord accepts it); null when the
 *   body is not JSON or lacks the schema tag, a string code, or a string message.
 */
function parseWorkbenchCloudErrorBody(text) {
  try {
    const payload = asRecord(JSON.parse(text));
    const wellFormed = payload?.schema === "workbench.cloud.error.v1" &&
      typeof payload.code === "string" &&
      typeof payload.message === "string";
    if (!wellFormed) {
      return null;
    }
    const parsed = {
      code: payload.code,
      message: payload.message,
      // Only a literal `true` marks the error as retryable.
      retryable: payload.retryable === true,
    };
    if (typeof payload.remediation === "string") {
      parsed.remediation = payload.remediation;
    }
    const subject = asRecord(payload.subject);
    if (subject) {
      parsed.subject = subject;
    }
    return parsed;
  }
  catch {
    return null;
  }
}
886
1526
  function isTransientFetchError(error) {
887
1527
  return /(?:fetch failed|socket hang up|ECONNRESET|EPIPE|UND_ERR_SOCKET|terminated)/iu.test(errorMessage(error));
888
1528
  }
@@ -911,26 +1551,6 @@ async function openBrowser(url) {
911
1551
  });
912
1552
  });
913
1553
  }
914
- function retrySamplesForFailedJobs(jobs, run) {
915
- if (run.status === "running") {
916
- throw new WorkbenchUserError(`Run ${run.id} is still running; wait for it to finish before retrying.`);
917
- }
918
- const failed = jobs
919
- .filter((job) => job.runId === run.id && job.status !== "succeeded")
920
- .map((job) => ({ caseId: job.caseId, sample: job.sample }));
921
- if (failed.length === 0) {
922
- throw new WorkbenchUserError(`Run ${run.id} has no failed jobs to retry; use workbench eval to intentionally run it again.`);
923
- }
924
- const byKey = new Map();
925
- for (const sample of failed) {
926
- byKey.set(`${sample.caseId}:${sample.sample}`, sample);
927
- }
928
- const selectedSamples = [...byKey.values()].sort((left, right) => left.caseId.localeCompare(right.caseId) || left.sample - right.sample);
929
- return {
930
- samples: Math.max(1, ...selectedSamples.map((entry) => entry.sample + 1)),
931
- selectedSamples,
932
- };
933
- }
934
1554
  function adapterAuthStoreRoot() {
935
1555
  return process.env.WORKBENCH_ADAPTER_AUTH_STORE?.trim() || undefined;
936
1556
  }
@@ -1062,24 +1682,40 @@ async function requiredAgentAuthStatuses(parsed, statuses) {
1062
1682
  .filter((agent) => ["codex", "claude"].includes(agent.adapter.trim().toLowerCase()))
1063
1683
  .map(async (agent) => {
1064
1684
  const target = parseAuthTarget(agent.adapter.trim().toLowerCase(), "default");
1685
+ const local = statusMap.get(`${target.adapterId}/${target.slot ?? "_"}/${target.profile}`) ??
1686
+ await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).status(target);
1065
1687
  return {
1066
1688
  agent: agent.name,
1067
1689
  adapter: agent.adapter,
1068
- local: statusMap.get(`${target.adapterId}/${target.slot ?? "_"}/${target.profile}`) ??
1069
- await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).status(target),
1690
+ local: local.status === "connected" ? "connected" : "missing",
1070
1691
  };
1071
1692
  }));
1072
1693
  }
1073
1694
  function formatAuthStatusRecord(status) {
1074
1695
  return `${formatAuthTarget(status)}\t${status.status}${status.method ? `\t${status.method}` : ""}${status.reason ? `\t${status.reason}` : ""}`;
1075
1696
  }
1076
- function formatAuthStatusList(statuses, required) {
1697
/**
 * Converts an adapter auth status record into its JSON output shape.
 * `adapterId` is exposed as `adapter`; `slot`, `method` and `updatedAt` are
 * included only when truthy.
 *
 * @param {object} status - Source status record.
 * @returns {object}
 */
function authStatusRecordToJson(status) {
  const json = { adapter: status.adapterId };
  if (status.slot) {
    json.slot = status.slot;
  }
  json.profile = status.profile;
  json.status = status.status;
  if (status.method) {
    json.method = status.method;
  }
  if (status.updatedAt) {
    json.updatedAt = status.updatedAt;
  }
  return json;
}
1707
/**
 * Renders the Workbench Cloud auth state on one line, adding tab-separated
 * `baseUrl=` and `user=` columns when those fields are set.
 *
 * @param {object} status - Reads `status`, `baseUrl`, `username`.
 * @returns {string}
 */
function formatWorkbenchCloudAuthStatus(status) {
  const baseUrlColumn = status.baseUrl ? `\tbaseUrl=${status.baseUrl}` : "";
  const userColumn = status.username ? `\tuser=${status.username}` : "";
  return `Workbench Cloud: ${status.status}${baseUrlColumn}${userColumn}`;
}
1710
+ function formatAuthStatusList(workbenchCloud, statuses, required) {
1077
1711
  const lines = [
1712
+ formatWorkbenchCloudAuthStatus(workbenchCloud),
1713
+ "",
1078
1714
  ...(statuses.length > 0
1079
1715
  ? ["Adapter auth:", ...statuses.map(formatAuthStatusRecord)]
1080
1716
  : ["No local adapter auth records."]),
1081
1717
  ...(required.length > 0
1082
- ? ["", "Required by agents:", ...required.map((entry) => `${entry.agent}\t${entry.adapter}\t${entry.local.status}${entry.local.method ? `\t${entry.local.method}` : ""}`)]
1718
+ ? ["", "Required by agents:", ...required.map((entry) => `${entry.agent}\t${entry.adapter}\t${entry.local}`)]
1083
1719
  : []),
1084
1720
  ];
1085
1721
  return lines.join("\n");
@@ -1108,7 +1744,7 @@ async function showLocalAgentSession(ref) {
1108
1744
  const sessions = await listLocalAgentSessions();
1109
1745
  const session = sessions.find((entry) => entry.id === ref);
1110
1746
  if (!session) {
1111
- throw new WorkbenchUserError(`Session not found: ${ref}`);
1747
+ throw new WorkbenchCodedError("ref_not_found", `Session not found: ${ref}`, { exitCode: 1 });
1112
1748
  }
1113
1749
  return {
1114
1750
  ...session,
@@ -1264,6 +1900,15 @@ function addFlag(flags, name, value) {
1264
1900
  : [String(existing), String(value)];
1265
1901
  return;
1266
1902
  }
1903
+ if (name === "agent" || name === "skill") {
1904
+ const existing = flags[name];
1905
+ flags[name] = Array.isArray(existing)
1906
+ ? [...existing, String(value)]
1907
+ : existing === undefined
1908
+ ? String(value)
1909
+ : [String(existing), String(value)];
1910
+ return;
1911
+ }
1267
1912
  flags[name] = value;
1268
1913
  }
1269
1914
  function dirFlag(parsed) {
@@ -1272,13 +1917,21 @@ function dirFlag(parsed) {
1272
1917
  async function coreOptions(parsed) {
1273
1918
  return {
1274
1919
  dir: dirFlag(parsed),
1275
- authToken: await workbenchRemoteAuthToken(),
1920
+ authToken: await workbenchCloudToken(),
1276
1921
  };
1277
1922
  }
1278
1923
  function stringFlag(parsed, name) {
1279
1924
  const value = parsed.flags[name];
1280
1925
  return typeof value === "string" ? value : undefined;
1281
1926
  }
1927
/**
 * Returns the flag value as a list.
 *
 * @param {object} parsed - Parsed CLI input with a `flags` map.
 * @param {string} name - Flag name to look up.
 * @returns {Array} The stored array itself, a one-element array for a string
 *   value, or an empty array for anything else.
 */
function stringsFlag(parsed, name) {
  const raw = parsed.flags[name];
  if (Array.isArray(raw)) {
    return raw;
  }
  if (typeof raw === "string") {
    return [raw];
  }
  return [];
}
1282
1935
  function intFlag(parsed, name) {
1283
1936
  const value = stringFlag(parsed, name);
1284
1937
  if (!value) {
@@ -1300,14 +1953,33 @@ function requiredPositional(parsed, index, message) {
1300
1953
  }
1301
1954
  return value;
1302
1955
  }
1956
/**
 * Returns a required string flag or fails with a usage error.
 *
 * @param {object} parsed - Parsed CLI input, forwarded to stringFlag.
 * @param {{flag: string, usage: string, remediation: *}} input - Flag name
 *   plus the message and remediation used for the error.
 * @returns {string} The non-empty flag value.
 * @throws {WorkbenchCodedError} Code "usage", exit code 2, when the flag is
 *   missing, not a string, or empty.
 */
function requiredFlag(parsed, input) {
  const flagValue = stringFlag(parsed, input.flag);
  if (flagValue) {
    return flagValue;
  }
  const details = { remediation: input.remediation, exitCode: 2 };
  throw new WorkbenchCodedError("usage", input.usage, details);
}
1966
/**
 * Fails with a usage error when more positionals were given than allowed.
 *
 * @param {object} parsed - Parsed CLI input; reads `parsed.positionals.length`.
 * @param {{maxPositionals: number, message: string, remediation: *}} input -
 *   Limit plus the message and remediation used for the error.
 * @returns {void}
 * @throws {WorkbenchCodedError} Code "usage", exit code 2.
 */
function rejectExtraInput(parsed, input) {
  const withinLimit = parsed.positionals.length <= input.maxPositionals;
  if (!withinLimit) {
    const details = { remediation: input.remediation, exitCode: 2 };
    throw new WorkbenchCodedError("usage", input.message, details);
  }
}
1303
1975
  function parsePublishVisibility(value) {
1304
1976
  if (value === undefined) {
1305
1977
  return undefined;
1306
1978
  }
1307
- if (value === "private" || value === "public") {
1979
+ if (value === "private" || value === "internal" || value === "public") {
1308
1980
  return value;
1309
1981
  }
1310
- throw new WorkbenchUserError("workbench publish --visibility must be private or public.");
1982
+ throw new WorkbenchUserError("workbench publish --visibility must be private, internal, or public.");
1311
1983
  }
1312
1984
  function parseWithFlags(parsed) {
1313
1985
  const raw = parsed.flags.with;
@@ -1332,34 +2004,151 @@ function parseScalar(value) {
1332
2004
  }
1333
2005
  return value;
1334
2006
  }
1335
- function output(value, parsed, io, text) {
1336
- if (parsed.flags.json === true) {
1337
- io.stdout.write(`${JSON.stringify(value, null, 2)}\n`);
2007
/**
 * Collects the distinct artifact ids recorded by jobs of the given runs.
 *
 * @param {*} core - Passed to createWorkbenchReadOnlyInspectionSnapshot.
 * @param {Array<{id: string}>} runs - Runs whose jobs should be inspected.
 * @returns {Promise<Map<string, string[]>>} One entry per distinct run id, in
 *   first-seen order; runs without matching jobs map to an empty array. No
 *   snapshot is created when `runs` is empty.
 */
async function artifactIdsByRunId(core, runs) {
  // Accumulate into one Set per run so each job costs time proportional to
  // its own artifact list, instead of re-copying and re-deduplicating the
  // whole accumulated array on every job.
  const collected = new Map();
  for (const run of runs) {
    if (!collected.has(run.id)) {
      collected.set(run.id, new Set());
    }
  }
  if (collected.size === 0) {
    return new Map();
  }
  const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
  for (const job of snapshot.jobs) {
    const ids = collected.get(job.runId);
    if (!ids) {
      continue;
    }
    for (const artifactId of job.artifactIds) {
      ids.add(artifactId);
    }
  }
  // Sets keep insertion order, so the arrays match the first-seen order.
  return new Map([...collected].map(([runId, ids]) => [runId, [...ids]]));
}
2023
/**
 * Writes the eval-failure report to `io.stdout`, as a JSON document when
 * `--json` is set and as plain text otherwise.
 *
 * @param {Array<object>} runs - All runs; summarized in JSON output only.
 * @param {Array<object>} failedRuns - Failed runs; listed in both formats.
 * @param {Map<string, string[]>} artifactIds - Artifact ids keyed by run id.
 * @param {object} parsed - Parsed CLI input; reads `parsed.flags.json`.
 * @param {object} io - Provides `stdout.write`.
 * @returns {number} Always 1.
 */
function emitEvalFailure(runs, failedRuns, artifactIds, parsed, io) {
  const nextCommands = evalFailureNextCommands(failedRuns);
  let rendered;
  if (parsed.flags.json === true) {
    const summarize = (run) => runFailureSummary(run, artifactIds.get(run.id) ?? []);
    const payload = {
      schema: "workbench.cli.eval.v1",
      ok: false,
      code: "eval_runs_failed",
      message: "Eval failed; evidence was saved.",
      retryable: false,
      evidenceSaved: true,
      runs: runs.map(summarize),
      failedRuns: failedRuns.map(summarize),
      nextCommands,
    };
    rendered = JSON.stringify(payload, null, 2);
  }
  else {
    const lines = ["Eval failed; evidence was saved.", ...failedRuns.map(formatRun)];
    if (nextCommands.length > 0) {
      lines.push("next:", ...nextCommands.map((command) => ` ${command}`));
    }
    rendered = lines.join("\n");
  }
  io.stdout.write(`${rendered}\n`);
  return 1;
}
2046
/**
 * Builds the JSON summary of a run.
 *
 * @param {object} run - Source run; `score` and `latencyMs` are copied when
 *   defined, `error` and `jobIds` when truthy.
 * @param {Iterable<string>} artifactIds - Copied into a fresh array.
 * @returns {object}
 */
function runSummary(run, artifactIds) {
  const summary = {
    id: run.id,
    kind: run.kind,
    status: run.status,
    versionId: run.versionId,
    skillName: run.skillName,
    agentName: run.agentName,
  };
  if (run.score !== undefined) {
    summary.score = run.score;
  }
  if (run.latencyMs !== undefined) {
    summary.latencyMs = run.latencyMs;
  }
  if (run.error) {
    summary.error = run.error;
  }
  if (run.jobIds) {
    summary.jobIds = run.jobIds;
  }
  summary.traceIds = run.traceIds;
  summary.artifactIds = [...artifactIds];
  return summary;
}
2062
/**
 * Builds the failure-report summary of a run, using `runId`, `agent` and
 * `skill` as key names.
 *
 * @param {object} run - Source run; `score` is copied when defined and
 *   `error` when truthy.
 * @param {Iterable<string>} artifactIds - Copied into a fresh array.
 * @returns {object}
 */
function runFailureSummary(run, artifactIds) {
  const summary = {
    runId: run.id,
    agent: run.agentName,
    skill: run.skillName,
    status: run.status,
    versionId: run.versionId,
  };
  if (run.score !== undefined) {
    summary.score = run.score;
  }
  if (run.error) {
    summary.error = run.error;
  }
  summary.traceIds = run.traceIds;
  summary.artifactIds = [...artifactIds];
  return summary;
}
1344
- function formatStatus(status) {
1345
- if (!status.initialized) {
1346
- return `Workbench: not initialized\nRoot: ${status.root}`;
2075
/**
 * Suggests follow-up commands after a failed eval, based on the first failed run.
 *
 * @param {Array<object>} failedRuns - Failed runs; only index 0 is inspected.
 * @returns {string[]} Only the compare command when there is no failed run;
 *   otherwise compare, trace, an optional stderr `show` for the first trace
 *   id, and an improve command for the run's agent.
 */
function evalFailureNextCommands(failedRuns) {
  const commands = ["workbench compare --versions all"];
  const first = failedRuns[0];
  if (!first) {
    return commands;
  }
  const traceId = first.traceIds[0];
  commands.push(`workbench trace ${first.id}`);
  if (traceId) {
    commands.push(`workbench show ${traceId}:stderr.log`);
  }
  commands.push(`workbench improve --agent ${first.agentName} --budget 1 --samples 1`);
  return commands;
}
2088
/**
 * Emits a command result by wrapping `value` as `{ result: value }` and
 * delegating to emitResult under the schema chosen by commandSchema.
 *
 * @param {*} value - Result payload.
 * @param {object} parsed - Parsed CLI input.
 * @param {object} io - Forwarded to emitResult.
 * @param {*} text - Forwarded unchanged to emitResult.
 * @returns {*} Whatever emitResult returns.
 */
function output(value, parsed, io, text) {
  const schema = commandSchema(parsed);
  const payload = { result: value };
  return emitResult(schema, payload, parsed, io, text);
}
2091
/**
 * Derives the JSON schema id for the current command.
 *
 * @param {object} parsed - Parsed CLI input; reads `positionals[0]` and `[1]`.
 * @returns {string} `workbench.cli.<command>.v1`, or
 *   `workbench.cli.<command>-<subcommand>.v1` when the command is one of
 *   auth/remote/agent/case/skills and a subcommand is present. The command
 *   defaults to "result".
 */
function commandSchema(parsed) {
  const groupedCommands = ["auth", "remote", "agent", "case", "skills"];
  const command = parsed.positionals[0] ?? "result";
  const subcommand = parsed.positionals[1];
  let suffix = command;
  if (groupedCommands.includes(command) && subcommand) {
    suffix = `${command}-${subcommand}`;
  }
  return `workbench.cli.${suffix}.v1`;
}
2099
/**
 * Collects the Workbench Cloud login state and the local adapter auth statuses.
 *
 * @returns {Promise<{workbenchCloud: object, adapters: object[]}>}
 *   `workbenchCloud.status` is "authenticated" when the config holds an access
 *   token or workbenchCloudEnvToken() returns a truthy value. `baseUrl` is
 *   included when resolved; `username` only when the config has both an
 *   access token and a username.
 */
async function workbenchCliAuthStatus() {
  const config = await loadConfig();
  // Best effort: a failure to list adapter statuses yields an empty list.
  const adapterStatuses = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).listStatus().catch(() => []);
  const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
  const authenticated = config.accessToken || workbenchCloudEnvToken();
  return {
    workbenchCloud: {
      status: authenticated ? "authenticated" : "not_authenticated",
      ...(baseUrl ? { baseUrl } : {}),
      ...(config.accessToken && config.username ? { username: config.username } : {}),
    },
    // Reuse the shared serializer so adapter rows cannot drift from the shape
    // authStatusRecordToJson produces.
    adapters: adapterStatuses.map((status) => authStatusRecordToJson(status)),
  };
}
2119
/**
 * Renders a status snapshot as newline-separated text: project facts, run
 * count, Workbench Cloud state, one block per remote, and suggested next
 * commands.
 *
 * @param {object} status - Snapshot with `project`, `runs`, optional `auth`,
 *   `remotes` and `next`.
 * @returns {string}
 */
function formatStatusSnapshot(status) {
  // Publication column of a remote row.
  const describePublication = (publication) => {
    if (publication.status !== "published") {
      return "publication=unpublished";
    }
    const parts = ["publication=published"];
    if (publication.visibility) {
      parts.push(`visibility=${publication.visibility}`);
    }
    if (publication.versionId) {
      parts.push(`version=${publication.versionId}`);
    }
    if (publication.installUrl) {
      parts.push(`install=${publication.installUrl}`);
    }
    if (publication.pinnedInstallUrl) {
      parts.push(`pinned=${publication.pinnedInstallUrl}`);
    }
    return parts.join("\t");
  };
  // Summary row for a remote, followed by error details when its sync failed.
  const describeRemote = (remote) => {
    const publication = describePublication(remote.publication);
    const rows = [` ${remote.name}\tkind=${remote.kind}\tsync=${remote.sync.status}\turl=${remote.url}\t${publication}`];
    if (remote.sync.status === "error" && remote.sync.lastError) {
      rows.push(` error[${remote.sync.lastError.code}]: ${remote.sync.lastError.message}`);
      if (remote.sync.lastAttemptAt) {
        rows.push(` last attempt: ${remote.sync.lastAttemptAt}`);
      }
      if (remote.sync.nextCommand) {
        rows.push(` next: ${remote.sync.nextCommand}`);
      }
    }
    return rows;
  };
  const lines = [];
  lines.push(`Root: ${status.project.root}`);
  lines.push(`Initialized: ${status.project.initialized ? "yes" : "no"}`);
  if (status.project.currentVersionId) {
    lines.push(`Current version: ${status.project.currentVersionId}`);
  }
  if (status.project.defaultSkill) {
    lines.push(`Default skill: ${status.project.defaultSkill}`);
  }
  if (status.project.defaultAgent) {
    lines.push(`Default agent: ${status.project.defaultAgent}`);
  }
  const lastRun = status.runs.lastStatus ? ` (last ${status.runs.lastStatus})` : "";
  lines.push(`Runs: ${status.runs.total}${lastRun}`);
  const cloudStatus = status.auth?.workbenchCloud.status ?? "not_authenticated";
  const cloudBaseUrl = status.auth?.workbenchCloud.baseUrl ? ` ${status.auth.workbenchCloud.baseUrl}` : "";
  lines.push(`Workbench Cloud: ${cloudStatus}${cloudBaseUrl}`);
  if (status.remotes.length > 0) {
    lines.push("Remotes:");
    for (const remote of status.remotes) {
      lines.push(...describeRemote(remote));
    }
  }
  else {
    lines.push("Remotes: none");
  }
  if (status.next.length > 0) {
    lines.push("Next:");
    for (const command of status.next) {
      lines.push(` ${command}`);
    }
  }
  return lines.join("\n");
}
1364
2153
  function formatCheck(result) {
1365
2154
  return [
@@ -1369,7 +2158,6 @@ function formatCheck(result) {
1369
2158
  `Agents: ${result.agents}`,
1370
2159
  `Skill files: ${result.plan.source.skillFiles}`,
1371
2160
  `Eval files: ${result.plan.source.evalFiles}`,
1372
- `Readiness: ${result.plan.readiness.label} - ${result.plan.readiness.reason}`,
1373
2161
  "",
1374
2162
  "Skill plan:",
1375
2163
  ...result.plan.skills.map((skill) => [
@@ -1397,6 +2185,16 @@ function formatCheck(result) {
1397
2185
  function formatVersion(version) {
1398
2186
  return `${version.id}\t${version.hash.slice(0, 12)}\t${version.message}`;
1399
2187
  }
2188
/**
 * Builds the JSON summary of a version, replacing the file list with a count.
 *
 * @param {object} version - Reads `id`, `hash`, `message`, `parentIds`,
 *   `createdAt` and `files.length`.
 * @returns {object}
 */
function versionSummary(version) {
  const { id, hash, message, parentIds, createdAt } = version;
  const fileCount = version.files.length;
  return { id, hash, message, parentIds, createdAt, fileCount };
}
1400
2198
  function formatAgent(agent) {
1401
2199
  return `${agent.name}\t${agent.adapter}${agent.model ? `\t${agent.model}` : ""}`;
1402
2200
  }
@@ -1419,14 +2217,14 @@ function formatJob(job) {
1419
2217
  return `${job.id}\trun=${job.runId}\tcase=${job.caseId}\tsample=${job.sample}\t${job.status}\tscore=${score}\tduration=${duration}`;
1420
2218
  }
1421
2219
  function formatComparison(comparison) {
1422
- const lines = ["version\tskill\tagent\tscore\treadiness\tcost\tlatency\trun"];
2220
+ const lines = ["version\tskill\tagent\tstatus\tscore\tcost\tlatency\trun"];
1423
2221
  for (const cell of comparison.cells) {
1424
2222
  lines.push([
1425
2223
  cell.versionId,
1426
2224
  cell.skillName,
1427
- cell.agentName,
2225
+ `${cell.agentName}@${shortObjectId(cell.agentHash)}`,
2226
+ cell.status ?? "not-run",
1428
2227
  cell.score === undefined ? "n/a" : cell.score.toFixed(3),
1429
- cell.automationReadiness?.label ?? "n/a",
1430
2228
  cell.costUsd === undefined ? "n/a" : `$${cell.costUsd.toFixed(4)}`,
1431
2229
  cell.latencyMs === undefined ? "n/a" : `${cell.latencyMs}ms`,
1432
2230
  cell.runId ?? "n/a",
@@ -1434,6 +2232,9 @@ function formatComparison(comparison) {
1434
2232
  }
1435
2233
  return lines.join("\n");
1436
2234
  }
2235
/**
 * Shortens an id to at most 12 characters.
 *
 * @param {string} id
 * @returns {string} `id` unchanged when it has 12 characters or fewer,
 *   otherwise its first 12 characters.
 */
function shortObjectId(id) {
  if (id.length > 12) {
    return id.slice(0, 12);
  }
  return id;
}
1437
2238
  function formatTrace(trace) {
1438
2239
  const result = asRecord(trace.result);
1439
2240
  const status = typeof result?.status === "string" ? result.status : undefined;
@@ -1448,9 +2249,62 @@ function formatTrace(trace) {
1448
2249
  `files=${trace.files.length}${files ? ` (${files}${trace.files.length > 5 ? ",..." : ""})` : ""}`,
1449
2250
  ].filter(Boolean).join("\t");
1450
2251
  }
2252
/**
 * Builds the JSON summary of a trace.
 *
 * @param {object} trace - Source trace; `trace.result` is read through asRecord.
 * @returns {object} Identity fields, then `status` (string), `score` (number)
 *   and `error` (string, collapsed by singleLine) when the result provides
 *   them with those types, then `fileCount` and per-file summaries from
 *   fileSummary.
 */
function traceSummary(trace) {
  const result = asRecord(trace.result);
  const summary = { id: trace.id, runId: trace.runId };
  if (trace.jobId) {
    summary.jobId = trace.jobId;
  }
  summary.versionId = trace.versionId;
  summary.skillName = trace.skillName;
  summary.agentName = trace.agentName;
  summary.createdAt = trace.createdAt;
  if (typeof result?.status === "string") {
    summary.status = result.status;
  }
  if (typeof result?.score === "number") {
    summary.score = result.score;
  }
  if (typeof result?.error === "string") {
    summary.error = singleLine(result.error);
  }
  summary.fileCount = trace.files.length;
  summary.files = trace.files.map(fileSummary);
  return summary;
}
2269
/**
 * Renders one tab-separated line per execution of a trace detail: id, run,
 * jobs, status, event/span/summary counts, and session labels when any
 * session has a non-empty joined label.
 *
 * @param {object} detail - Reads `runId` and `executions`.
 * @returns {string} Lines joined with "\n"; empty when there are no executions.
 */
function formatTraceDetail(detail) {
  const rendered = [];
  for (const execution of detail.executions) {
    const sessionLabels = execution.sessions.map((session) => session.label).join(",");
    const columns = [
      `${execution.id}\trun=${detail.runId}\tjobs=${execution.jobIds.join(",")}\tstatus=${execution.status}`,
      `events=${execution.trace.events.length}`,
      `spans=${execution.trace.spans.length}`,
      `summaries=${execution.trace.summaries.length}`,
    ];
    if (sessionLabels) {
      columns.push(`sessions=${sessionLabels}`);
    }
    rendered.push(columns.join("\t"));
  }
  return rendered.join("\n");
}
1451
2281
  function formatArtifact(artifact) {
1452
2282
  return `${artifact.id}\trun=${artifact.runId}\tjob=${artifact.jobId}\t${artifact.kind}\tfiles=${artifact.files.length}`;
1453
2283
  }
2284
/**
 * Builds the JSON summary of an artifact, with each file reduced through fileSummary.
 *
 * @param {object} artifact - Reads `id`, `runId`, `jobId`, `kind`, `files`.
 * @returns {object}
 */
function artifactSummary(artifact) {
  const { id, runId, jobId, kind } = artifact;
  const files = artifact.files.map(fileSummary);
  return { id, runId, jobId, kind, fileCount: artifact.files.length, files };
}
2294
/**
 * Builds the JSON summary of a file without its content.
 *
 * @param {object} file - `kind` and `encoding` are copied when truthy,
 *   `executable` when defined.
 * @returns {object} `path`, the optional fields above, and `bytes` from
 *   surfaceFileByteLength.
 */
function fileSummary(file) {
  const summary = { path: file.path };
  if (file.kind) {
    summary.kind = file.kind;
  }
  if (file.encoding) {
    summary.encoding = file.encoding;
  }
  if (file.executable !== undefined) {
    summary.executable = file.executable;
  }
  summary.bytes = surfaceFileByteLength(file);
  return summary;
}
2303
/**
 * Computes the byte length of a file's content.
 *
 * @param {{content: string, encoding?: string}} file
 * @returns {number} Byte length with the content treated as base64 when
 *   `encoding` is "base64", otherwise as UTF-8 text.
 */
function surfaceFileByteLength(file) {
  const encoding = file.encoding === "base64" ? "base64" : "utf8";
  return Buffer.byteLength(file.content, encoding);
}
1454
2308
  function formatSession(session) {
1455
2309
  return `${session.id}\t${session.source}\t${session.updatedAt}\t${session.bytes}b\t${session.path}${session.title ? `\t${session.title}` : ""}`;
1456
2310
  }
@@ -1473,6 +2327,9 @@ function formatShow(value) {
1473
2327
  function isSurfaceFile(value) {
1474
2328
  return Boolean(value && typeof value === "object" && "content" in value && typeof value.content === "string");
1475
2329
  }
2330
/**
 * Collapses every run of whitespace (including newlines) into one space and
 * trims both ends.
 *
 * @param {string} value
 * @returns {string}
 */
function singleLine(value) {
  const collapsed = value.split(/\s+/u).join(" ");
  return collapsed.trim();
}
1476
2333
  function asRecord(value) {
1477
2334
  return value && typeof value === "object" && !Array.isArray(value)
1478
2335
  ? value