@workbench-ai/workbench 0.0.69 → 0.0.71

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -4,119 +4,106 @@ import { createRequire } from "node:module";
4
4
  import os from "node:os";
5
5
  import path from "node:path";
6
6
  import { gzipSync } from "node:zlib";
7
- import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent, checkWorkbenchSkill, compareWorkbench, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, filesForWorkbenchRef, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchCases, listWorkbenchRemotes, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, publishWorkbenchVersion, removeWorkbenchCase, removeWorkbenchAgent, removeWorkbenchRemote, setDefaultWorkbenchAgent, showWorkbenchCase, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
7
+ import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchInspectionSnapshot, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, publishWorkbenchVersion, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchSkillImproveCanUseQueuedAdapter, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
8
+ import { normalizeWorkbenchSkillName } from "@workbench-ai/workbench-contract";
8
9
  import { emitError, emitResult } from "./output.js";
9
- import { installSnapshotToTargets, installTargetsToJson, normalizeInstallSnapshotPath, resolveInstallTargets, supportedInstallTargets, } from "./install-targets.js";
10
+ import { installSnapshotToTargets, normalizeInstallSnapshotPath, resolveInstallTargets, supportedInstallTargets, } from "./install-targets.js";
10
11
  import { startWorkbenchOpenServer } from "./open-server.js";
11
12
  const require = createRequire(import.meta.url);
12
13
  const HELP = [
13
14
  "Usage:",
15
+ " workbench [--json]",
14
16
  " workbench <command> [options]",
15
17
  "",
16
- "Primary loop:",
17
- " workbench init [DIR] [--json]",
18
- " workbench check [--dir DIR] [--json]",
19
- " workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [--samples N] [--rerun] [--json]",
18
+ "Bare workbench prints project status and the next useful command.",
19
+ "",
20
+ "Taught commands:",
21
+ " workbench new [DIR] [--json]",
22
+ " workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
23
+ " workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
20
24
  " workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
21
- " workbench improve [VERSION] [--skill SKILL] [--agent AGENT] [--budget N] [--samples N] [--json]",
25
+ " workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
26
+ " workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
27
+ "",
28
+ "More:",
29
+ " workbench help --all",
30
+ ].join("\n");
31
+ const HELP_ALL = [
32
+ "Usage:",
33
+ " workbench # = workbench status",
34
+ " workbench new [DIR] [--json]",
35
+ " workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
36
+ " workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
37
+ " workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
38
+ " workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
39
+ " workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
22
40
  "",
23
41
  "Inspect:",
24
42
  " workbench status [--dir DIR] [--json]",
25
- " workbench versions [--dir DIR] [--json]",
26
- " workbench switch VERSION [--dir DIR] [--json]",
27
- " workbench diff [A..B] [--dir DIR] [--json]",
43
+ " workbench log [--runs|--versions] [--json]",
28
44
  " workbench show REF[:PATH] [--json]",
29
- " workbench files REF [--json]",
30
- " workbench list runs|jobs|traces|artifacts|sessions [--json]",
31
- " workbench trace RUN_ID|JOB_ID|TRACE_ID [--json]",
45
+ " workbench diff [A..B] [--json]",
46
+ " workbench switch VERSION [--json]",
32
47
  " workbench open [--host HOST] [--port PORT] [--no-open] [--json]",
33
48
  "",
34
49
  "Configure:",
35
- " workbench agent list|add|show|default|remove ...",
36
- " workbench skills list",
37
- " workbench case list|add|show|remove ...",
50
+ " workbench case add RUN_ID [--json]",
51
+ " workbench agent add NAME --adapter X [--model M] [--with k=v]... | list | rm NAME [--json]",
38
52
  "",
39
53
  "Share and auth:",
40
- " workbench remote add --name NAME --url URL [--replace] [--dry-run] [--dir DIR] [--json]",
41
- " workbench remote list [--dir DIR] [--json]",
42
- " workbench remote remove NAME [--dir DIR] [--json]",
43
- " workbench sync [REMOTE] [--dry-run] [--dir DIR] [--json]",
44
- " workbench publish [VERSION] [--remote REMOTE] [--visibility private|internal|public] [--dry-run] [--dir DIR] [--json]",
45
- " workbench install --source SOURCE [--agent codex|claude]... [--local] [--yes] [--list] [--dry-run] [--json]",
46
- " workbench auth status [ADAPTER[/SLOT]] [--profile PROFILE] [--json]",
47
- " workbench auth connect ADAPTER[/SLOT] [--method METHOD] [--profile PROFILE] [--profile-root DIR] [--local-only] [--json]",
48
- " workbench auth disconnect ADAPTER[/SLOT] [--profile PROFILE] [--local-only] [--json]",
49
- " workbench login [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--json]",
50
- " workbench logout [--json]",
54
+ " workbench login [PROVIDER] [--method METHOD] [--profile P] [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--local-only] [--json]",
55
+ " workbench logout [PROVIDER] [--json]",
56
+ " workbench sync [REMOTE] [--dry-run] [--json]",
51
57
  "",
52
58
  "Remote URLs:",
53
59
  " https://HOST/skills/OWNER/SKILL Workbench Cloud skill remote",
54
- " file:///absolute/path local file remote",
55
- "",
56
- "Examples:",
57
- " workbench init ./earnings-prep",
58
- " workbench check --dir ./earnings-prep",
59
- " workbench eval --agents default --samples 1",
60
- " workbench compare",
61
- " workbench status --json",
62
- " workbench remote add --name origin --url https://v2.workbench.ai/skills/acme/earnings-prep",
63
- " workbench publish --remote origin --visibility public --json",
64
- " workbench install --source https://v2.workbench.ai/skills/acme/earnings-prep --agent codex --yes",
65
- "",
66
- "Environment:",
67
- " CODEX_HOME and CLAUDE_HOME override read-only session discovery roots.",
68
- " WORKBENCH_API_URL selects a Workbench Cloud API base URL for login, auth, and HTTP remotes.",
69
- " WORKBENCH_API_TOKEN supplies a Workbench Cloud token without a login (WORKBENCH_SMOKE_BEARER_TOKEN is a fallback).",
70
- " WORKBENCH_CONFIG overrides the CLI config path (default ~/.workbench/config.json).",
71
- " WORKBENCH_DEVICE_AUTH overrides the pending device login record path.",
72
- " WORKBENCH_ADAPTER_AUTH_STORE overrides the local adapter auth store directory.",
60
+ " file:///absolute/path local file remote for plumbing sync",
73
61
  ].join("\n");
74
62
  const COMMAND_HELP = {
75
- auth: [
63
+ new: [
76
64
  "Usage:",
77
- " workbench auth status [ADAPTER[/SLOT]] [--profile PROFILE] [--json]",
78
- " workbench auth connect ADAPTER[/SLOT] [--method api-key|oauth|bedrock] [--profile PROFILE] [--profile-root DIR] [--local-only] [--json]",
79
- " workbench auth disconnect ADAPTER[/SLOT] [--profile PROFILE] [--local-only] [--json]",
65
+ " workbench new [DIR] [--json]",
80
66
  "",
81
- "Stores adapter credentials locally and uploads them to Workbench Cloud when logged in unless --local-only is passed. Codex supports oauth and api-key. Claude supports oauth, api-key, and bedrock.",
67
+ "Creates a Workbench skill project.",
82
68
  "",
83
- "Examples:",
84
- " workbench auth status --json",
85
- " workbench auth connect codex --method api-key",
86
- " workbench auth disconnect codex --json",
69
+ "Example:",
70
+ " workbench new earnings-prep",
87
71
  ].join("\n"),
88
72
  eval: [
89
73
  "Usage:",
90
- " workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [--samples N] [--rerun] [--json]",
74
+ " workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
91
75
  "",
92
76
  "Runs eval jobs for the selected version, measured skills, and agents. Omitted selectors use manifest defaults.",
77
+ "",
78
+ "Example:",
79
+ " workbench eval -n 5",
93
80
  ].join("\n"),
94
81
  improve: [
95
82
  "Usage:",
96
- " workbench improve [VERSION] [--skill SKILL] [--agent AGENT] [--budget N] [--samples N] [--json]",
83
+ " workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
84
+ "",
85
+ "Creates one improved child version from evidence. The selected skills and agents must resolve to exactly one entry each.",
97
86
  "",
98
- "Creates one improved child version from evidence. Pass singular --skill and --agent when defaults expand to multiple entries.",
87
+ "Example:",
88
+ " workbench improve --budget 1 -n 1",
99
89
  ].join("\n"),
100
- install: [
90
+ compare: [
101
91
  "Usage:",
102
- " workbench install --source SOURCE [--agent codex|claude]... [--local] [--yes] [--list] [--dry-run] [--json]",
92
+ " workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
103
93
  "",
104
- "Installs published Workbench Cloud source into explicit local agent targets.",
94
+ "Compares recorded eval evidence across selected skills, agents, and versions.",
105
95
  "",
106
96
  "Example:",
107
- " workbench install --source https://v2.workbench.ai/skills/acme/earnings-prep --agent codex --yes",
97
+ " workbench compare --agents all",
108
98
  ].join("\n"),
109
- remote: [
99
+ install: [
110
100
  "Usage:",
111
- " workbench remote add --name NAME --url URL [--replace] [--dry-run] [--dir DIR] [--json]",
112
- " workbench remote list [--dir DIR] [--json]",
113
- " workbench remote remove NAME [--dir DIR] [--json]",
101
+ " workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
114
102
  "",
115
- "Remotes exchange Workbench object packs. Only Workbench Cloud remotes can publish installable source.",
103
+ "Installs published Workbench Cloud source into local agent targets.",
116
104
  "",
117
- "Examples:",
118
- " workbench remote add --name origin --url https://v2.workbench.ai/skills/acme/earnings-prep",
119
- " workbench remote add --name scratch --url file:///tmp/earnings-prep-remote --replace",
105
+ "Example:",
106
+ " workbench install acme/earnings-prep --to codex --yes",
120
107
  ].join("\n"),
121
108
  status: [
122
109
  "Usage:",
@@ -129,180 +116,187 @@ const COMMAND_HELP = {
129
116
  ].join("\n"),
130
117
  logout: [
131
118
  "Usage:",
132
- " workbench logout [--json]",
119
+ " workbench logout [PROVIDER] [--json]",
133
120
  "",
134
- "Revokes and removes the local Workbench Cloud token. Reports whether the token was revoked and whether local adapter auth records remain.",
121
+ "With no provider, logs out of Workbench Cloud. With a provider such as codex or claude, removes local adapter auth.",
135
122
  "",
136
123
  "Example:",
137
- " workbench logout --json",
124
+ " workbench logout claude",
138
125
  ].join("\n"),
139
126
  show: [
140
127
  "Usage:",
141
128
  " workbench show REF [--json]",
142
129
  " workbench show REF:PATH [--json]",
143
130
  "",
144
- "Shows a Workbench object or a file inside a version, trace, or artifact.",
131
+ "Shows a Workbench object, lists files for file-backed objects, or prints one file.",
132
+ "",
133
+ "Example:",
134
+ " workbench show run_abc12345:result.json",
145
135
  ].join("\n"),
146
- list: [
136
+ log: [
147
137
  "Usage:",
148
- " workbench list runs|jobs|traces|artifacts|sessions [--json]",
138
+ " workbench log [--runs|--versions] [--json]",
139
+ "",
140
+ "Shows one reverse-chronological timeline of versions and runs.",
149
141
  "",
150
- "Lists Workbench evidence or read-only native Codex/Claude session files.",
142
+ "Example:",
143
+ " workbench log --runs",
151
144
  ].join("\n"),
152
- versions: [
145
+ diff: [
153
146
  "Usage:",
154
- " workbench versions [--json]",
147
+ " workbench diff [A..B] [--json]",
148
+ "",
149
+ "Shows changed files between two Workbench source versions.",
155
150
  "",
156
- "Lists Workbench skill versions.",
151
+ "Example:",
152
+ " workbench diff 26059f9a..eac5699c",
157
153
  ].join("\n"),
158
154
  switch: [
159
155
  "Usage:",
160
156
  " workbench switch VERSION [--json]",
161
157
  "",
162
158
  "Switches the working skill source to a recorded Workbench version.",
159
+ "",
160
+ "Example:",
161
+ " workbench switch 26059f9a",
162
+ ].join("\n"),
163
+ open: [
164
+ "Usage:",
165
+ " workbench open [--host HOST] [--port PORT] [--no-open] [--json]",
166
+ "",
167
+ "Serves or emits the read-only Workbench inspection snapshot.",
168
+ "",
169
+ "Example:",
170
+ " workbench open --no-open",
171
+ ].join("\n"),
172
+ case: [
173
+ "Usage:",
174
+ " workbench case add RUN_ID [--json]",
175
+ "",
176
+ "Captures a regression case from a recorded run.",
177
+ "",
178
+ "Example:",
179
+ " workbench case add run_abc12345",
180
+ ].join("\n"),
181
+ agent: [
182
+ "Usage:",
183
+ " workbench agent list [--json]",
184
+ " workbench agent add NAME --adapter X [--model M] [--with k=v]... [--json]",
185
+ " workbench agent rm NAME [--json]",
186
+ "",
187
+ "Lists, adds, or removes eval agent configurations.",
188
+ "",
189
+ "Example:",
190
+ " workbench agent add claude --adapter claude --model sonnet",
163
191
  ].join("\n"),
164
192
  sync: [
165
193
  "Usage:",
166
194
  " workbench sync [REMOTE] [--dry-run] [--dir DIR] [--json]",
167
195
  "",
168
- "Synchronizes local evidence and version objects with a Workbench remote. --dry-run reports what would be exchanged.",
196
+ "Plumbing command: synchronizes local evidence and version objects with a Workbench remote.",
169
197
  "",
170
- "Examples:",
171
- " workbench sync origin --json",
172
- " workbench sync origin --dry-run --json",
198
+ "Example:",
199
+ " workbench sync cloud --dry-run",
173
200
  ].join("\n"),
174
201
  publish: [
175
202
  "Usage:",
176
- " workbench publish [VERSION] [--remote REMOTE] [--visibility private|internal|public] [--dry-run] [--dir DIR] [--json]",
203
+ " workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--dir DIR] [--json]",
177
204
  "",
178
- "Publishes installable skill source from the selected version to a Workbench Cloud remote.",
205
+ "Publishes installable skill source to Workbench Cloud. --as sets the linked OWNER/SKILL handle.",
179
206
  "",
180
- "Examples:",
181
- " workbench publish --remote origin --visibility private --json",
182
- " workbench publish <version-id> --remote origin --dry-run --json",
207
+ "Example:",
208
+ " workbench publish --as acme/earnings-prep --dry-run",
183
209
  ].join("\n"),
184
210
  login: [
185
211
  "Usage:",
186
- " workbench login [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--json]",
187
- " workbench logout [--json]",
212
+ " workbench login [PROVIDER] [--method METHOD] [--profile P] [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--local-only] [--json]",
213
+ " workbench logout [PROVIDER] [--json]",
188
214
  "",
189
- "Connects the CLI to Workbench Cloud with the device login flow.",
215
+ "Connects the CLI to Workbench Cloud or captures local adapter auth for a provider.",
190
216
  "",
191
- "Examples:",
192
- " workbench login --start-only --json",
193
- " workbench login --wait --timeout 120 --json",
217
+ "Example:",
218
+ " workbench login --start-only --no-open",
194
219
  ].join("\n"),
195
220
  };
196
- const BOOLEAN_FLAGS = new Set([
197
- "help",
198
- "dry-run",
199
- "json",
200
- "local",
201
- "local-only",
202
- "list",
203
- "no-open",
204
- "start-only",
205
- "replace",
206
- "rerun",
207
- "wait",
208
- "yes",
209
- ]);
210
- const FLAG_DEFINITIONS = {
211
- adapter: "string",
212
- "base-url": "string",
213
- budget: "positive-integer",
221
+ const COMMON_FLAGS = {
222
+ json: "boolean",
223
+ };
224
+ const PROJECT_FLAGS = {
225
+ ...COMMON_FLAGS,
214
226
  dir: "string",
215
- from: "string",
216
- "dry-run": "boolean",
227
+ };
228
+ const HELP_FLAG = {
217
229
  help: "boolean",
218
- host: "string",
219
- json: "boolean",
220
- local: "boolean",
221
- "local-only": "boolean",
222
- list: "boolean",
223
- method: "string",
224
- model: "string",
225
- name: "string",
226
- "no-open": "boolean",
227
- port: "positive-integer",
228
- profile: "string",
229
- "profile-root": "string",
230
- remote: "string",
231
- replace: "boolean",
232
- rerun: "boolean",
233
- samples: "positive-integer",
234
- source: "string",
235
- "start-only": "boolean",
236
- agent: "string",
237
- agents: "string",
238
- skill: "string",
239
- skills: "string",
230
+ };
231
+ const VERSION_FLAG = {
240
232
  version: "boolean",
241
- versions: "string",
242
- visibility: "string",
243
- timeout: "positive-integer",
244
- url: "string",
245
- wait: "boolean",
246
- with: "repeat-string",
247
- yes: "boolean",
248
233
  };
249
234
  const COMMAND_FLAGS = {
250
- check: ["dir", "json"],
251
- compare: ["agents", "dir", "json", "skills", "versions"],
252
- diff: ["dir", "json"],
253
- eval: ["agents", "dir", "json", "rerun", "samples", "skills"],
254
- files: ["dir", "json"],
255
- improve: ["agent", "budget", "dir", "json", "samples", "skill"],
256
- init: ["dir", "json"],
257
- install: ["agent", "dry-run", "json", "list", "local", "source", "yes"],
258
- list: ["dir", "json"],
259
- login: ["base-url", "json", "no-open", "start-only", "timeout", "wait"],
260
- logout: ["json"],
261
- open: ["dir", "host", "json", "no-open", "port"],
262
- publish: ["dir", "dry-run", "json", "remote", "visibility"],
263
- show: ["dir", "json"],
264
- status: ["dir", "json"],
265
- switch: ["dir", "json"],
266
- sync: ["dir", "dry-run", "json"],
267
- trace: ["dir", "json"],
268
- versions: ["dir", "json"],
269
- };
270
- const SUBCOMMAND_FLAGS = {
271
- auth: {
272
- defaultSubcommand: "status",
273
- flags: {
274
- status: ["json", "profile"],
275
- connect: ["json", "local-only", "method", "profile", "profile-root"],
276
- disconnect: ["json", "local-only", "profile"],
277
- },
235
+ compare: { ...PROJECT_FLAGS, ...HELP_FLAG, agents: "string", skills: "string", versions: "string" },
236
+ diff: { ...PROJECT_FLAGS, ...HELP_FLAG },
237
+ eval: {
238
+ ...PROJECT_FLAGS,
239
+ ...HELP_FLAG,
240
+ agents: "string",
241
+ cloud: "boolean",
242
+ rerun: "boolean",
243
+ samples: "positive-integer",
244
+ skills: "string",
278
245
  },
279
- case: {
280
- flags: {
281
- list: ["dir", "json"],
282
- add: ["dir", "from", "json"],
283
- show: ["dir", "json"],
284
- remove: ["dir", "json"],
285
- },
246
+ help: { ...COMMON_FLAGS, ...HELP_FLAG, all: "boolean" },
247
+ improve: {
248
+ ...PROJECT_FLAGS,
249
+ ...HELP_FLAG,
250
+ agents: "string",
251
+ budget: "positive-integer",
252
+ cloud: "boolean",
253
+ samples: "positive-integer",
254
+ skills: "string",
286
255
  },
287
- remote: {
288
- flags: {
289
- add: ["dir", "dry-run", "json", "name", "replace", "url"],
290
- list: ["dir", "json"],
291
- remove: ["dir", "json"],
292
- },
256
+ install: { ...COMMON_FLAGS, ...HELP_FLAG, "dry-run": "boolean", to: "repeat-string", yes: "boolean" },
257
+ log: { ...PROJECT_FLAGS, ...HELP_FLAG, runs: "boolean", versions: "boolean" },
258
+ login: {
259
+ ...COMMON_FLAGS,
260
+ ...HELP_FLAG,
261
+ "base-url": "string",
262
+ "local-only": "boolean",
263
+ method: "string",
264
+ "no-open": "boolean",
265
+ profile: "string",
266
+ "profile-root": "string",
267
+ "start-only": "boolean",
268
+ timeout: "positive-integer",
269
+ wait: "boolean",
270
+ },
271
+ logout: { ...COMMON_FLAGS, ...HELP_FLAG },
272
+ new: { ...PROJECT_FLAGS, ...HELP_FLAG },
273
+ open: { ...PROJECT_FLAGS, ...HELP_FLAG, host: "string", "no-open": "boolean", port: "positive-integer" },
274
+ publish: {
275
+ ...PROJECT_FLAGS,
276
+ ...HELP_FLAG,
277
+ as: "string",
278
+ "dry-run": "boolean",
279
+ private: "boolean",
280
+ public: "boolean",
281
+ team: "boolean",
293
282
  },
294
- skills: {
283
+ show: { ...PROJECT_FLAGS, ...HELP_FLAG },
284
+ status: { ...PROJECT_FLAGS, ...HELP_FLAG },
285
+ switch: { ...PROJECT_FLAGS, ...HELP_FLAG },
286
+ sync: { ...PROJECT_FLAGS, ...HELP_FLAG, "dry-run": "boolean" },
287
+ version: { ...COMMON_FLAGS, ...VERSION_FLAG },
288
+ };
289
+ const SUBCOMMAND_FLAGS = {
290
+ case: {
295
291
  flags: {
296
- list: ["dir", "json"],
292
+ add: { ...PROJECT_FLAGS, ...HELP_FLAG },
297
293
  },
298
294
  },
299
295
  agent: {
300
296
  flags: {
301
- list: ["dir", "json"],
302
- add: ["adapter", "dir", "json", "model", "with"],
303
- show: ["dir", "json"],
304
- default: ["dir", "json"],
305
- remove: ["dir", "json"],
297
+ list: { ...PROJECT_FLAGS, ...HELP_FLAG },
298
+ add: { ...PROJECT_FLAGS, ...HELP_FLAG, adapter: "string", model: "string", with: "repeat-string" },
299
+ rm: { ...PROJECT_FLAGS, ...HELP_FLAG },
306
300
  },
307
301
  },
308
302
  };
@@ -313,20 +307,23 @@ export async function runCli(argv, io = {
313
307
  const parsed = parseArgs(argv);
314
308
  const command = parsed.positionals[0];
315
309
  try {
316
- if (command === "--version" || command === "-v" || command === "version" || parsed.flags.version === true) {
310
+ validateCommandFlags(parsed, command);
311
+ if (command === "version" || parsed.flags.version === true) {
317
312
  io.stdout.write(`workbench ${getCliVersion()}\n`);
318
313
  return 0;
319
314
  }
320
- if (!command || command === "help" || command === "--help" || command === "-h") {
315
+ if (command === "help") {
321
316
  const helpCommand = command === "help" ? optionalPositional(parsed, 1) : undefined;
322
- io.stdout.write(`${helpCommand ? commandHelp(helpCommand) : HELP}\n`);
317
+ io.stdout.write(`${parsed.flags.all === true ? HELP_ALL : helpCommand ? commandHelp(helpCommand) : HELP}\n`);
323
318
  return 0;
324
319
  }
325
320
  if (parsed.flags.help === true) {
326
- io.stdout.write(`${commandHelp(command)}\n`);
321
+ io.stdout.write(`${command ? commandHelp(command) : HELP}\n`);
327
322
  return 0;
328
323
  }
329
- validateCommandFlags(parsed, command);
324
+ if (!command) {
325
+ return await handleStatus(parsed, io);
326
+ }
330
327
  if (command === "login") {
331
328
  return await handleLogin(parsed, io);
332
329
  }
@@ -337,27 +334,17 @@ export async function runCli(argv, io = {
337
334
  return await handleInstall(parsed, io);
338
335
  }
339
336
  const core = await coreOptions(parsed);
340
- if (command === "init") {
337
+ if (command === "new") {
341
338
  const status = await initWorkbenchSkill({ dir: parsed.positionals[1] ?? dirFlag(parsed) });
342
- return output(status, parsed, io, () => `Initialized Workbench skill at ${status.root}.`);
339
+ return output(status, parsed, io, () => `Created Workbench skill at ${status.root}.\nnext: edit SKILL.md, then run workbench eval`);
343
340
  }
344
341
  if (command === "status") {
345
- const status = await workbenchStatusSnapshot(core);
346
- const auth = await workbenchCliAuthStatus();
347
- return emitResult("workbench.status.v1", {
348
- project: status.project,
349
- worktree: status.worktree,
350
- runs: status.runs,
351
- remotes: status.remotes,
352
- auth: auth,
353
- next: status.next,
354
- }, parsed, io, () => formatStatusSnapshot({ ...status, auth }));
355
- }
356
- if (command === "check") {
357
- const result = await checkWorkbenchSkill(core);
358
- return output(result, parsed, io, () => formatCheck(result));
342
+ return await handleStatus(parsed, io);
359
343
  }
360
344
  if (command === "eval") {
345
+ if (parsed.flags.cloud === true) {
346
+ return await handleCloudEval(parsed, io);
347
+ }
361
348
  const runs = await evalWorkbenchSkill({
362
349
  ...core,
363
350
  version: optionalPositional(parsed, 1),
@@ -371,21 +358,36 @@ export async function runCli(argv, io = {
371
358
  if (failedRuns.length > 0) {
372
359
  return emitEvalFailure(runs, failedRuns, artifactIds, parsed, io);
373
360
  }
374
- return output(runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])), parsed, io, () => runs.map(formatRun).join("\n"));
361
+ const deltas = await evalDeltas(core, runs);
362
+ const next = await evalSuccessNextCommand(core, runs);
363
+ return emitResult("workbench.cli.eval.v1", {
364
+ result: runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
365
+ deltas: deltas,
366
+ next: next,
367
+ }, parsed, io, () => [
368
+ runs.map(formatRun).join("\n"),
369
+ ...deltas.map(formatEvalDelta),
370
+ ...(next ? [`next: ${next}`] : []),
371
+ ].filter(Boolean).join("\n"));
375
372
  }
376
373
  if (command === "improve") {
374
+ if (parsed.flags.cloud === true) {
375
+ return await handleCloudImprove(parsed, io);
376
+ }
377
+ const improverAgent = await resolveLocalImproverAgent(parsed, core);
377
378
  const result = await improveWorkbenchSkill({
378
379
  ...core,
379
380
  version: optionalPositional(parsed, 1),
380
- skill: stringFlag(parsed, "skill"),
381
- agent: stringFlag(parsed, "agent"),
381
+ skill: stringFlag(parsed, "skills"),
382
+ agent: stringFlag(parsed, "agents"),
383
+ ...(improverAgent ? { improverAgent } : {}),
382
384
  budget: intFlag(parsed, "budget"),
383
385
  samples: intFlag(parsed, "samples"),
384
386
  });
385
387
  return output({
386
388
  ...result,
387
389
  version: versionSummary(result.version),
388
- }, parsed, io, () => formatImproveResult(result));
390
+ }, parsed, io, () => `${formatImproveResult(result)}\nnext: workbench eval`);
389
391
  }
390
392
  if (command === "compare") {
391
393
  const comparison = await compareWorkbench({
@@ -394,92 +396,30 @@ export async function runCli(argv, io = {
394
396
  skills: stringFlag(parsed, "skills"),
395
397
  agents: stringFlag(parsed, "agents"),
396
398
  });
397
- return output(comparison, parsed, io, () => formatComparison(comparison));
398
- }
399
- if (command === "versions") {
400
- const versions = await listWorkbenchVersions(core);
401
- return output(versions.map(versionSummary), parsed, io, () => versions.map(formatVersion).join("\n") || "No versions.");
399
+ return output(manifestOnly(comparison), parsed, io, () => formatComparison(comparison));
402
400
  }
403
401
  if (command === "switch") {
404
402
  const versionRef = requiredPositional(parsed, 1, "workbench switch requires VERSION.");
405
403
  const version = await switchWorkbenchVersion(versionRef, core);
406
- return output(versionSummary(version), parsed, io, () => `Switched to ${version.id}.`);
404
+ return output(versionSummary(version), parsed, io, () => `Switched to ${displayRef(version.id)}.`);
407
405
  }
408
406
  if (command === "diff") {
409
- const range = requiredPositional(parsed, 1, "workbench diff requires A..B.");
407
+ const range = optionalPositional(parsed, 1) ?? await defaultDiffRange(core);
410
408
  const diffs = await diffWorkbenchVersions(range, core);
411
409
  return output(diffs, parsed, io, () => diffs.map((entry) => `${entry.status}\t${entry.path}`).join("\n") || "No diff.");
412
410
  }
413
411
  if (command === "show") {
414
- const ref = requiredPositional(parsed, 1, "workbench show requires REF.");
415
- const session = await showLocalAgentSession(ref);
416
- if (session) {
417
- return output(session, parsed, io, () => formatSessionDetail(session));
418
- }
419
- const value = await showWorkbenchRef(ref, core);
420
- return output(value, parsed, io, () => formatShow(value));
421
- }
422
- if (command === "files") {
423
- const ref = requiredPositional(parsed, 1, "workbench files requires REF.");
424
- const files = await filesForWorkbenchRef(ref, core);
425
- return output(files.map(fileSummary), parsed, io, () => files.map((file) => file.path).join("\n") || "No files.");
426
- }
427
- if (command === "list") {
428
- return await handleList(parsed, io);
429
- }
430
- if (command === "trace") {
431
- const ref = optionalPositional(parsed, 1);
432
- if (!ref) {
433
- throw new WorkbenchCodedError("usage", "workbench trace requires RUN_ID, JOB_ID, or TRACE_ID.", {
434
- remediation: "Run workbench list runs --json or workbench list traces --json.",
435
- exitCode: 2,
436
- });
437
- }
438
- const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
439
- const run = snapshot.runs.find((entry) => entry.id === ref);
440
- const job = snapshot.jobs.find((entry) => entry.id === ref);
441
- const traces = run
442
- ? snapshot.traces.filter((trace) => run.traceIds.includes(trace.id))
443
- : job
444
- ? snapshot.traces.filter((trace) => job.traceIds.includes(trace.id))
445
- : snapshot.traces.filter((trace) => trace.id === ref);
446
- if (traces.length === 0) {
447
- const jobs = run
448
- ? snapshot.jobs.filter((entry) => entry.runId === run.id)
449
- : job ? [job] : [];
450
- const details = jobs.flatMap((entry) => {
451
- const detail = workbenchJobEvidenceForSnapshot(snapshot, {
452
- runId: entry.runId,
453
- jobId: entry.id,
454
- });
455
- return detail ? [detail] : [];
456
- }).filter((detail) => detail.executions.some((execution) => execution.sessions.length > 0 ||
457
- execution.trace.spans.length > 0 ||
458
- execution.trace.events.length > 0 ||
459
- execution.trace.summaries.length > 0));
460
- if (details.length > 0) {
461
- return output(details, parsed, io, () => details.map(formatTraceDetail).join("\n"));
462
- }
463
- throw new WorkbenchCodedError("ref_not_found", `Trace not found: ${ref}`, {
464
- remediation: "Run workbench list runs --json, workbench list jobs --json, or workbench list traces --json.",
465
- subject: { ref },
466
- exitCode: 1,
467
- });
468
- }
469
- return output(traces, parsed, io, () => traces.map(formatTrace).join("\n"));
412
+ return await handleShow(parsed, io);
413
+ }
414
+ if (command === "log") {
415
+ return await handleLog(parsed, io);
470
416
  }
471
417
  if (command === "agent") {
472
418
  return await handleAgent(parsed, io);
473
419
  }
474
- if (command === "skills") {
475
- return await handleSkills(parsed, io);
476
- }
477
420
  if (command === "case") {
478
421
  return await handleCase(parsed, io);
479
422
  }
480
- if (command === "remote") {
481
- return await handleRemote(parsed, io);
482
- }
483
423
  if (command === "sync") {
484
424
  const result = await syncWorkbenchRemote({
485
425
  ...core,
@@ -496,34 +436,54 @@ export async function runCli(argv, io = {
496
436
  }, parsed, io, () => `${result.dryRun ? "Would sync" : "Synced"} ${result.remote.name}: pushed ${result.pushed}, pulled ${result.pulled}${result.upToDate ? " (up to date)" : ""}.`);
497
437
  }
498
438
  if (command === "publish") {
439
+ const preview = parsed.flags["dry-run"] === true
440
+ ? await previewPublishWithDerivedRemote(parsed)
441
+ : undefined;
442
+ if (preview) {
443
+ return emitResult("workbench.cli.publish.v1", {
444
+ remote: preview.remote,
445
+ version: versionSummary(preview.version),
446
+ visibility: preview.visibility,
447
+ installHandle: preview.installHandle,
448
+ installUrl: preview.installUrl,
449
+ pinnedInstallUrl: preview.pinnedInstallUrl,
450
+ dryRun: true,
451
+ }, parsed, io, () => [
452
+ `Would publish ${displayRef(preview.version.id)} to remote ${preview.remote.name}.`,
453
+ `Visibility: ${preview.visibility}`,
454
+ `Install: ${preview.installUrl}`,
455
+ `Pinned: ${preview.pinnedInstallUrl}`,
456
+ `next: workbench install ${preview.installHandle}`,
457
+ ].join("\n"));
458
+ }
459
+ const remote = await ensurePublishRemote(parsed);
499
460
  const result = await publishWorkbenchVersion({
500
461
  ...core,
501
462
  version: optionalPositional(parsed, 1),
502
- remote: stringFlag(parsed, "remote"),
463
+ remote,
503
464
  dryRun: parsed.flags["dry-run"] === true,
504
- visibility: parsePublishVisibility(stringFlag(parsed, "visibility")),
465
+ visibility: parsePublishVisibilityFlags(parsed),
505
466
  });
506
467
  return emitResult("workbench.cli.publish.v1", {
507
468
  remote: result.remote,
508
469
  version: versionSummary(result.version),
509
470
  visibility: result.visibility,
471
+ installHandle: result.installHandle,
510
472
  installUrl: result.installUrl,
511
473
  pinnedInstallUrl: result.pinnedInstallUrl,
512
474
  ...(result.dryRun ? { dryRun: true } : {}),
513
475
  }, parsed, io, () => [
514
- `${result.dryRun ? "Would publish" : "Published"} ${result.version.id} to remote ${result.remote.name}.`,
476
+ `${result.dryRun ? "Would publish" : "Published"} ${displayRef(result.version.id)} to remote ${result.remote.name}.`,
515
477
  `Visibility: ${result.visibility}`,
516
478
  `Install: ${result.installUrl}`,
517
479
  `Pinned: ${result.pinnedInstallUrl}`,
480
+ `next: workbench install ${result.installHandle}`,
518
481
  ].join("\n"));
519
482
  }
520
- if (command === "auth") {
521
- return await handleAuth(parsed, io);
522
- }
523
483
  if (command === "open") {
524
484
  if (parsed.flags.json === true) {
525
485
  const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
526
- return output(snapshot, parsed, io, () => "Read-only Workbench inspection data is available with --json.");
486
+ return output(manifestOnly(snapshot), parsed, io, () => "Read-only Workbench inspection data is available with --json.");
527
487
  }
528
488
  // The browser server serves committed object state through a read-only
529
489
  // snapshot path, so long-running commands do not block page loads.
@@ -545,29 +505,107 @@ export async function runCli(argv, io = {
545
505
  return emitError(error, parsed, io);
546
506
  }
547
507
  }
548
- async function handleList(parsed, io) {
549
- const kind = requiredPositional(parsed, 1, "workbench list requires runs|jobs|traces|artifacts|sessions.");
550
- if (kind === "sessions") {
551
- const sessions = await listLocalAgentSessions();
552
- return output(sessions, parsed, io, () => sessions.map(formatSession).join("\n") || "No local sessions.");
508
+ async function handleStatus(parsed, io) {
509
+ const status = await workbenchStatusSnapshot(await coreOptions(parsed));
510
+ const auth = await workbenchCliAuthStatus();
511
+ const cliStatus = statusWithCausalNext(status, auth);
512
+ return emitResult("workbench.status.v1", {
513
+ project: cliStatus.project,
514
+ worktree: cliStatus.worktree,
515
+ runs: cliStatus.runs,
516
+ remotes: cliStatus.remotes,
517
+ auth: auth,
518
+ next: cliStatus.next,
519
+ }, parsed, io, () => formatStatusSnapshot({ ...cliStatus, auth }));
520
+ }
521
+ async function handleLog(parsed, io) {
522
+ if (parsed.flags.runs === true && parsed.flags.versions === true) {
523
+ throw new WorkbenchCodedError("usage", "workbench log accepts only one of --runs or --versions.", {
524
+ remediation: "Run workbench log --runs or workbench log --versions.",
525
+ exitCode: 2,
526
+ });
553
527
  }
554
- const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(await coreOptions(parsed));
555
- if (kind === "runs") {
556
- return output(snapshot.runs, parsed, io, () => snapshot.runs.map(formatRun).join("\n") || "No runs.");
528
+ if (parsed.positionals.length > 1) {
529
+ if (parsed.flags.runs === true) {
530
+ throw new WorkbenchUserError("--runs does not accept a value.");
531
+ }
532
+ if (parsed.flags.versions === true) {
533
+ throw new WorkbenchUserError("--versions does not accept a value.");
534
+ }
535
+ rejectExtraInput(parsed, {
536
+ maxPositionals: 1,
537
+ message: "workbench log does not accept refs or paths.",
538
+ remediation: "Run workbench log, workbench log --runs, or workbench log --versions.",
539
+ });
540
+ }
541
+ const snapshot = await createWorkbenchInspectionSnapshot(await coreOptions(parsed));
542
+ const includeRuns = parsed.flags.versions !== true;
543
+ const includeVersions = parsed.flags.runs !== true;
544
+ const entries = [
545
+ ...(includeVersions ? snapshot.versions.map((version) => ({
546
+ kind: "version",
547
+ id: version.id,
548
+ createdAt: version.createdAt,
549
+ message: version.message,
550
+ fileCount: version.files.length,
551
+ })) : []),
552
+ ...(includeRuns ? snapshot.runs.map((run) => ({
553
+ kind: "run",
554
+ id: run.id,
555
+ createdAt: run.createdAt,
556
+ status: run.status,
557
+ versionId: run.versionId,
558
+ skillName: run.skillName,
559
+ agentName: run.agentName,
560
+ ...(run.score !== undefined ? { score: run.score } : {}),
561
+ })) : []),
562
+ ].sort((left, right) => right.createdAt.localeCompare(left.createdAt));
563
+ return emitResult("workbench.cli.log.v1", {
564
+ entries: entries,
565
+ }, parsed, io, () => entries.map(formatLogEntry).join("\n") || "No history.");
566
+ }
567
+ async function handleShow(parsed, io) {
568
+ const ref = requiredPositional(parsed, 1, "workbench show requires REF.");
569
+ const session = await showLocalAgentSession(ref);
570
+ if (session) {
571
+ return output(session, parsed, io, () => formatSessionDetail(session));
572
+ }
573
+ const core = await coreOptions(parsed);
574
+ const [objectRef, requestedPath] = splitShowRef(ref);
575
+ if (requestedPath) {
576
+ const runOrJobFile = await fileForRunOrJobRef(core, objectRef, requestedPath);
577
+ if (runOrJobFile) {
578
+ return output(runOrJobFile, parsed, io, () => formatShow(runOrJobFile));
579
+ }
580
+ const value = await showWorkbenchRef(ref, core);
581
+ return output(value, parsed, io, () => formatShow(value));
557
582
  }
558
- if (kind === "jobs") {
559
- return output(snapshot.jobs, parsed, io, () => snapshot.jobs.map(formatJob).join("\n") || "No jobs.");
583
+ const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
584
+ const version = snapshotVersionByRef(snapshot, objectRef);
585
+ if (version) {
586
+ return output(fileListing("version", version.id, version.files), parsed, io, () => formatFileListing("version", version.id, version.files));
560
587
  }
561
- if (kind === "traces") {
562
- return output(snapshot.traces.map(traceSummary), parsed, io, () => snapshot.traces.map(formatTrace).join("\n") || "No traces.");
588
+ const trace = snapshotObjectByRef(snapshot.traces, objectRef, "trace");
589
+ if (trace) {
590
+ return output(fileListing("trace", trace.id, trace.files), parsed, io, () => formatFileListing("trace", trace.id, trace.files));
563
591
  }
564
- if (kind === "artifacts") {
565
- return output(snapshot.artifacts.map(artifactSummary), parsed, io, () => snapshot.artifacts.map(formatArtifact).join("\n") || "No artifacts.");
592
+ const artifact = snapshotObjectByRef(snapshot.artifacts, objectRef, "artifact");
593
+ if (artifact) {
594
+ return output(fileListing("artifact", artifact.id, artifact.files), parsed, io, () => formatFileListing("artifact", artifact.id, artifact.files));
566
595
  }
567
- throw new WorkbenchUserError(`Unsupported list target: ${kind}`);
596
+ const details = evidenceDetailsForRunOrJob(snapshot, objectRef);
597
+ const evidenceFiles = evidenceFilesForRunOrJob(snapshot, objectRef);
598
+ if (details.length > 0 || evidenceFiles.length > 0) {
599
+ return output({
600
+ details: details,
601
+ files: evidenceFiles.map(fileSummary),
602
+ }, parsed, io, () => formatRunOrJobEvidence(details, evidenceFiles));
603
+ }
604
+ const value = await showWorkbenchRef(ref, core);
605
+ return output(value, parsed, io, () => formatShow(value));
568
606
  }
569
607
  async function handleAgent(parsed, io) {
570
- const subcommand = requiredPositional(parsed, 1, "workbench agent requires list|add|show|default|remove.");
608
+ const subcommand = requiredPositional(parsed, 1, "workbench agent requires list|add|rm.");
571
609
  if (subcommand === "list") {
572
610
  const agents = await listWorkbenchAgents(await coreOptions(parsed));
573
611
  return output(agents, parsed, io, () => agents.map(formatAgent).join("\n") || "No agents.");
@@ -587,173 +625,65 @@ async function handleAgent(parsed, io) {
587
625
  });
588
626
  return output(agent, parsed, io, () => `Added agent ${formatAgent(agent)}.`);
589
627
  }
590
- if (subcommand === "show") {
591
- const name = requiredPositional(parsed, 2, "workbench agent show requires NAME.");
592
- const agent = (await listWorkbenchAgents(await coreOptions(parsed))).find((entry) => entry.name === name);
593
- if (!agent) {
594
- throw new WorkbenchCodedError("ref_not_found", `Agent not found: ${name}`, {
595
- remediation: "Run workbench agent list.",
596
- subject: { agent: name },
597
- exitCode: 1,
598
- });
599
- }
600
- return output(agent, parsed, io, () => formatAgent(agent));
601
- }
602
- if (subcommand === "default") {
603
- const result = await setDefaultWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent default requires NAME."), await coreOptions(parsed));
604
- return output(result, parsed, io, () => `Default agent: ${result.defaultAgent}`);
605
- }
606
- if (subcommand === "remove") {
607
- const result = await removeWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent remove requires NAME."), await coreOptions(parsed));
628
+ if (subcommand === "rm") {
629
+ const result = await removeWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent rm requires NAME."), await coreOptions(parsed));
608
630
  return output(result, parsed, io, () => `Removed agent ${result.removed}.`);
609
631
  }
610
632
  throw new WorkbenchUserError(`Unsupported agent command: ${subcommand}`);
611
633
  }
612
- async function handleSkills(parsed, io) {
613
- const subcommand = requiredPositional(parsed, 1, "workbench skills requires list.");
614
- if (subcommand !== "list") {
615
- throw new WorkbenchUserError(`Unsupported skills command: ${subcommand}`);
616
- }
617
- const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(await coreOptions(parsed));
618
- return output(snapshot.skillSources, parsed, io, () => snapshot.skillSources.map((source) => {
619
- const where = source.kind === "remote"
620
- ? `${source.from}${source.ref ? `#${source.ref}` : ""}`
621
- : source.kind === "none"
622
- ? "baseline:none"
623
- : source.path;
624
- return `${source.name}\t${source.kind}\t${where}\tincludes=${source.includes?.length ?? 0}`;
625
- }).join("\n") || "No skills.");
626
- }
627
634
  async function handleCase(parsed, io) {
628
- const subcommand = requiredPositional(parsed, 1, "workbench case requires list|add|show|remove.");
629
- if (subcommand === "list") {
630
- const cases = await listWorkbenchCases(await coreOptions(parsed));
631
- return output(cases, parsed, io, () => cases.map((entry) => `${entry.id}\t${entry.path}`).join("\n") || "No cases.");
632
- }
635
+ const subcommand = requiredPositional(parsed, 1, "workbench case requires add.");
633
636
  if (subcommand === "add") {
634
- const record = await addWorkbenchCase({ ...(await coreOptions(parsed)), fromTraceId: stringFlag(parsed, "from") });
635
- return output(record, parsed, io, () => `Added case ${record.id}.`);
636
- }
637
- if (subcommand === "show") {
638
- const record = await showWorkbenchCase(requiredPositional(parsed, 2, "workbench case show requires CASE_ID."), await coreOptions(parsed));
639
- return output(record, parsed, io, () => record.content);
640
- }
641
- if (subcommand === "remove") {
642
- const result = await removeWorkbenchCase(requiredPositional(parsed, 2, "workbench case remove requires CASE_ID."), await coreOptions(parsed));
643
- return output(result, parsed, io, () => `Removed case ${result.removed}.`);
644
- }
645
- throw new WorkbenchUserError(`Unsupported case command: ${subcommand}`);
646
- }
647
- async function handleRemote(parsed, io) {
648
- const subcommand = requiredPositional(parsed, 1, "workbench remote requires add|list|remove.");
649
- if (subcommand === "add") {
650
- const name = requiredFlag(parsed, {
651
- flag: "name",
652
- usage: "workbench remote add requires --name NAME.",
653
- remediation: "Run workbench remote add --name origin --url https://HOST/skills/OWNER/SKILL.",
654
- });
655
- const url = requiredFlag(parsed, {
656
- flag: "url",
657
- usage: "workbench remote add requires --url URL.",
658
- remediation: `Run workbench remote add --name ${name} --url https://HOST/skills/OWNER/SKILL.`,
659
- });
637
+ const core = await coreOptions(parsed);
638
+ const sourceRef = requiredPositional(parsed, 2, "workbench case add requires RUN_ID.");
660
639
  rejectExtraInput(parsed, {
661
- maxPositionals: 2,
662
- message: "workbench remote add accepts --name NAME and --url URL, not positional NAME or URL.",
663
- remediation: "Run workbench remote add --name origin --url https://HOST/skills/OWNER/SKILL.",
664
- });
665
- const result = await addWorkbenchRemote(name, url, {
666
- ...(await coreOptions(parsed)),
667
- replace: parsed.flags.replace === true,
668
- dryRun: parsed.flags["dry-run"] === true,
640
+ maxPositionals: 3,
641
+ message: "workbench case add accepts one RUN_ID argument.",
642
+ remediation: "Run workbench case add RUN_ID.",
669
643
  });
670
- return emitResult("workbench.cli.remote-add.v1", {
671
- remote: result.remote,
672
- operation: result.operation,
673
- ...(result.dryRun ? { dryRun: true } : {}),
674
- }, parsed, io, () => `${result.dryRun ? "Would update" : "Remote"} ${result.remote.name}: ${result.operation}\t${result.remote.kind}\t${result.remote.url}`);
675
- }
676
- if (subcommand === "list") {
677
- const remotes = await listWorkbenchRemotes(await coreOptions(parsed));
678
- return emitResult("workbench.cli.remote-list.v1", {
679
- remotes: remotes,
680
- }, parsed, io, () => remotes.map((remote) => `${remote.name}\t${remote.kind}\t${remote.url}`).join("\n") || "No remotes.");
681
- }
682
- if (subcommand === "remove") {
683
- const result = await removeWorkbenchRemote(requiredPositional(parsed, 2, "workbench remote remove requires NAME."), await coreOptions(parsed));
684
- return emitResult("workbench.cli.remote-remove.v1", {
685
- remote: result.remote,
686
- removed: result.removed,
687
- }, parsed, io, () => result.removed ? `Removed remote ${result.remote}.` : `Remote ${result.remote} was not configured.`);
688
- }
689
- throw new WorkbenchUserError(`Unsupported remote command: ${subcommand}`);
690
- }
691
- async function handleAuth(parsed, io) {
692
- const subcommand = optionalPositional(parsed, 1) ?? "status";
693
- if (subcommand === "status") {
694
- const targetRaw = optionalPositional(parsed, 2);
695
- const profile = authProfileFlag(parsed);
696
- const store = localWorkbenchAdapterAuthStore(adapterAuthStoreRoot());
697
- const cliAuth = await workbenchCliAuthStatus();
698
- if (targetRaw) {
699
- const status = await store.status(parseAuthTarget(targetRaw, profile));
700
- return emitResult("workbench.cli.auth-status.v1", {
701
- workbenchCloud: cliAuth.workbenchCloud,
702
- adapters: [authStatusRecordToJson(status)],
703
- }, parsed, io, () => [
704
- formatWorkbenchCloudAuthStatus(cliAuth.workbenchCloud),
705
- "Adapter auth:",
706
- formatAuthStatusRecord(status),
707
- ].join("\n"));
708
- }
709
- const statuses = await store.listStatus();
710
- const required = await requiredAgentAuthStatuses(parsed, statuses);
711
- return emitResult("workbench.cli.auth-status.v1", {
712
- workbenchCloud: cliAuth.workbenchCloud,
713
- adapters: cliAuth.adapters,
714
- required: required,
715
- }, parsed, io, () => formatAuthStatusList(cliAuth.workbenchCloud, statuses, required));
716
- }
717
- if (subcommand === "connect") {
718
- const targetRaw = requiredPositional(parsed, 2, "workbench auth connect requires ADAPTER[/SLOT].");
719
- const target = parseAuthTarget(targetRaw, authProfileFlag(parsed));
720
- const method = authMethod(parsed, target.adapterId);
721
- const bundle = await collectAdapterAuthBundle({
722
- target,
723
- method,
724
- profileRoot: path.resolve(stringFlag(parsed, "profile-root") ?? os.homedir()),
725
- });
726
- const saved = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).put(bundle);
727
- const remote = await uploadAdapterConnection(saved, parsed);
728
- return emitResult("workbench.cli.auth-connect.v1", {
729
- localAdapter: {
730
- adapter: saved.adapterId,
731
- ...(saved.slot ? { slot: saved.slot } : {}),
732
- profile: saved.profile,
733
- method: saved.method,
734
- status: saved.status,
735
- version: saved.version,
736
- updatedAt: saved.updatedAt,
737
- },
738
- workbenchCloud: remote,
739
- }, parsed, io, () => `Connected ${formatAuthTarget(saved)} ${saved.method} auth v${saved.version}; Workbench Cloud: ${remote.sync}${remote.reason ? ` (${remote.reason})` : ""}.`);
740
- }
741
- if (subcommand === "disconnect") {
742
- const targetRaw = requiredPositional(parsed, 2, "workbench auth disconnect requires ADAPTER[/SLOT].");
743
- const target = parseAuthTarget(targetRaw, authProfileFlag(parsed));
744
- await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).disconnect(target);
745
- const remote = await deleteAdapterConnectionRemote(target, parsed);
746
- return emitResult("workbench.cli.auth-disconnect.v1", {
747
- localAdapter: {
748
- adapter: target.adapterId,
749
- ...(target.slot ? { slot: target.slot } : {}),
750
- profile: target.profile,
751
- status: "disconnected",
752
- },
753
- workbenchCloud: remote,
754
- }, parsed, io, () => `Disconnected ${formatAuthTarget(target)}; Workbench Cloud: ${remote.sync}${remote.reason ? ` (${remote.reason})` : ""}.`);
755
- }
756
- throw new WorkbenchUserError(`Unsupported auth command: ${subcommand}`);
644
+ const record = await addWorkbenchCase({ ...core, fromTraceId: await traceIdForCaseSource(core, sourceRef) });
645
+ return output(record, parsed, io, () => `Added draft case ${record.id}. Edit .workbench/cases/${record.path}/case.yaml before using it as score evidence.`);
646
+ }
647
+ throw new WorkbenchUserError(`Unknown command: workbench case ${subcommand}`);
648
+ }
649
+ async function handleAdapterLogin(provider, parsed, io) {
650
+ const target = parseAuthTarget(provider, authProfileFlag(parsed));
651
+ const method = authMethod(parsed, target.adapterId);
652
+ const bundle = await collectAdapterAuthBundle({
653
+ target,
654
+ method,
655
+ profileRoot: path.resolve(stringFlag(parsed, "profile-root") ?? os.homedir()),
656
+ });
657
+ const saved = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).put(bundle);
658
+ const remote = await uploadAdapterConnection(saved, parsed);
659
+ return emitResult("workbench.cli.login.v1", {
660
+ provider: saved.adapterId,
661
+ localAdapter: {
662
+ adapter: saved.adapterId,
663
+ ...(saved.slot ? { slot: saved.slot } : {}),
664
+ profile: saved.profile,
665
+ method: saved.method,
666
+ status: saved.status,
667
+ version: saved.version,
668
+ updatedAt: saved.updatedAt,
669
+ },
670
+ workbenchCloud: remote,
671
+ }, parsed, io, () => `Connected ${formatAuthTarget(saved)} ${saved.method} auth v${saved.version}; Workbench Cloud: ${remote.sync}${remote.reason ? ` (${remote.reason})` : ""}.`);
672
+ }
673
+ async function handleAdapterLogout(provider, parsed, io) {
674
+ const target = parseAuthTarget(provider, authProfileFlag(parsed));
675
+ await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).disconnect(target);
676
+ const remote = await deleteAdapterConnectionRemote(target, parsed);
677
+ return emitResult("workbench.cli.logout.v1", {
678
+ provider: target.adapterId,
679
+ localAdapter: {
680
+ adapter: target.adapterId,
681
+ ...(target.slot ? { slot: target.slot } : {}),
682
+ profile: target.profile,
683
+ status: "disconnected",
684
+ },
685
+ workbenchCloud: remote,
686
+ }, parsed, io, () => `Disconnected ${formatAuthTarget(target)}; Workbench Cloud: ${remote.sync}${remote.reason ? ` (${remote.reason})` : ""}.`);
757
687
  }
758
688
  function getCliVersion() {
759
689
  const manifest = require("../package.json");
@@ -763,19 +693,17 @@ function commandHelp(command) {
763
693
  return COMMAND_HELP[command] ?? HELP;
764
694
  }
765
695
  function validateCommandFlags(parsed, command) {
766
- if (!command) {
767
- return;
768
- }
769
- const allowed = allowedFlagsForCommand(parsed, command);
696
+ const effectiveCommand = command ?? (parsed.flags.version === true ? "version" : "status");
697
+ const allowed = allowedFlagsForCommand(parsed, effectiveCommand);
770
698
  if (!allowed) {
771
699
  return;
772
700
  }
773
- const allowedSet = new Set(allowed);
701
+ const allowedSet = new Set(Object.keys(allowed));
774
702
  for (const [name, value] of Object.entries(parsed.flags)) {
775
- if (!allowedSet.has(name) && name !== "help" && name !== "version") {
776
- throw new WorkbenchUserError(`Unsupported flag --${name} for workbench ${command}.`);
703
+ if (!allowedSet.has(name)) {
704
+ throw new WorkbenchUserError(`Unsupported flag --${name} for workbench ${effectiveCommand}.`);
777
705
  }
778
- validateFlagValue(name, value, command === "install" && (name === "agent" || name === "skill"));
706
+ validateFlagValue(name, value, allowed[name]);
779
707
  }
780
708
  }
781
709
  function allowedFlagsForCommand(parsed, command) {
@@ -784,25 +712,12 @@ function allowedFlagsForCommand(parsed, command) {
784
712
  return COMMAND_FLAGS[command];
785
713
  }
786
714
  const subcommand = parsed.positionals[1] ?? subcommands.defaultSubcommand;
787
- return subcommand ? subcommands.flags[subcommand] ?? ["json"] : ["json"];
715
+ return subcommand ? subcommands.flags[subcommand] ?? { ...COMMON_FLAGS, ...HELP_FLAG } : { ...COMMON_FLAGS, ...HELP_FLAG };
788
716
  }
789
- function validateFlagValue(name, value, repeatString = false) {
790
- const kind = FLAG_DEFINITIONS[name];
717
+ function validateFlagValue(name, value, kind) {
791
718
  if (!kind) {
792
719
  return;
793
720
  }
794
- if (repeatString) {
795
- if (Array.isArray(value)) {
796
- if (value.some((entry) => !entry.trim())) {
797
- throw new WorkbenchUserError(`--${name} requires a non-empty value.`);
798
- }
799
- return;
800
- }
801
- if (typeof value === "string" && value.trim()) {
802
- return;
803
- }
804
- throw new WorkbenchUserError(`--${name} requires a non-empty value.`);
805
- }
806
721
  if (kind === "boolean") {
807
722
  if (value !== true) {
808
723
  throw new WorkbenchUserError(`--${name} does not accept a value.`);
@@ -826,15 +741,28 @@ function validateFlagValue(name, value, repeatString = false) {
826
741
  }
827
742
  }
828
743
  const CONFIG_SCHEMA = "workbench.cli.config.v1";
744
+ const DEFAULT_WORKBENCH_CLOUD_BASE_URL = "https://v2.workbench.ai";
829
745
  const API_REQUEST_MAX_ATTEMPTS = 3;
830
746
  const API_REQUEST_GZIP_THRESHOLD_BYTES = 1024 * 1024;
747
+ const CLOUD_RUN_TIMEOUT_MS = 30 * 60 * 1000;
748
+ const CLOUD_RUN_POLL_INTERVAL_MS = 3000;
831
749
  async function handleLogin(parsed, io) {
832
- if (parsed.positionals.length > 1) {
833
- throw new WorkbenchUserError("workbench login accepts no positional arguments.");
750
+ const provider = optionalPositional(parsed, 1);
751
+ if (provider) {
752
+ if (parsed.positionals.length > 2) {
753
+ throw new WorkbenchUserError("workbench login PROVIDER accepts only one provider argument.");
754
+ }
755
+ if (parsed.flags["start-only"] === true || parsed.flags.wait === true || parsed.flags.timeout !== undefined || parsed.flags["no-open"] === true) {
756
+ throw new WorkbenchCodedError("usage", "Workbench Cloud login flags do not apply to provider login.", {
757
+ remediation: `Run workbench login ${provider} --method ${authMethod(parsed, provider)}.`,
758
+ exitCode: 2,
759
+ });
760
+ }
761
+ return await handleAdapterLogin(provider, parsed, io);
834
762
  }
835
763
  if (parsed.flags["start-only"] === true && parsed.flags.wait === true) {
836
764
  throw new WorkbenchCodedError("usage", "workbench login accepts only one of --start-only or --wait.", {
837
- remediation: "Run workbench login --start-only or workbench login --wait --timeout 120.",
765
+ remediation: "Run workbench login --start-only or workbench login --wait.",
838
766
  exitCode: 2,
839
767
  });
840
768
  }
@@ -843,22 +771,17 @@ async function handleLogin(parsed, io) {
843
771
  const timeoutSeconds = intFlag(parsed, "timeout");
844
772
  if (startOnly && timeoutSeconds !== undefined) {
845
773
  throw new WorkbenchCodedError("usage", "workbench login --timeout only applies with --wait.", {
846
- remediation: "Run workbench login --start-only, then workbench login --wait --timeout 120.",
847
- exitCode: 2,
848
- });
849
- }
850
- if (waitOnly && timeoutSeconds === undefined) {
851
- throw new WorkbenchCodedError("usage", "workbench login --wait requires --timeout N.", {
852
- remediation: "Run workbench login --wait --timeout 120.",
774
+ remediation: "Run workbench login --start-only, then workbench login --wait.",
853
775
  exitCode: 2,
854
776
  });
855
777
  }
856
778
  const config = await loadConfig();
857
- const baseUrl = selectWorkbenchBaseUrl({
858
- explicitBaseUrl: stringFlag(parsed, "base-url"),
779
+ const explicitBaseUrl = stringFlag(parsed, "base-url");
780
+ const pending = waitOnly ? await readPendingDeviceAuthorization(explicitBaseUrl) : null;
781
+ const baseUrl = pending?.baseUrl ?? selectWorkbenchBaseUrl({
782
+ explicitBaseUrl,
859
783
  configBaseUrl: config.baseUrl,
860
784
  });
861
- const pending = waitOnly ? await readPendingDeviceAuthorization(baseUrl) : null;
862
785
  const record = pending ?? await startDeviceAuthorization(baseUrl);
863
786
  const freshAuthorization = pending === null;
864
787
  if (startOnly) {
@@ -873,8 +796,8 @@ async function handleLogin(parsed, io) {
873
796
  verificationUriComplete: record.verification_uri_complete,
874
797
  userCode: record.user_code,
875
798
  expiresAt: record.expiresAt,
876
- resume: "workbench login --wait --timeout 120",
877
- }, parsed, io, () => `Open ${record.verification_uri_complete}\nCode: ${record.user_code}\nResume: workbench login --wait --timeout 120`);
799
+ resume: "workbench login --wait",
800
+ }, parsed, io, () => `Open ${record.verification_uri_complete}\nCode: ${record.user_code}\nResume: workbench login --wait`);
878
801
  }
879
802
  await writePendingDeviceAuthorization(record);
880
803
  if (freshAuthorization && !parsed.flags.json) {
@@ -911,15 +834,16 @@ async function handleLogin(parsed, io) {
911
834
  }, parsed, io, () => `Workbench Cloud: authenticated${username ? ` as ${username}` : ""}\nWorkbench API: ${baseUrl}`);
912
835
  }
913
836
  async function handleLogout(parsed, io) {
914
- if (parsed.positionals.length > 1) {
915
- throw new WorkbenchUserError("workbench logout accepts no positional arguments.");
837
+ const provider = optionalPositional(parsed, 1);
838
+ if (provider) {
839
+ if (parsed.positionals.length > 2) {
840
+ throw new WorkbenchUserError("workbench logout PROVIDER accepts only one provider argument.");
841
+ }
842
+ return await handleAdapterLogout(provider, parsed, io);
916
843
  }
917
844
  const config = await loadConfig();
918
845
  const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
919
846
  const tokenPresent = Boolean(config.accessToken);
920
- if (tokenPresent && !baseUrl) {
921
- throw new WorkbenchUserError("Missing Workbench API URL. Set WORKBENCH_API_URL or run `workbench login --base-url URL`.");
922
- }
923
847
  let revoke = "skipped";
924
848
  if (config.accessToken && baseUrl) {
925
849
  try {
@@ -950,51 +874,33 @@ async function handleLogout(parsed, io) {
950
874
  `Logged out of Workbench${baseUrl ? ` (${baseUrl})` : ""}.`,
951
875
  `Token: ${tokenPresent ? "present" : "absent"}; revoke ${revoke}; config ${configRemoved ? "removed" : "unchanged"}.`,
952
876
  adapterAuthRetained
953
- ? "Local adapter auth records were retained; run workbench auth disconnect ADAPTER to remove them."
877
+ ? "Local adapter auth records were retained; run workbench logout PROVIDER to remove them."
954
878
  : "No local adapter auth records remain.",
955
879
  ].join("\n"));
956
880
  }
957
881
  async function handleInstall(parsed, io) {
958
- const source = requiredFlag(parsed, {
959
- flag: "source",
960
- usage: "workbench install requires --source SOURCE.",
961
- remediation: "Run workbench install --source https://HOST/skills/OWNER/SKILL --agent codex.",
962
- });
882
+ const sourceInput = requiredPositional(parsed, 1, "workbench install requires HANDLE_OR_URL.");
963
883
  rejectExtraInput(parsed, {
964
- maxPositionals: 1,
965
- message: "workbench install accepts --source SOURCE, not positional SOURCE.",
966
- remediation: "Run workbench install --source https://HOST/skills/OWNER/SKILL --agent codex.",
884
+ maxPositionals: 2,
885
+ message: "workbench install accepts one HANDLE_OR_URL argument.",
886
+ remediation: "Run workbench install OWNER/SKILL --to codex.",
967
887
  });
968
- if (parsed.flags.list !== true && stringsFlag(parsed, "agent").length === 0 && parsed.flags.local !== true) {
969
- throw new WorkbenchCodedError("install_target_required", "workbench install requires an explicit target.", {
970
- remediation: "Run workbench install --source SOURCE --agent codex, workbench install --source SOURCE --agent claude, or workbench install --source SOURCE --local.",
971
- exitCode: 2,
972
- });
973
- }
888
+ const source = await resolveWorkbenchInstallSourceInput(sourceInput);
974
889
  const workbenchSource = parseWorkbenchInstallSource(source);
975
890
  if (!workbenchSource) {
976
891
  throw new WorkbenchCodedError("usage", "workbench install requires a Workbench Cloud source URL.", {
977
- remediation: "Run workbench install --source https://HOST/skills/OWNER/SKILL --agent codex.",
892
+ remediation: "Run workbench install OWNER/SKILL --to codex.",
978
893
  exitCode: 2,
979
894
  });
980
895
  }
981
896
  const snapshot = await fetchWorkbenchInstallSourceSnapshot(workbenchSource, source);
982
897
  const sourceSummary = workbenchInstallSourceSummary(workbenchSource, snapshot);
983
- if (parsed.flags.list === true) {
984
- return emitResult("workbench.cli.install.v1", {
985
- source: sourceSummary,
986
- skills: [snapshot.name],
987
- fileCount: snapshot.files.length,
988
- targets: installTargetsToJson(supportedInstallTargets()),
989
- }, parsed, io, () => [
990
- `${snapshot.name}\t${snapshot.versionId}\tfiles=${snapshot.files.length}`,
991
- "Targets:",
992
- ...supportedInstallTargets().map((target) => ` ${target.agent}\t${target.destination}`),
993
- ].join("\n"));
994
- }
898
+ const config = await loadConfig();
899
+ const toTargets = stringsFlag(parsed, "to");
900
+ const selectedTargets = toTargets.length > 0 ? normalizeInstallTargetNames(toTargets) : await defaultInstallTargetNames(config);
995
901
  const targets = resolveInstallTargets({
996
- agents: stringsFlag(parsed, "agent"),
997
- local: parsed.flags.local === true,
902
+ agents: selectedTargets.filter((target) => target !== "local"),
903
+ local: selectedTargets.some((target) => target === "local"),
998
904
  skillName: snapshot.name,
999
905
  });
1000
906
  const result = await installSnapshotToTargets({
@@ -1003,6 +909,9 @@ async function handleInstall(parsed, io) {
1003
909
  overwrite: parsed.flags.yes === true,
1004
910
  dryRun: parsed.flags["dry-run"] === true,
1005
911
  });
912
+ if (toTargets.length > 0 && parsed.flags["dry-run"] !== true) {
913
+ await writeConfig({ ...config, installTargets: selectedTargets });
914
+ }
1006
915
  return emitResult("workbench.cli.install.v1", {
1007
916
  source: sourceSummary,
1008
917
  result: result.result,
@@ -1016,6 +925,327 @@ async function handleInstall(parsed, io) {
1016
925
  ...result.targets.map((target) => ` ${target.agent}\t${target.previous}\t${target.destination}`),
1017
926
  ].join("\n"));
1018
927
  }
928
// Runs an eval through Workbench Cloud and reports the resulting runs.
// Runs whose status is "failed" or "canceled" divert to emitEvalFailure;
// otherwise the result is emitted under the "workbench.cli.eval.v1" schema.
async function handleCloudEval(parsed, io) {
    const execution = await startCloudExecution("eval", parsed);
    const { core, runs } = execution;
    const artifactIds = await artifactIdsByRunId(core, runs);
    const unsuccessful = runs.filter((run) => run.status === "failed" || run.status === "canceled");
    if (unsuccessful.length > 0) {
        return emitEvalFailure(runs, unsuccessful, artifactIds, parsed, io);
    }
    const deltas = await evalDeltas(core, runs);
    const next = await evalSuccessNextCommand(core, runs);
    const payload = {
        result: runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
        deltas,
        next,
        cloud: cloudExecutionSummary(execution),
    };
    // Human-readable rendering; empty entries are dropped before joining.
    const renderText = () => {
        const lines = [
            `Completed hosted eval on ${execution.remote.url}.`,
            runs.map(formatRun).join("\n"),
            ...deltas.map(formatEvalDelta),
        ];
        if (next) {
            lines.push(`next: ${next}`);
        }
        return lines.filter(Boolean).join("\n");
    };
    return emitResult("workbench.cli.eval.v1", payload, parsed, io, renderText);
}
949
// Runs an improve through Workbench Cloud and reports the resulting runs.
// Throws an "improve_failed" coded error when any run ended "failed" or
// "canceled"; otherwise it may switch the local source to the hosted output
// version and emits the result under "workbench.cli.improve.v1".
async function handleCloudImprove(parsed, io) {
    const execution = await startCloudExecution("improve", parsed);
    const { core, runs } = execution;
    const artifactIds = await artifactIdsByRunId(core, runs);
    const unsuccessful = runs.filter((run) => run.status === "failed" || run.status === "canceled");
    if (unsuccessful.length > 0) {
        const [first] = unsuccessful;
        throw new WorkbenchCodedError("improve_failed", "Hosted improve failed; evidence was saved.", {
            remediation: `Run workbench show ${first.id}.`,
            subject: {
                runIds: unsuccessful.map((run) => run.id),
                statuses: Object.fromEntries(unsuccessful.map((run) => [run.id, run.status])),
            },
            exitCode: 1,
        });
    }
    const switchedVersionId = await switchHostedImproveVersionIfPromoted(execution);
    const next = cloudImproveNextCommand(runs);
    const payload = {
        result: runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
        next,
        cloud: cloudExecutionSummary(execution),
    };
    // Only report the switch when one actually happened.
    if (switchedVersionId) {
        payload.switchedVersionId = switchedVersionId;
    }
    const renderText = () => {
        const lines = [
            `Completed hosted improve on ${execution.remote.url}.`,
            runs.map(formatRun).join("\n"),
        ];
        if (switchedVersionId) {
            lines.push(`Switched local source to ${displayRef(switchedVersionId)}.`);
        }
        if (next) {
            lines.push(`next: ${next}`);
        }
        return lines.filter(Boolean).join("\n");
    };
    return emitResult("workbench.cli.improve.v1", payload, parsed, io, renderText);
}
978
// Chooses install targets when none were given explicitly.
// A non-empty `config.installTargets` wins. Otherwise every supported
// non-"local" target whose destination's grandparent directory exists is
// selected, and ["local"] is the fallback when nothing is detected.
async function defaultInstallTargetNames(config) {
    const remembered = config.installTargets;
    if (remembered && remembered.length > 0) {
        return remembered;
    }
    const present = [];
    for (const { agent, destination } of supportedInstallTargets()) {
        if (agent === "local") {
            continue;
        }
        // Two levels above the skill destination.
        const agentHome = path.dirname(path.dirname(destination));
        if (await pathExists(agentHome)) {
            present.push(agent);
        }
    }
    return present.length === 0 ? ["local"] : present;
}
994
// Trims and lower-cases install target names, rejects anything other than
// "codex", "claude" or "local" with a usage error, and returns the names
// de-duplicated in first-seen order.
function normalizeInstallTargetNames(values) {
    const unique = new Set();
    for (const raw of values) {
        const name = raw.trim().toLowerCase();
        const supported = name === "codex" || name === "claude" || name === "local";
        if (!supported) {
            throw new WorkbenchCodedError("usage", `Unsupported install target: ${raw}`, {
                remediation: "Use --to codex, --to claude, or --to local.",
                exitCode: 2,
            });
        }
        unique.add(name);
    }
    return [...unique];
}
1008
// Resolves to true when fs.access succeeds for `filePath`, false when it
// fails for any reason.
async function pathExists(filePath) {
    try {
        await fs.access(filePath);
    }
    catch {
        return false;
    }
    return true;
}
1017
// Starts a hosted `eval` or `improve` for the skill in the working directory
// and waits for the returned runs to reach a terminal state.
// Steps, in order: resolve the Cloud remote, require a Cloud token, sync the
// remote, snapshot the local state, look up the skill id, POST the request,
// sync again, then poll via waitForCloudRuns.
// Returns the core options, remote, skill id, finished runs, the version id
// the local source started from, the parsed source, and before/after sync
// counters.
async function startCloudExecution(command, parsed) {
    const root = dirFlag(parsed) ?? process.cwd();
    const remote = await ensureCloudRemoteForExecution(root, parsed);
    const source = parseWorkbenchInstallSource(remote.url);
    if (!source) {
        throw new WorkbenchCodedError("remote_invalid_url", `Workbench remote is not a Cloud skill URL: ${remote.url}`, {
            remediation: "Run workbench publish to recreate the Workbench Cloud link.",
            subject: { remote: remote.name, url: remote.url },
            exitCode: 2,
        });
    }
    const token = await workbenchCloudToken({ baseUrl: source.baseUrl });
    if (!token) {
        throw new WorkbenchCodedError("auth_required", `workbench ${command} --cloud requires Workbench Cloud auth.`, {
            remediation: `Run workbench login --base-url ${source.baseUrl}.`,
            exitCode: 1,
        });
    }
    const core = { dir: root, authToken: token };
    // A fresh options object is built for every sync call.
    const syncRemote = () => syncWorkbenchRemote({ ...core, remote: remote.name });
    const syncCounts = (sync) => ({ pushed: sync.pushed, pulled: sync.pulled, upToDate: sync.upToDate });
    const syncBefore = await syncRemote();
    const startSnapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const skillId = await resolveCloudSkillId(source);
    const action = command === "improve" ? "/improve" : "/runs";
    const endpoint = `/api/workbench/skills/${encodeURIComponent(skillId)}${action}`;
    const response = await apiRequest(endpoint, { method: "POST", body: cloudExecutionRequestBody(command, parsed) }, source.baseUrl);
    const runs = response.runs ?? [];
    if (runs.length === 0) {
        throw new WorkbenchCodedError("cloud_run_missing", `Workbench Cloud did not return a run for ${command}.`, {
            retryable: true,
            remediation: "Run workbench log --runs.",
            subject: { remote: remote.name, skillId },
            exitCode: 1,
        });
    }
    const initialSync = await syncRemote();
    const completed = await waitForCloudRuns({ core, remote, runs, initialSync });
    return {
        core,
        remote,
        skillId,
        runs: completed.runs,
        startVersionId: startSnapshot.status.currentVersionId ?? startSnapshot.refs.current,
        source,
        sync: {
            before: syncCounts(syncBefore),
            after: syncCounts(completed.sync),
        },
    };
}
1069
// Polls the local inspection snapshot until every requested run is present
// and terminal, syncing the remote between polls.
// `input` carries `core`, `remote`, `runs` and `initialSync`.
// Timeout and poll interval come from WORKBENCH_CLOUD_RUN_TIMEOUT_MS and
// WORKBENCH_CLOUD_RUN_POLL_INTERVAL_MS when those are positive integers,
// otherwise from the module constants.
// Returns `{ runs, sync }`; throws "cloud_run_missing" when a run has no
// usable id and "cloud_run_pending" once the deadline has passed.
async function waitForCloudRuns(input) {
    const runIds = input.runs
        .map((run) => run.id)
        .filter((id) => typeof id === "string" && id.length > 0);
    const everyRunHasId = runIds.length !== 0 && runIds.length === input.runs.length;
    if (!everyRunHasId) {
        throw new WorkbenchCodedError("cloud_run_missing", "Workbench Cloud did not return a run id.", {
            retryable: true,
            remediation: "Run workbench log --runs.",
            exitCode: 1,
        });
    }
    let sync = input.initialSync;
    const timeoutMs = positiveIntEnv("WORKBENCH_CLOUD_RUN_TIMEOUT_MS") ?? CLOUD_RUN_TIMEOUT_MS;
    const pollIntervalMs = positiveIntEnv("WORKBENCH_CLOUD_RUN_POLL_INTERVAL_MS") ?? CLOUD_RUN_POLL_INTERVAL_MS;
    const deadline = Date.now() + timeoutMs;
    for (;;) {
        const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(input.core);
        // Keep only the runs the snapshot already knows about, in request order.
        const runs = [];
        for (const id of runIds) {
            const match = snapshot.runs.find((entry) => entry.id === id);
            if (match) {
                runs.push(match);
            }
        }
        if (runs.length === runIds.length && runs.every(isTerminalRun)) {
            return { runs, sync };
        }
        if (Date.now() >= deadline) {
            throw new WorkbenchCodedError("cloud_run_pending", "Hosted Workbench run is still running.", {
                retryable: true,
                remediation: runIds[0] ? `Run workbench show ${runIds[0]}.` : "Run workbench log --runs.",
                subject: {
                    runIds,
                    statuses: Object.fromEntries(runs.map((run) => [run.id, run.status])),
                },
                exitCode: 1,
            });
        }
        await sleep(pollIntervalMs);
        sync = await syncWorkbenchRemote({ ...input.core, remote: input.remote.name });
    }
}
1107
// True when the run's status is "succeeded", "failed" or "canceled".
function isTerminalRun(run) {
    switch (run.status) {
        case "succeeded":
        case "failed":
        case "canceled":
            return true;
        default:
            return false;
    }
}
1110
// Switches the local source to the version produced by a hosted improve,
// but only when the Cloud `current` ref equals that version.
// Returns the switched version id, or undefined when no succeeded run has an
// output version or the Cloud ref does not match it.
// Throws "worktree_changed" when the local current version differs from the
// one recorded when the run started.
async function switchHostedImproveVersionIfPromoted(started) {
    const promotedRun = started.runs.find((run) => run.status === "succeeded" && run.outputVersionId);
    const outputVersionId = promotedRun?.outputVersionId;
    if (!outputVersionId) {
        return undefined;
    }
    const refs = await fetchCloudObjectRefs(started);
    if (refs.current !== outputVersionId) {
        return undefined;
    }
    await listWorkbenchVersions(started.core);
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(started.core);
    const currentVersionId = snapshot.status.currentVersionId ?? snapshot.refs.current;
    const startedFrom = started.startVersionId;
    const localMoved = Boolean(startedFrom && currentVersionId && currentVersionId !== startedFrom);
    if (localMoved) {
        throw new WorkbenchCodedError("worktree_changed", "Local source changed while hosted improve was running; refusing to overwrite it.", {
            remediation: `Review workbench diff, then run workbench switch ${outputVersionId} when ready.`,
            subject: {
                startedFrom,
                current: currentVersionId,
                hostedVersion: outputVersionId,
            },
            exitCode: 1,
        });
    }
    const { id } = await switchWorkbenchVersion(outputVersionId, started.core);
    return id;
}
1136
// Requests the skill's `/objects` endpoint and returns `objectPack.refs`,
// or an empty object when the response carries none.
async function fetchCloudObjectRefs(started) {
    const endpoint = `/api/workbench/skills/${encodeURIComponent(started.skillId)}/objects`;
    const response = await apiRequest(endpoint, {}, started.source.baseUrl);
    const refs = response.objectPack?.refs;
    return refs ?? {};
}
1140
// Returns the Workbench Cloud remote to run against.
// An already linked Cloud remote is returned as-is. Otherwise a remote is
// derived (via derivePublishCloudRemote), validated as a Cloud skill URL,
// registered with addWorkbenchRemote using the Cloud token, and returned.
async function ensureCloudRemoteForExecution(root, parsed) {
    const alreadyLinked = await linkedCloudRemote(root);
    if (alreadyLinked) {
        return alreadyLinked;
    }
    const link = await cloudRemoteLinkTarget(root);
    const remote = await derivePublishCloudRemote(parsed, "workbench --cloud", link.name);
    const source = parseWorkbenchInstallSource(remote.url);
    if (!source) {
        throw new WorkbenchCodedError("remote_invalid_url", `Workbench remote is not a Cloud skill URL: ${remote.url}`, {
            remediation: "Run workbench publish to recreate the Workbench Cloud link.",
            subject: { remote: remote.name, url: remote.url },
            exitCode: 2,
        });
    }
    const token = await workbenchCloudToken({ baseUrl: source.baseUrl });
    if (!token) {
        throw new WorkbenchCodedError("auth_required", "workbench --cloud requires Workbench Cloud auth.", {
            remediation: `Run workbench login --base-url ${source.baseUrl}.`,
            exitCode: 1,
        });
    }
    const addOptions = {
        dir: root,
        authToken: token,
        replace: link.replace,
    };
    const added = await addWorkbenchRemote(remote.name, remote.url, addOptions);
    return added.remote;
}
1169
// Resolves to the preferred Workbench Cloud remote configured under `root`,
// or null when there is none.
async function linkedCloudRemote(root) {
    const remotes = await inspectionRemotes(root);
    const preferred = preferredCloudRemote(remotes);
    return preferred ?? null;
}
1172
// Lists the remotes from a read-only inspection snapshot of `root`.
// A rejection with WorkbenchCodedError or WorkbenchUserError is treated as
// "no remotes"; any other rejection propagates.
async function inspectionRemotes(root) {
    const tolerateWorkbenchErrors = (error) => {
        const expected = error instanceof WorkbenchCodedError || error instanceof WorkbenchUserError;
        if (!expected) {
            throw error;
        }
        return null;
    };
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot({ dir: root }).catch(tolerateWorkbenchErrors);
    return snapshot?.remotes ?? [];
}
1181
// Computes the Cloud remote link target for the remotes found under `root`.
async function cloudRemoteLinkTarget(root) {
    const remotes = await inspectionRemotes(root);
    return cloudRemoteLinkTargetFromRemotes(remotes);
}
1184
// Decides which remote name a Cloud link should use.
// With an existing Cloud remote: reuse its name and mark it for replacement.
// Without one: pick the next free "cloud" / "cloud-N" name, no replacement.
function cloudRemoteLinkTargetFromRemotes(remotes) {
    const existing = preferredCloudRemote(remotes);
    return existing
        ? { name: existing.name, replace: true, existing }
        : { name: availableCloudRemoteName(remotes), replace: false };
}
1191
// Among remotes of kind "workbench-cloud", returns the one named "cloud" if
// present, otherwise the first one, otherwise undefined.
function preferredCloudRemote(remotes) {
    const cloudRemotes = remotes.filter(({ kind }) => kind === "workbench-cloud");
    const [firstCloudRemote] = cloudRemotes;
    const namedCloud = cloudRemotes.find(({ name }) => name === "cloud");
    return namedCloud ?? firstCloudRemote;
}
1195
// Returns "cloud" when no remote uses that name, otherwise the first unused
// "cloud-N" counting up from 1.
function availableCloudRemoteName(remotes) {
    const taken = new Set(remotes.map(({ name }) => name));
    let candidate = "cloud";
    let suffix = 0;
    while (taken.has(candidate)) {
        suffix += 1;
        candidate = `cloud-${suffix}`;
    }
    return candidate;
}
1207
// Looks up the Cloud skill id for `source.owner`/`source.skill` by listing
// skills from `source.baseUrl`; throws "remote_not_found" when no listed
// entry with an id matches.
async function resolveCloudSkillId(source) {
    const listed = await apiRequest("/api/workbench/skills", {}, source.baseUrl);
    const isRequestedSkill = (entry) => entry.ownerSlug === source.owner && entry.name === source.skill;
    const match = listed.skills?.find(isRequestedSkill);
    if (match?.id) {
        return match.id;
    }
    throw new WorkbenchCodedError("remote_not_found", `Workbench Cloud skill not found: ${source.owner}/${source.skill}`, {
        remediation: "Run workbench publish.",
        subject: { owner: source.owner, skill: source.skill },
        exitCode: 1,
    });
}
1219
// Builds the POST body for a hosted run from the parsed CLI input: the
// optional positional at index 1 plus the "skills", "agents" and "samples"
// flags, and the "budget" flag for the "improve" command only.
function cloudExecutionRequestBody(command, parsed) {
    const body = {
        version: optionalPositional(parsed, 1),
        skill: stringFlag(parsed, "skills"),
        agent: stringFlag(parsed, "agents"),
        samples: intFlag(parsed, "samples"),
    };
    if (command === "improve") {
        body.budget = intFlag(parsed, "budget");
    }
    return body;
}
1228
// Suggested follow-up command after a hosted improve; "workbench eval" is
// the suggestion for the success case.
function cloudImproveNextCommand(runs) {
    const successCommand = "workbench eval";
    return cloudExecutionNextCommand(runs, successCommand);
}
1231
// Suggests the next CLI command based on the first run only:
// no runs -> "workbench log --runs"; first run running/failed/canceled ->
// "workbench show <ref>"; anything else -> `successCommand`.
function cloudExecutionNextCommand(runs, successCommand) {
    const leading = runs[0];
    if (!leading) {
        return "workbench log --runs";
    }
    const needsInspection = ["running", "failed", "canceled"].includes(leading.status);
    return needsInspection ? `workbench show ${displayRef(leading.id)}` : successCommand;
}
1241
// Projects a started execution onto the `cloud` section of CLI output:
// remote name, remote URL, skill id and sync counters.
function cloudExecutionSummary(started) {
    const { remote, skillId, sync } = started;
    return {
        remote: remote.name,
        url: remote.url,
        skillId,
        sync,
    };
}
1019
1249
  function workbenchInstallSourceSummary(source, snapshot) {
1020
1250
  const installUrl = `${source.baseUrl}/skills/${encodeURIComponent(source.owner)}/${encodeURIComponent(source.skill)}`;
1021
1251
  return {
@@ -1089,12 +1319,13 @@ async function fetchWorkbenchInstallSourceSnapshot(source, displaySource) {
1089
1319
  throw new WorkbenchCodedError("auth_required", token
1090
1320
  ? `Workbench Cloud rejected the provided token while installing ${displaySource}.`
1091
1321
  : `Authentication is required to install ${displaySource}.`, {
1092
- remediation: `Run workbench login --base-url ${source.baseUrl}.`,
1322
+ remediation: "Run workbench login.",
1093
1323
  exitCode: 1,
1094
1324
  });
1095
1325
  }
1096
1326
  if (!response.ok) {
1097
- throw new WorkbenchCodedError("install_failed", `Unable to download Workbench source ${displaySource}: ${response.status} ${readResponseError(text) ?? response.statusText}`, {
1327
+ const excerpt = readResponseError(text);
1328
+ throw new WorkbenchCodedError("install_failed", `Unable to download Workbench source ${displaySource}: ${response.status}${excerpt ? ` ${excerpt}` : response.statusText ? ` ${response.statusText}` : ""}`, {
1098
1329
  subject: { source: displaySource, status: response.status },
1099
1330
  exitCode: 1,
1100
1331
  });
@@ -1175,6 +1406,7 @@ async function loadConfig() {
1175
1406
  ...(typeof parsed.baseUrl === "string" ? { baseUrl: normalizeBaseUrl(parsed.baseUrl) } : {}),
1176
1407
  ...(typeof parsed.accessToken === "string" ? { accessToken: parsed.accessToken } : {}),
1177
1408
  ...(typeof parsed.username === "string" ? { username: parsed.username } : {}),
1409
+ ...(Array.isArray(parsed.installTargets) ? { installTargets: normalizeInstallTargetNames(parsed.installTargets.flatMap((entry) => typeof entry === "string" ? [entry] : [])) } : {}),
1178
1410
  };
1179
1411
  }
1180
1412
  // Single resolver for the Workbench Cloud token used by every authenticated
@@ -1214,18 +1446,15 @@ function deviceAuthPath() {
1214
1446
  return process.env.WORKBENCH_DEVICE_AUTH?.trim() || path.join(path.dirname(configPath()), "device-auth.json");
1215
1447
  }
1216
1448
  function selectWorkbenchBaseUrl(input = {}) {
1217
- const baseUrl = optionalWorkbenchBaseUrl(input);
1218
- if (!baseUrl) {
1219
- throw new WorkbenchUserError("Missing Workbench API URL. Pass --base-url URL, set WORKBENCH_API_URL, or run `workbench login --base-url URL`.");
1220
- }
1221
- return baseUrl;
1449
+ return optionalWorkbenchBaseUrl(input);
1222
1450
  }
1223
1451
  function optionalWorkbenchBaseUrl(input = {}) {
1224
1452
  const value = input.explicitBaseUrl ??
1225
1453
  input.originBaseUrl ??
1226
1454
  process.env.WORKBENCH_API_URL ??
1227
- input.configBaseUrl;
1228
- return value ? normalizeBaseUrl(value) : undefined;
1455
+ input.configBaseUrl ??
1456
+ DEFAULT_WORKBENCH_CLOUD_BASE_URL;
1457
+ return normalizeBaseUrl(value);
1229
1458
  }
1230
1459
  function normalizeBaseUrl(value) {
1231
1460
  return value.trim().replace(/\/+$/u, "");
@@ -1243,7 +1472,8 @@ async function requestDeviceAuthorization(baseUrl) {
1243
1472
  });
1244
1473
  }
1245
1474
  if (!response.ok) {
1246
- throw new WorkbenchCodedError("login_denied", `Device login failed: ${readResponseError(text) ?? response.statusText}`, {
1475
+ const excerpt = readResponseError(text);
1476
+ throw new WorkbenchCodedError("login_denied", `Device login failed: ${response.status}${excerpt ? ` ${excerpt}` : response.statusText ? ` ${response.statusText}` : ""}`, {
1247
1477
  exitCode: 1,
1248
1478
  });
1249
1479
  }
@@ -1294,7 +1524,7 @@ async function pollDeviceToken(baseUrl, authorization, timeoutSeconds) {
1294
1524
  }
1295
1525
  throw new WorkbenchCodedError("login_pending", "Device login is still waiting for browser authorization.", {
1296
1526
  retryable: true,
1297
- remediation: "Authorize the device in the browser, then run workbench login --wait --timeout 120.",
1527
+ remediation: "Authorize the device in the browser, then run workbench login --wait.",
1298
1528
  subject: {
1299
1529
  retryAfterSeconds: Math.max(1, Math.ceil(intervalMs / 1000)),
1300
1530
  verificationUri: authorization.verification_uri,
@@ -1318,7 +1548,8 @@ async function fetchWorkbenchUsername(baseUrl, accessToken) {
1318
1548
  }
1319
1549
  async function readPendingDeviceAuthorization(baseUrl) {
1320
1550
  const record = await readDeviceAuthorizationJson(deviceAuthPath());
1321
- if (!record || record.baseUrl !== baseUrl || Date.parse(record.expiresAt) <= Date.now()) {
1551
+ const expectedBaseUrl = baseUrl ? normalizeBaseUrl(baseUrl) : undefined;
1552
+ if (!record || (expectedBaseUrl && record.baseUrl !== expectedBaseUrl) || Date.parse(record.expiresAt) <= Date.now()) {
1322
1553
  return null;
1323
1554
  }
1324
1555
  return record;
@@ -1408,7 +1639,8 @@ async function apiRequest(apiPath, options = {}, baseUrlOverride) {
1408
1639
  }
1409
1640
  throw requestError;
1410
1641
  }
1411
- const requestError = new WorkbenchApiRequestError(response.status, readResponseError(text) ?? `Request failed with status ${response.status}${response.statusText ? ` ${response.statusText}` : ""}.`, text);
1642
+ const excerpt = readResponseError(text);
1643
+ const requestError = new WorkbenchApiRequestError(response.status, `Request failed with status ${response.status}${response.statusText ? ` ${response.statusText}` : ""}${excerpt ? `: ${excerpt}` : ""}.`, text);
1412
1644
  lastError = requestError;
1413
1645
  if (canRetry && attempt < API_REQUEST_MAX_ATTEMPTS && isTransientApiRequestError(requestError)) {
1414
1646
  await sleep(250 * attempt);
@@ -1428,8 +1660,11 @@ function encodeJsonRequestBody(body) {
1428
1660
  if (Buffer.byteLength(text) < API_REQUEST_GZIP_THRESHOLD_BYTES) {
1429
1661
  return { body: text, headers: { "content-type": "application/json" } };
1430
1662
  }
1663
+ const compressed = gzipSync(text);
1664
+ const compressedBody = new ArrayBuffer(compressed.byteLength);
1665
+ new Uint8Array(compressedBody).set(compressed);
1431
1666
  return {
1432
- body: gzipSync(text),
1667
+ body: compressedBody,
1433
1668
  headers: {
1434
1669
  "content-encoding": "gzip",
1435
1670
  "content-type": "application/json",
@@ -1498,11 +1733,21 @@ function readResponseError(text) {
1498
1733
  const parsed = JSON.parse(text);
1499
1734
  const record = asRecord(parsed);
1500
1735
  const error = record?.error ?? record?.message;
1501
- return typeof error === "string" && error.trim() ? error : null;
1736
+ return typeof error === "string" && error.trim() ? oneLineExcerpt(error) : null;
1502
1737
  }
1503
1738
  catch {
1504
- return text.trim() || null;
1739
+ if (/<(?:!doctype|html|head|body)\b/iu.test(text)) {
1740
+ return null;
1741
+ }
1742
+ return oneLineExcerpt(text);
1743
+ }
1744
+ }
1745
// Collapses every whitespace run in `text` to a single space and trims it.
// Returns null when nothing is left; otherwise returns the line, cut to at
// most 180 UTF-16 code units with a trailing "..." when it was longer.
function oneLineExcerpt(text) {
    const line = text.replace(/\s+/gu, " ").trim();
    if (!line) {
        return null;
    }
    if (line.length <= 180) {
        return line;
    }
    let head = line.slice(0, 177);
    // Do not cut a surrogate pair in half: a dangling high surrogate would be
    // emitted as an unpaired code unit in the error message.
    const lastUnit = head.charCodeAt(head.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        head = head.slice(0, -1);
    }
    return `${head}...`;
}
1507
1752
  function parseWorkbenchCloudErrorBody(text) {
1508
1753
  try {
@@ -1535,6 +1780,14 @@ function errorMessage(error) {
1535
1780
  function sleep(ms) {
1536
1781
  return new Promise((resolve) => setTimeout(resolve, ms));
1537
1782
  }
1783
// Reads environment variable `name` as a positive safe integer.
// Returns undefined when it is unset, blank, or does not convert (via
// Number) to a safe integer greater than zero.
function positiveIntEnv(name) {
    const text = (process.env[name] ?? "").trim();
    if (text === "") {
        return undefined;
    }
    const parsed = Number(text);
    if (!Number.isSafeInteger(parsed) || parsed <= 0) {
        return undefined;
    }
    return parsed;
}
1538
1791
  async function openBrowser(url) {
1539
1792
  const command = process.platform === "darwin"
1540
1793
  ? "open"
@@ -1870,6 +2123,17 @@ function parseArgs(argv) {
1870
2123
  addFlag(flags, "version", true);
1871
2124
  continue;
1872
2125
  }
2126
+ if (arg === "-n") {
2127
+ const value = argv[index + 1];
2128
+ if (value && !value.startsWith("-")) {
2129
+ index += 1;
2130
+ addFlag(flags, "samples", value);
2131
+ }
2132
+ else {
2133
+ addFlag(flags, "samples", true);
2134
+ }
2135
+ continue;
2136
+ }
1873
2137
  if (!arg.startsWith("--") || arg === "--") {
1874
2138
  positionals.push(arg);
1875
2139
  continue;
@@ -1877,7 +2141,9 @@ function parseArgs(argv) {
1877
2141
  const eq = arg.indexOf("=");
1878
2142
  const name = eq === -1 ? arg.slice(2) : arg.slice(2, eq);
1879
2143
  const value = eq === -1 ? argv[index + 1] : arg.slice(eq + 1);
1880
- if (eq === -1 && BOOLEAN_FLAGS.has(name)) {
2144
+ const flagSpec = flagSpecForParsedPrefix(positionals, flags);
2145
+ const kind = flagSpec?.[name];
2146
+ if (eq === -1 && kind === "boolean") {
1881
2147
  addFlag(flags, name, true);
1882
2148
  }
1883
2149
  else if (eq === -1 && value && !value.startsWith("-")) {
@@ -1890,8 +2156,12 @@ function parseArgs(argv) {
1890
2156
  }
1891
2157
  return { positionals, flags };
1892
2158
  }
2159
// Looks up the allowed-flag spec for the command implied by what has been
// parsed so far: the first positional, else "version" when the version flag
// is set, else "status". The lookup receives a copy of the positionals and
// an empty flags object.
function flagSpecForParsedPrefix(positionals, flags) {
    let command = positionals[0];
    if (command === undefined || command === null) {
        command = flags.version === true ? "version" : "status";
    }
    const prefix = { positionals: [...positionals], flags: {} };
    return allowedFlagsForCommand(prefix, command);
}
1893
2163
  function addFlag(flags, name, value) {
1894
- if (name === "with") {
2164
+ if (name === "with" || name === "to") {
1895
2165
  const existing = flags[name];
1896
2166
  flags[name] = Array.isArray(existing)
1897
2167
  ? [...existing, String(value)]
@@ -1900,15 +2170,6 @@ function addFlag(flags, name, value) {
1900
2170
  : [String(existing), String(value)];
1901
2171
  return;
1902
2172
  }
1903
- if (name === "agent" || name === "skill") {
1904
- const existing = flags[name];
1905
- flags[name] = Array.isArray(existing)
1906
- ? [...existing, String(value)]
1907
- : existing === undefined
1908
- ? String(value)
1909
- : [String(existing), String(value)];
1910
- return;
1911
- }
1912
2173
  flags[name] = value;
1913
2174
  }
1914
2175
  function dirFlag(parsed) {
@@ -1972,14 +2233,154 @@ function rejectExtraInput(parsed, input) {
1972
2233
  exitCode: 2,
1973
2234
  });
1974
2235
  }
1975
- function parsePublishVisibility(value) {
1976
- if (value === undefined) {
1977
- return undefined;
2236
+ async function defaultDiffRange(core) {
2237
+ await listWorkbenchVersions(core);
2238
+ const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
2239
+ const currentId = snapshot.status.currentVersionId ?? snapshot.refs.current;
2240
+ const current = snapshot.versions.find((version) => version.id === currentId);
2241
+ if (!current) {
2242
+ throw new WorkbenchCodedError("version_not_found", "Current Workbench version was not found.", {
2243
+ remediation: "Run workbench log --versions.",
2244
+ exitCode: 1,
2245
+ });
1978
2246
  }
1979
- if (value === "private" || value === "internal" || value === "public") {
1980
- return value;
2247
+ const parent = current.parentIds[0];
2248
+ return parent ? `${parent}..${current.id}` : `${current.id}..${current.id}`;
2249
+ }
2250
// Maps the publish visibility flags to a visibility value:
// --private -> "private", --team -> "internal", --public -> "public".
// Returns undefined when none is set (only a literal `true` counts) and
// throws a usage error when more than one is set.
function parsePublishVisibilityFlags(parsed) {
    const visibilityByFlag = [
        ["private", "private"],
        ["team", "internal"],
        ["public", "public"],
    ];
    const chosen = visibilityByFlag
        .filter(([flag]) => parsed.flags[flag] === true)
        .map(([, visibility]) => visibility);
    if (chosen.length > 1) {
        throw new WorkbenchCodedError("usage", "workbench publish accepts only one visibility flag.", {
            remediation: "Run workbench publish --private, workbench publish --team, or workbench publish --public.",
            exitCode: 2,
        });
    }
    return chosen[0];
}
2264
+ async function previewPublishWithDerivedRemote(parsed) {
2265
+ const root = path.resolve(dirFlag(parsed) ?? process.cwd());
2266
+ const reconciledSnapshot = await createWorkbenchReadOnlyInspectionSnapshot({ dir: root });
2267
+ const link = cloudRemoteLinkTargetFromRemotes(reconciledSnapshot.remotes);
2268
+ const remote = stringFlag(parsed, "as") || !link.existing
2269
+ ? await derivePublishCloudRemote(parsed, "workbench publish", link.name)
2270
+ : link.existing;
2271
+ const requestedVersion = optionalPositional(parsed, 1);
2272
+ const version = requestedVersion && requestedVersion !== "current"
2273
+ ? snapshotVersionByRef(reconciledSnapshot, requestedVersion)
2274
+ : snapshotVersionByRef(reconciledSnapshot, reconciledSnapshot.status.currentVersionId ?? reconciledSnapshot.refs.current ?? "");
2275
+ if (!version) {
2276
+ throw new WorkbenchCodedError("version_not_found", `Version not found: ${requestedVersion ?? "current"}`, {
2277
+ remediation: "Run workbench log --versions.",
2278
+ subject: { version: requestedVersion ?? "current" },
2279
+ exitCode: 1,
2280
+ });
1981
2281
  }
1982
- throw new WorkbenchUserError("workbench publish --visibility must be private, internal, or public.");
2282
+ return {
2283
+ remote,
2284
+ version,
2285
+ visibility: parsePublishVisibilityFlags(parsed) ?? "private",
2286
+ installHandle: installHandleFromCloudRemote(remote),
2287
+ installUrl: remote.url,
2288
+ pinnedInstallUrl: `${remote.url}/releases/${encodeURIComponent(version.id)}`,
2289
+ };
2290
+ }
2291
// Resolves the name of the remote that `workbench publish` should use.
// With --as: derive the remote and (re)register it, honouring `link.replace`.
// Without --as: reuse an existing Cloud remote when there is one, otherwise
// derive a remote and register it with the plain core options.
async function ensurePublishRemote(parsed) {
    const core = await coreOptions(parsed);
    const root = path.resolve(dirFlag(parsed) ?? process.cwd());
    const link = await cloudRemoteLinkTarget(root);
    const hasOverride = Boolean(stringFlag(parsed, "as"));
    if (!hasOverride && link.existing) {
        return link.existing.name;
    }
    const remote = await derivePublishCloudRemote(parsed, "workbench publish", link.name);
    const addOptions = hasOverride ? { ...core, replace: link.replace } : core;
    const added = await addWorkbenchRemote(remote.name, remote.url, addOptions);
    return added.remote.name;
}
2308
// Builds a "workbench-cloud" remote descriptor `{ name, kind, url }`.
// The OWNER/SKILL handle comes from --as when given, otherwise it is derived
// from the loaded config via derivedOwnerSkillHandle; both parts are
// URI-encoded into `<baseUrl>/skills/<owner>/<skill>`.
async function derivePublishCloudRemote(parsed, action = "workbench publish", name = "cloud") {
    const config = await loadConfig();
    const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl }) ?? DEFAULT_WORKBENCH_CLOUD_BASE_URL;
    const override = stringFlag(parsed, "as");
    let handle;
    if (override) {
        handle = parseOwnerSkillHandle(override);
    }
    else {
        handle = derivedOwnerSkillHandle(parsed, config, action);
    }
    const ownerSegment = encodeURIComponent(handle.owner);
    const skillSegment = encodeURIComponent(handle.skill);
    return {
        name,
        kind: "workbench-cloud",
        url: `${baseUrl}/skills/${ownerSegment}/${skillSegment}`,
    };
}
2316
// Returns the "OWNER/SKILL" install handle parsed from a Cloud remote URL;
// throws "remote_invalid_url" when the URL does not parse as a Cloud skill.
function installHandleFromCloudRemote(remote) {
    const source = parseWorkbenchInstallSource(remote.url);
    if (source) {
        const { owner, skill } = source;
        return `${owner}/${skill}`;
    }
    throw new WorkbenchCodedError("remote_invalid_url", `Workbench remote is not a Cloud skill URL: ${remote.url}`, {
        remediation: "Run workbench publish to recreate the Workbench Cloud link.",
        subject: { remote: remote.name, url: remote.url },
        exitCode: 2,
    });
}
2327
// Parses an --as value into an { owner, skill } handle, throwing a usage
// error when it is not a valid OWNER/SKILL pair.
function parseOwnerSkillHandle(input) {
    const handle = normalizedOwnerSkillHandle(input);
    if (handle) {
        return handle;
    }
    throw new WorkbenchCodedError("usage", "workbench publish --as expects OWNER/SKILL.", {
        remediation: "Run workbench publish --as OWNER/SKILL.",
        exitCode: 2,
    });
}
2337
// Derives an { owner, skill } handle from the configured username and the
// base name of the working directory (--dir or the current directory).
// Throws "auth_required" when no username is configured and "usage" when
// either part normalizes to an empty value.
function derivedOwnerSkillHandle(parsed, config, action) {
    const owner = config.username?.trim();
    if (!owner) {
        throw new WorkbenchCodedError("auth_required", `${action} needs a logged-in Workbench Cloud username before it can derive OWNER/SKILL.`, {
            remediation: "Run workbench login.",
            exitCode: 1,
        });
    }
    const root = path.resolve(dirFlag(parsed) ?? process.cwd());
    const folderName = path.basename(root);
    const handle = normalizeOwnerSkillHandle(owner, folderName);
    const usable = Boolean(handle.owner && handle.skill);
    if (!usable) {
        throw new WorkbenchCodedError("usage", `${action} could not derive a valid OWNER/SKILL handle.`, {
            remediation: `Run ${action} --as OWNER/SKILL.`,
            subject: { owner, skill: folderName },
            exitCode: 2,
        });
    }
    return handle;
}
2356
// Turns the `workbench install` source argument into a URL string.
// An input starting with http:// or https:// is returned unchanged. URI
// schemes are case-insensitive, so "HTTPS://..." is accepted as a URL too
// rather than being rejected as a malformed handle.
// Any other input must be an OWNER/SKILL handle, which is expanded to
// `<baseUrl>/skills/<owner>/<skill>` using the configured base URL.
// Throws a usage error when the input is neither.
async function resolveWorkbenchInstallSourceInput(input) {
    if (/^https?:\/\//iu.test(input)) {
        return input;
    }
    const handle = normalizedOwnerSkillHandle(input);
    if (!handle) {
        throw new WorkbenchCodedError("usage", "workbench install expects OWNER/SKILL or a Workbench Cloud skill URL.", {
            remediation: "Run workbench install OWNER/SKILL --to codex.",
            exitCode: 2,
        });
    }
    const config = await loadConfig();
    const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl }) ?? DEFAULT_WORKBENCH_CLOUD_BASE_URL;
    return `${baseUrl}/skills/${encodeURIComponent(handle.owner)}/${encodeURIComponent(handle.skill)}`;
}
2371
// Splits a trimmed "OWNER/SKILL" string on "/" and normalizes both parts.
// Returns null unless there are exactly two parts and both normalize to a
// non-empty value.
function normalizedOwnerSkillHandle(value) {
    const segments = value.trim().split("/");
    if (segments.length !== 2) {
        return null;
    }
    const [owner, skill] = segments;
    const handle = normalizeOwnerSkillHandle(owner ?? "", skill ?? "");
    if (handle.owner && handle.skill) {
        return handle;
    }
    return null;
}
2379
// Applies normalizeWorkbenchSkillName to the owner and then the skill, and
// returns them as an { owner, skill } pair.
function normalizeOwnerSkillHandle(owner, skill) {
    const normalizedOwner = normalizeWorkbenchSkillName(owner);
    const normalizedSkill = normalizeWorkbenchSkillName(skill);
    return { owner: normalizedOwner, skill: normalizedSkill };
}
1984
2385
  function parseWithFlags(parsed) {
1985
2386
  const raw = parsed.flags.with;
@@ -2021,7 +2422,7 @@ async function artifactIdsByRunId(core, runs) {
2021
2422
  return byRun;
2022
2423
  }
2023
2424
  function emitEvalFailure(runs, failedRuns, artifactIds, parsed, io) {
2024
- const nextCommands = evalFailureNextCommands(failedRuns);
2425
+ const next = evalFailureNextCommand(failedRuns);
2025
2426
  if (parsed.flags.json === true) {
2026
2427
  io.stdout.write(`${JSON.stringify({
2027
2428
  schema: "workbench.cli.eval.v1",
@@ -2032,14 +2433,14 @@ function emitEvalFailure(runs, failedRuns, artifactIds, parsed, io) {
2032
2433
  evidenceSaved: true,
2033
2434
  runs: runs.map((run) => runFailureSummary(run, artifactIds.get(run.id) ?? [])),
2034
2435
  failedRuns: failedRuns.map((run) => runFailureSummary(run, artifactIds.get(run.id) ?? [])),
2035
- nextCommands,
2436
+ next,
2036
2437
  }, null, 2)}\n`);
2037
2438
  return 1;
2038
2439
  }
2039
2440
  io.stdout.write([
2040
2441
  "Eval failed; evidence was saved.",
2041
2442
  ...failedRuns.map(formatRun),
2042
- ...(nextCommands.length > 0 ? ["next:", ...nextCommands.map((command) => ` ${command}`)] : []),
2443
+ ...(next ? [`next: ${next}`] : []),
2043
2444
  ].join("\n") + "\n");
2044
2445
  return 1;
2045
2446
  }
@@ -2072,18 +2473,12 @@ function runFailureSummary(run, artifactIds) {
2072
2473
  artifactIds: [...artifactIds],
2073
2474
  };
2074
2475
  }
2075
- function evalFailureNextCommands(failedRuns) {
2476
/**
 * Chooses the single follow-up command suggested after a failed eval.
 *
 * @param {Array<{id: string}>} failedRuns Runs that failed.
 * @returns {string} `workbench show` for the first failed run (id shortened
 *   by displayRef), or "workbench log --runs" when the list is empty.
 */
function evalFailureNextCommand(failedRuns) {
    const firstFailure = failedRuns[0];
    return firstFailure
        ? `workbench show ${displayRef(firstFailure.id)}`
        : "workbench log --runs";
}
2088
2483
  function output(value, parsed, io, text) {
2089
2484
  return emitResult(commandSchema(parsed), { result: value }, parsed, io, text);
@@ -2091,7 +2486,7 @@ function output(value, parsed, io, text) {
2091
2486
  function commandSchema(parsed) {
2092
2487
  const command = parsed.positionals[0] ?? "result";
2093
2488
  const subcommand = parsed.positionals[1];
2094
- const suffix = ["auth", "remote", "agent", "case", "skills"].includes(command) && subcommand
2489
+ const suffix = ["agent", "case"].includes(command) && subcommand
2095
2490
  ? `${command}-${subcommand}`
2096
2491
  : command;
2097
2492
  return `workbench.cli.${suffix}.v1`;
@@ -2116,11 +2511,377 @@ async function workbenchCliAuthStatus() {
2116
2511
  })),
2117
2512
  };
2118
2513
  }
2514
/**
 * Overrides the status's next step with "workbench login" when Workbench
 * Cloud credentials are missing and a workbench-cloud remote needs them.
 *
 * A remote needs them when its sync status is not "up_to_date" or its
 * publication status is "unpublished".
 *
 * @param {object} status Status snapshot; returned as-is when no override applies.
 * @param {object} auth Auth status; only `workbenchCloud.status` is read.
 * @returns {object} The same status object, or a shallow copy with `next` replaced.
 */
function statusWithCausalNext(status, auth) {
    if (auth.workbenchCloud.status === "authenticated") {
        return status;
    }
    const remoteNeedsLogin = (remote) => {
        if (remote.kind !== "workbench-cloud") {
            return false;
        }
        return remote.sync.status !== "up_to_date" || remote.publication.status === "unpublished";
    };
    return status.remotes.some(remoteNeedsLogin)
        ? { ...status, next: "workbench login" }
        : status;
}
2526
/**
 * Shortens an object id for display.
 *
 * - `v_<8+ hex chars>` becomes the first 8 hex characters without the prefix.
 * - `<prefix>_<suffix>` (with both sides non-empty) keeps the prefix and the
 *   first 8 characters of the suffix.
 * - Anything else is cut to at most 8 characters.
 *
 * @param {string} id Full identifier.
 * @returns {string} Shortened identifier.
 */
function displayRef(id) {
    const versionMatch = id.match(/^v_([0-9a-f]{8,})$/iu);
    if (versionMatch?.[1]) {
        return versionMatch[1].slice(0, 8);
    }
    const cut = id.indexOf("_");
    const hasPrefixAndSuffix = cut > 0 && cut < id.length - 1;
    if (!hasPrefixAndSuffix) {
        // slice() leaves ids of 8 characters or fewer unchanged.
        return id.slice(0, 8);
    }
    return `${id.slice(0, cut)}_${id.slice(cut + 1, cut + 9)}`;
}
2539
/**
 * Rewrites every object id embedded in a command string to its display form.
 *
 * Matches `v_` followed by 8+ hex characters, and `run_`/`job_`/`trace_`/
 * `artifact_` followed by letters, digits, `_` or `-` (case-insensitive).
 *
 * @param {string} command Command text that may contain full ids.
 * @returns {string} The command with each matched id passed through displayRef.
 */
function shortenCommandRefs(command) {
    const refPattern = /\b(?:v_[0-9a-f]{8,}|(?:run|job|trace|artifact)_[a-z0-9_-]+)/giu;
    return command.replace(refPattern, (ref) => displayRef(ref));
}
2542
/**
 * Resolves a version ref against the versions in an inspection snapshot.
 *
 * The literal ref "current" is replaced by `snapshot.refs.current`. A ref
 * that exactly equals one version's id or hash resolves to that version
 * directly; previously such a ref was rejected as ambiguous whenever it was
 * also a prefix of another version. Otherwise the ref is matched with
 * snapshotVersionRefMatches (prefix matching).
 *
 * @param {{refs: {current?: string}, versions: Array<{id: string, hash: string}>}} snapshot
 * @param {string} ref Version id, hash, prefix of either, or "current".
 * @returns {object | undefined} The matching version, or undefined when the
 *   ref is blank, "current" is unset, or nothing matches.
 * @throws {WorkbenchCodedError} "ref_ambiguous" (exit code 2) when several
 *   versions match and none of them is the single exact match.
 */
function snapshotVersionByRef(snapshot, ref) {
    const requested = ref.trim();
    const normalized = requested === "current" ? snapshot.refs.current ?? "" : requested;
    if (!normalized) {
        return undefined;
    }
    // An exact id/hash identifies its own version even if a longer one shares the prefix.
    const exactMatches = snapshot.versions.filter((version) => version.id === normalized || version.hash === normalized);
    if (exactMatches.length === 1) {
        return exactMatches[0];
    }
    const candidates = snapshot.versions.filter((version) => snapshotVersionRefMatches(version, normalized));
    if (candidates.length > 1) {
        throw new WorkbenchCodedError("ref_ambiguous", `Version ref is ambiguous: ${ref}. Candidates: ${candidates.map((version) => displayRef(version.id)).join(", ")}.`, {
            subject: { ref, candidates: candidates.map((version) => version.id) },
            exitCode: 2,
        });
    }
    return candidates[0];
}
2557
/**
 * Tells whether a ref designates the given version.
 *
 * A ref matches when it equals or is a prefix of the version's id or hash.
 * The ref is also tried with a leading "v_" removed (against the hash) and
 * with "v_" put back in front of that bare form (against the id).
 *
 * @param {{id: string, hash: string}} version Version to test.
 * @param {string} ref Non-empty, already trimmed ref.
 * @returns {boolean} True when the ref matches this version.
 */
function snapshotVersionRefMatches(version, ref) {
    const bareRef = ref.startsWith("v_") ? ref.slice(2) : ref;
    if (version.id === ref || version.hash === ref) {
        return true;
    }
    if (version.id.startsWith(ref) || version.hash.startsWith(ref)) {
        return true;
    }
    if (version.hash.startsWith(bareRef)) {
        return true;
    }
    return version.id.startsWith(`v_${bareRef}`);
}
2566
/**
 * Resolves a user-supplied ref to one entry of a snapshot collection.
 *
 * A ref that exactly equals one entry's id resolves to that entry directly.
 * Previously an id such as "run_1" could never be selected while "run_10"
 * existed, because prefix matching made it ambiguous. Any other ref is
 * matched with objectRefMatches.
 *
 * @param {Array<{id: string}>} entries Collection to search.
 * @param {string} ref Id or id fragment; surrounding whitespace is ignored.
 * @param {string} kind Noun used in the error message (capitalized there).
 * @returns {object | undefined} The matching entry, or undefined when the ref
 *   is blank or nothing matches.
 * @throws {WorkbenchCodedError} "ref_ambiguous" (exit code 2) when several
 *   entries match and none of them is the single exact match. The message
 *   lists at most 8 candidates and the subject at most 20.
 */
function snapshotObjectByRef(entries, ref, kind) {
    const normalized = ref.trim();
    if (!normalized) {
        return undefined;
    }
    // A full id identifies its own entry even if it is a prefix of a longer id.
    const exactMatches = entries.filter((entry) => entry.id === normalized);
    if (exactMatches.length === 1) {
        return exactMatches[0];
    }
    const candidates = entries.filter((entry) => objectRefMatches(entry.id, normalized));
    if (candidates.length > 1) {
        throw new WorkbenchCodedError("ref_ambiguous", `${capitalize(kind)} ref is ambiguous: ${ref}. Candidates: ${candidates.map((entry) => displayRef(entry.id)).slice(0, 8).join(", ")}.`, {
            subject: { ref, candidates: candidates.map((entry) => entry.id).slice(0, 20) },
            exitCode: 2,
        });
    }
    return candidates[0];
}
2580
/**
 * Tells whether a ref designates an object id.
 *
 * The ref matches when the id starts with it, or when the part of the id
 * after its first "_" starts with it (so the kind prefix may be omitted).
 *
 * @param {string} id Full object id.
 * @param {string} ref Non-empty ref.
 * @returns {boolean} True on a match.
 */
function objectRefMatches(id, ref) {
    // startsWith also covers the case where the ref is the whole id.
    if (id.startsWith(ref)) {
        return true;
    }
    const cut = id.indexOf("_");
    if (cut <= 0) {
        return false;
    }
    return id.slice(cut + 1).startsWith(ref);
}
2587
/**
 * Upper-cases the first character of a string.
 *
 * @param {string} value Text to capitalize.
 * @returns {string} The text with its first character upper-cased; an empty
 *   string is returned unchanged.
 */
function capitalize(value) {
    if (value.length === 0) {
        return value;
    }
    const head = value.charAt(0).toUpperCase();
    const tail = value.slice(1);
    return `${head}${tail}`;
}
2590
/**
 * Interprets a ref as either a run or a job and returns the jobs it covers.
 *
 * @param {{runs: object[], jobs: object[]}} snapshot Inspection snapshot.
 * @param {string} ref Run or job ref.
 * @returns {{run?: object, jobs: object[]}} For a run: the run plus every job
 *   whose `runId` equals its id. For a job: just that job. Otherwise an empty
 *   job list.
 * @throws {WorkbenchCodedError} "ref_ambiguous" (exit code 2) when the ref
 *   resolves to both a run and a job.
 */
function runOrJobEvidenceSelection(snapshot, ref) {
    const matchedRun = snapshotObjectByRef(snapshot.runs, ref, "run");
    const matchedJob = snapshotObjectByRef(snapshot.jobs, ref, "job");
    if (matchedRun && matchedJob) {
        const candidates = [matchedRun.id, matchedJob.id];
        throw new WorkbenchCodedError("ref_ambiguous", `Run/job ref is ambiguous: ${ref}. Candidates: ${displayRef(matchedRun.id)}, ${displayRef(matchedJob.id)}.`, {
            subject: { ref, candidates },
            exitCode: 2,
        });
    }
    if (matchedRun) {
        const jobsOfRun = snapshot.jobs.filter((entry) => entry.runId === matchedRun.id);
        return { run: matchedRun, jobs: jobsOfRun };
    }
    return { jobs: matchedJob ? [matchedJob] : [] };
}
2607
/**
 * Lists the trace and artifact files attached to a run or job, each re-homed
 * under `cases/<case>/jobs/<job>/traces|artifacts/<id>/<original path>`.
 *
 * Per job, trace files come before artifact files. Trace or artifact ids that
 * are absent from the snapshot are skipped, and when two files end up with
 * the same path only the first is kept.
 *
 * @param {object} snapshot Inspection snapshot (runs, jobs, traces, artifacts).
 * @param {string} ref Run or job ref.
 * @returns {object[]} Copies of the files with rewritten `path`; empty when
 *   the ref resolves to nothing.
 */
function evidenceFilesForRunOrJob(snapshot, ref) {
    const { run, jobs } = runOrJobEvidenceSelection(snapshot, ref);
    if (!run && jobs.length === 0) {
        return [];
    }
    const traceById = new Map(snapshot.traces.map((trace) => [trace.id, trace]));
    const artifactById = new Map(snapshot.artifacts.map((artifact) => [artifact.id, artifact]));
    const seenPaths = new Set();
    const uniqueFiles = [];
    const addFilesOf = (job, group, owner) => {
        if (!owner) {
            return;
        }
        for (const file of owner.files) {
            const rehomed = evidenceFileWithPath(file, `cases/${evidencePathSegment(job.caseId)}/jobs/${evidencePathSegment(job.id)}/${group}/${evidencePathSegment(owner.id)}/${file.path}`);
            if (!seenPaths.has(rehomed.path)) {
                seenPaths.add(rehomed.path);
                uniqueFiles.push(rehomed);
            }
        }
    };
    for (const job of jobs) {
        for (const traceId of job.traceIds) {
            addFilesOf(job, "traces", traceById.get(traceId));
        }
        for (const artifactId of job.artifactIds) {
            addFilesOf(job, "artifacts", artifactById.get(artifactId));
        }
    }
    return uniqueFiles;
}
2637
/**
 * Copies a file record under a new, normalized path.
 *
 * Backslashes become forward slashes and leading slashes are dropped.
 *
 * @param {object} file File record to copy (not mutated).
 * @param {string} filePath Replacement path.
 * @returns {object} Shallow copy of `file` with `path` replaced.
 */
function evidenceFileWithPath(file, filePath) {
    const forwardSlashed = filePath.replace(/\\/gu, "/");
    const relativePath = forwardSlashed.replace(/^\/+/u, "");
    return { ...file, path: relativePath };
}
2643
/**
 * Makes a value usable as one segment of an evidence path.
 *
 * Every run of characters other than letters, digits, ".", "_" and "-" is
 * collapsed into a single "-".
 *
 * @param {string} value Raw segment text.
 * @returns {string} The sanitized segment, or "_" when the result is empty.
 */
function evidencePathSegment(value) {
    const sanitized = value.replace(/[^A-Za-z0-9._-]+/gu, "-");
    return sanitized === "" ? "_" : sanitized;
}
2646
/**
 * Renders evidence details followed by a file listing as plain text.
 *
 * @param {object[]} details Evidence details, each rendered by formatTraceDetail;
 *   details that render to an empty value are omitted.
 * @param {Array<{path: string}>} files Evidence files; listed under a "Files:"
 *   heading when there is at least one.
 * @returns {string} Newline-joined text, or "No evidence." when nothing is left.
 */
function formatRunOrJobEvidence(details, files) {
    const lines = details.map(formatTraceDetail).filter(Boolean);
    if (files.length > 0) {
        lines.push("Files:");
        for (const file of files) {
            lines.push(file.path);
        }
    }
    return lines.join("\n") || "No evidence.";
}
2651
/**
 * Recursively copies a value, replacing file-like records with summaries.
 *
 * - null, strings, numbers and booleans are returned as they are.
 * - Arrays are mapped element by element.
 * - An object with string `path` and string `content` is handed to fileSummary.
 * - Other objects are copied key by key, dropping keys whose value is undefined.
 * - Everything else (undefined, functions, symbols, bigints) becomes null.
 *
 * @param {*} value Value to convert.
 * @returns {*} The converted copy.
 */
function manifestOnly(value) {
    switch (typeof value) {
        case "string":
        case "number":
        case "boolean":
            return value;
        case "object":
            break;
        default:
            return null;
    }
    if (value === null) {
        return null;
    }
    if (Array.isArray(value)) {
        return value.map(manifestOnly);
    }
    const looksLikeFile = typeof value.path === "string" && typeof value.content === "string";
    if (looksLikeFile) {
        return fileSummary(value);
    }
    const copy = {};
    for (const [key, child] of Object.entries(value)) {
        if (child !== undefined) {
            copy[key] = manifestOnly(child);
        }
    }
    return copy;
}
2674
/**
 * Picks a locally connected adapter to act as the improver, when one is needed.
 *
 * Returns undefined (no local improver) when the "agents" flag is set, or when
 * the default agent passes workbenchSkillImproveCanUseQueuedAdapter. The
 * default agent is the status snapshot's `project.defaultAgent`, falling back
 * to the first listed agent. Failures while listing agents, reading status, or
 * listing adapter auth status are treated as "nothing available".
 *
 * Otherwise the connected "claude"/"codex" auth entries are ranked: claude
 * before codex, then most recently updated first.
 *
 * @param {object} parsed Parsed CLI arguments.
 * @param {object} core Workbench core handle passed to the listing helpers.
 * @returns {Promise<{name: string, adapter: string, config: {auth: *}} | undefined>}
 * @throws {WorkbenchCodedError} "auth_required" (exit code 1) when no
 *   connected claude/codex entry exists.
 */
async function resolveLocalImproverAgent(parsed, core) {
    if (stringFlag(parsed, "agents")) {
        return undefined;
    }
    const agents = await listWorkbenchAgents(core).catch(() => []);
    const status = await workbenchStatusSnapshot(core).catch(() => undefined);
    const preferredName = status?.project.defaultAgent ?? agents[0]?.name;
    const preferredAgent = agents.find((agent) => agent.name === preferredName);
    if (preferredAgent && workbenchSkillImproveCanUseQueuedAdapter(preferredAgent)) {
        return undefined;
    }
    const authEntries = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).listStatus().catch(() => []);
    // Position in this list is the adapter's rank: earlier is preferred.
    const adapterPreference = ["claude", "codex"];
    const updatedAtMs = (entry) => Date.parse(entry.updatedAt ?? "") || 0;
    const ranked = authEntries
        .filter((entry) => entry.status === "connected" && adapterPreference.includes(entry.adapterId))
        .sort((left, right) => {
            const byAdapter = adapterPreference.indexOf(left.adapterId) - adapterPreference.indexOf(right.adapterId);
            return byAdapter || updatedAtMs(right) - updatedAtMs(left);
        });
    const [selected] = ranked;
    if (!selected) {
        throw new WorkbenchCodedError("auth_required", "workbench improve needs a connected improver.", {
            remediation: "Run workbench login claude (or codex) to connect an improver.",
            exitCode: 1,
        });
    }
    const auth = selected.slot ? { [selected.slot]: selected.profile } : selected.profile;
    return {
        name: selected.adapterId,
        adapter: selected.adapterId,
        config: { auth },
    };
}
2709
/**
 * Renders one log entry as a tab-separated line.
 *
 * Entries whose `kind` is "version" show the file count and message; every
 * other entry is rendered as a run with status, version, skill, agent and a
 * three-decimal score ("n/a" when the score is undefined).
 *
 * @param {object} entry Log entry.
 * @returns {string} Single tab-separated line.
 */
function formatLogEntry(entry) {
    const shortId = displayRef(entry.id);
    if (entry.kind === "version") {
        return `${entry.createdAt}\tversion\t${shortId}\tfiles=${entry.fileCount}\t${entry.message}`;
    }
    const scoreText = entry.score === undefined ? "n/a" : entry.score.toFixed(3);
    const shortVersion = displayRef(entry.versionId);
    return `${entry.createdAt}\trun\t${shortId}\t${entry.status}\tversion=${shortVersion}\tskill=${entry.skillName}\tagent=${entry.agentName}\tscore=${scoreText}`;
}
2716
/**
 * Splits a `workbench show` ref of the form OBJECT[:PATH] at its first colon.
 *
 * @param {string} ref Ref text.
 * @returns {[string, string | null]} The object part and the path part; the
 *   path is null when the ref contains no colon.
 */
function splitShowRef(ref) {
    const colon = ref.indexOf(":");
    return colon === -1
        ? [ref, null]
        : [ref.slice(0, colon), ref.slice(colon + 1)];
}
2723
/**
 * Looks up one evidence file of a run or job by path.
 *
 * @param {object} core Workbench core handle used to read the inspection snapshot.
 * @param {string} objectRef Run or job ref.
 * @param {string} requestedPath Path as understood by findShowFile.
 * @returns {Promise<object | null>} The file, or null when the ref is neither
 *   a run nor a job.
 * @throws {WorkbenchCodedError} "ref_not_found" (exit code 1) when the ref
 *   resolves but no file matches. Errors raised by the ref and file lookups
 *   propagate unchanged.
 */
async function fileForRunOrJobRef(core, objectRef, requestedPath) {
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const { run, jobs } = runOrJobEvidenceSelection(snapshot, objectRef);
    const refIsUnknown = !run && jobs.length === 0;
    if (refIsUnknown) {
        return null;
    }
    const evidenceFiles = evidenceFilesForRunOrJob(snapshot, objectRef);
    const match = findShowFile(evidenceFiles, requestedPath, objectRef);
    if (!match) {
        throw new WorkbenchCodedError("ref_not_found", `File not found in ${objectRef}: ${requestedPath}`, {
            remediation: `Run workbench show ${objectRef}.`,
            subject: { ref: objectRef, path: requestedPath },
            exitCode: 1,
        });
    }
    return match;
}
2740
/**
 * Collects the job evidence details of a run or job that carry any content.
 *
 * A detail is kept when at least one of its executions has a session, or a
 * trace span, event or summary. Jobs for which
 * workbenchJobEvidenceForSnapshot yields nothing are skipped.
 *
 * @param {object} snapshot Inspection snapshot.
 * @param {string} ref Run or job ref.
 * @returns {object[]} Evidence details in job order.
 */
function evidenceDetailsForRunOrJob(snapshot, ref) {
    const { jobs } = runOrJobEvidenceSelection(snapshot, ref);
    const executionHasContent = ({ sessions, trace }) => sessions.length > 0 ||
        trace.spans.length > 0 ||
        trace.events.length > 0 ||
        trace.summaries.length > 0;
    const details = jobs
        .map((job) => workbenchJobEvidenceForSnapshot(snapshot, { runId: job.runId, jobId: job.id }))
        .filter((detail) => Boolean(detail));
    return details.filter((detail) => detail.executions.some(executionHasContent));
}
2753
/**
 * Finds the evidence file a user meant by a (possibly partial) path.
 *
 * Resolution order:
 *  1. Files whose path equals the request (backslashes read as "/").
 *  2. Files whose path ends with "/<request>" or whose basename equals it.
 *     For "stderr.log", non-empty files are preferred; if every match is
 *     empty, the empty ones are considered instead.
 * At each step a group resolves when it holds a single file, or when all of
 * its files are equivalent per singleEquivalentShowFile.
 *
 * @param {Array<{path: string, content: string}>} files Candidate files.
 * @param {string} requestedPath Path typed by the user.
 * @param {string} objectRef Ref the files belong to; used in error text only.
 * @returns {object | null} The resolved file, or null when nothing matches.
 * @throws {WorkbenchCodedError} Whatever ambiguousShowPath builds, when a
 *   group holds several files that are not equivalent.
 */
function findShowFile(files, requestedPath, objectRef) {
    const wanted = requestedPath.replace(/\\/gu, "/");
    // A group is unambiguous when it has one file or only equivalent files.
    const pickUnambiguous = (group) => (group.length === 1 ? group[0] : singleEquivalentShowFile(group));

    const exactMatches = files.filter((file) => file.path === wanted);
    const exactPick = pickUnambiguous(exactMatches);
    if (exactPick) {
        return exactPick;
    }
    if (exactMatches.length > 1) {
        throw ambiguousShowPath(objectRef, requestedPath, exactMatches);
    }

    const suffixMatches = files.filter((file) => file.path.endsWith(`/${wanted}`) || path.basename(file.path) === wanted);
    if (suffixMatches.length === 0) {
        return null;
    }
    const preferred = wanted === "stderr.log"
        ? suffixMatches.filter((file) => file.content.length > 0)
        : suffixMatches;
    const preferredPick = pickUnambiguous(preferred);
    if (preferredPick) {
        return preferredPick;
    }
    // Every stderr.log was empty: fall back to the unfiltered matches.
    if (preferred.length === 0 && suffixMatches.length === 1) {
        return suffixMatches[0];
    }
    const fallbackPick = singleEquivalentShowFile(suffixMatches);
    if (fallbackPick) {
        return fallbackPick;
    }
    throw ambiguousShowPath(objectRef, requestedPath, preferred.length > 0 ? preferred : suffixMatches);
}
2789
/**
 * Collapses a group of interchangeable files into one representative.
 *
 * @param {Array<{kind: *, encoding: *, content: *}>} files Group to inspect.
 * @returns {object | null} The first file when the group has at least two
 *   files that all share `kind`, `encoding` and `content`; null otherwise
 *   (including for groups of zero or one file).
 */
function singleEquivalentShowFile(files) {
    if (files.length <= 1) {
        return null;
    }
    const reference = files[0];
    for (const file of files) {
        const sameAsReference = file.kind === reference.kind &&
            file.encoding === reference.encoding &&
            file.content === reference.content;
        if (!sameAsReference) {
            return null;
        }
    }
    return reference;
}
2798
/**
 * Builds (does not throw) the error for a path that matches several files.
 *
 * @param {string} objectRef Ref the files belong to.
 * @param {string} requestedPath Path as typed by the user.
 * @param {Array<{path: string}>} candidates The matching files.
 * @returns {WorkbenchCodedError} "ref_ambiguous" error (exit code 2) whose
 *   message and subject list every candidate path.
 */
function ambiguousShowPath(objectRef, requestedPath, candidates) {
    const candidatePaths = candidates.map(({ path: candidatePath }) => candidatePath);
    const message = `File path is ambiguous in ${objectRef}: ${requestedPath}. Candidates: ${candidatePaths.join(", ")}.`;
    const details = {
        remediation: `Run workbench show ${objectRef}.`,
        subject: { ref: objectRef, path: requestedPath, candidates: candidatePaths },
        exitCode: 2,
    };
    return new WorkbenchCodedError("ref_ambiguous", message, details);
}
2806
/**
 * Builds the structured listing of an object's files.
 *
 * @param {string} kind Object kind label.
 * @param {string} id Object id (kept in full).
 * @param {object[]} files Files of the object; each is reduced by fileSummary.
 * @returns {{kind: string, id: string, fileCount: number, files: object[]}}
 */
function fileListing(kind, id, files) {
    const summaries = files.map(fileSummary);
    return { kind, id, fileCount: files.length, files: summaries };
}
2814
/**
 * Renders an object's file listing as text: a tab-separated header line
 * (kind, shortened id, file count) followed by one path per line.
 *
 * @param {string} kind Object kind label.
 * @param {string} id Object id; shortened with displayRef.
 * @param {Array<{path: string}>} files Files to list.
 * @returns {string} Newline-joined listing.
 */
function formatFileListing(kind, id, files) {
    const header = `${kind}\t${displayRef(id)}\tfiles=${files.length}`;
    const paths = files.map((file) => file.path);
    return [header, ...paths].join("\n");
}
2817
/**
 * Resolves the trace id behind a run, job, or trace ref.
 *
 * The ref is tried as a trace first. Failing that it is resolved as a run or
 * job, and the first trace id of the run (or else of the first selected job)
 * is used.
 *
 * @param {object} core Workbench core handle used to read the inspection snapshot.
 * @param {string} ref Run, job, or trace ref.
 * @returns {Promise<string>} The trace id.
 * @throws {WorkbenchCodedError} "ref_not_found" (exit code 1) when no trace
 *   id can be derived. Ambiguity errors from the lookups propagate unchanged.
 */
async function traceIdForCaseSource(core, ref) {
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const directTrace = snapshotObjectByRef(snapshot.traces, ref, "trace");
    if (directTrace) {
        return directTrace.id;
    }
    const { run, jobs } = runOrJobEvidenceSelection(snapshot, ref);
    const derivedTraceId = run?.traceIds[0] ?? jobs[0]?.traceIds[0];
    if (!derivedTraceId) {
        throw new WorkbenchCodedError("ref_not_found", `Run, job, or trace not found: ${ref}`, {
            remediation: "Run workbench log, then workbench case add RUN_ID.",
            subject: { ref },
            exitCode: 1,
        });
    }
    return derivedTraceId;
}
2834
/**
 * Compares each run's score with the most recent earlier scored run of the
 * same skill and agent found in the inspection snapshot.
 *
 * "Earlier" means a `createdAt` that sorts before the run's own; among those,
 * the one with the greatest `createdAt` is the baseline.
 *
 * @param {object} core Workbench core handle used to read the inspection snapshot.
 * @param {object[]} runs Runs to describe.
 * @returns {Promise<object[]>} One record per run with runId, versionId,
 *   skillName and agentName, plus `score`, `previousScore` and `delta` when
 *   the values they depend on are defined.
 */
async function evalDeltas(core, runs) {
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const baselineFor = (run) => {
        let latest;
        for (const candidate of snapshot.runs) {
            const comparable = candidate.id !== run.id &&
                candidate.skillName === run.skillName &&
                candidate.agentName === run.agentName &&
                typeof candidate.score === "number" &&
                candidate.createdAt < run.createdAt;
            if (!comparable) {
                continue;
            }
            // Strictly newer only, so the earliest-listed run wins a tie.
            if (latest === undefined || candidate.createdAt.localeCompare(latest.createdAt) > 0) {
                latest = candidate;
            }
        }
        return latest;
    };
    return runs.map((run) => {
        const previous = baselineFor(run);
        const record = {
            runId: run.id,
            versionId: run.versionId,
            skillName: run.skillName,
            agentName: run.agentName,
        };
        const hasScore = run.score !== undefined;
        const hasPrevious = previous?.score !== undefined;
        if (hasScore) {
            record.score = run.score;
        }
        if (hasPrevious) {
            record.previousScore = previous.score;
        }
        if (hasScore && hasPrevious) {
            record.delta = run.score - previous.score;
        }
        return record;
    });
}
2855
/**
 * Renders one eval delta record as a single line.
 *
 * @param {object} delta Record with skillName, versionId and optional score,
 *   previousScore and delta.
 * @returns {string} "" when there is no score; "<skill> <version> <score>"
 *   when there is nothing to compare with; otherwise the same followed by
 *   "(was <previous>, <signed change>)". Numbers use three decimals.
 */
function formatEvalDelta(delta) {
    if (delta.score === undefined) {
        return "";
    }
    const summary = `${delta.skillName} ${displayRef(delta.versionId)} ${delta.score.toFixed(3)}`;
    const hasComparison = delta.previousScore !== undefined && delta.delta !== undefined;
    if (!hasComparison) {
        return summary;
    }
    // Negative numbers already carry their own "-" from toFixed.
    const sign = delta.delta >= 0 ? "+" : "";
    return `${summary} (was ${delta.previousScore.toFixed(3)}, ${sign}${delta.delta.toFixed(3)})`;
}
2866
/**
 * Suggests the next command after a successful eval.
 *
 * - No runs: "workbench eval".
 * - No run has a numeric score: edit the cases and re-run eval.
 * - Otherwise the current version's `.workbench/cases/<name>/case.yaml|yml`
 *   text files are inspected: if any of them lacks a `smoke: true` line,
 *   "workbench publish" is suggested; if not, editing the cases is.
 *
 * @param {object} core Workbench core handle used to read the inspection snapshot.
 * @param {object[]} runs Runs produced by the eval.
 * @returns {Promise<string>} The suggested next step.
 */
async function evalSuccessNextCommand(core, runs) {
    const editCases = "edit .workbench/cases, then run workbench eval";
    if (runs.length === 0) {
        return "workbench eval";
    }
    const anyScored = runs.some((run) => typeof run.score === "number");
    if (!anyScored) {
        return editCases;
    }
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const currentRef = snapshot.status.currentVersionId ?? snapshot.refs.current ?? "";
    const currentVersion = snapshotVersionByRef(snapshot, currentRef);
    const isCaseFile = (file) => file.kind === "text" &&
        /^\.workbench\/cases\/[^/]+\/case\.ya?ml$/u.test(file.path);
    // The content is prefixed with "\n" so the pattern can also match on line one.
    const isSmokeCase = (file) => /\n\s*smoke:\s*true(?:\s|$)/u.test(`\n${file.content}`);
    const caseFiles = currentVersion?.files.filter(isCaseFile) ?? [];
    const hasWorkflowCase = caseFiles.some((file) => file.kind === "text" && !isSmokeCase(file));
    return hasWorkflowCase ? "workbench publish" : editCases;
}
2119
2880
  function formatStatusSnapshot(status) {
2120
2881
  const lines = [
2121
2882
  `Root: ${status.project.root}`,
2122
2883
  `Initialized: ${status.project.initialized ? "yes" : "no"}`,
2123
- ...(status.project.currentVersionId ? [`Current version: ${status.project.currentVersionId}`] : []),
2884
+ ...(status.project.currentVersionId ? [`Current version: ${displayRef(status.project.currentVersionId)}`] : []),
2124
2885
  ...(status.project.defaultSkill ? [`Default skill: ${status.project.defaultSkill}`] : []),
2125
2886
  ...(status.project.defaultAgent ? [`Default agent: ${status.project.defaultAgent}`] : []),
2126
2887
  `Runs: ${status.runs.total}${status.runs.lastStatus ? ` (last ${status.runs.lastStatus})` : ""}`,
@@ -2130,7 +2891,7 @@ function formatStatusSnapshot(status) {
2130
2891
  ? [
2131
2892
  "publication=published",
2132
2893
  remote.publication.visibility ? `visibility=${remote.publication.visibility}` : undefined,
2133
- remote.publication.versionId ? `version=${remote.publication.versionId}` : undefined,
2894
+ remote.publication.versionId ? `version=${displayRef(remote.publication.versionId)}` : undefined,
2134
2895
  remote.publication.installUrl ? `install=${remote.publication.installUrl}` : undefined,
2135
2896
  remote.publication.pinnedInstallUrl ? `pinned=${remote.publication.pinnedInstallUrl}` : undefined,
2136
2897
  ].filter(Boolean).join("\t")
@@ -2141,49 +2902,16 @@ function formatStatusSnapshot(status) {
2141
2902
  ? [
2142
2903
  ` error[${remote.sync.lastError.code}]: ${remote.sync.lastError.message}`,
2143
2904
  ...(remote.sync.lastAttemptAt ? [` last attempt: ${remote.sync.lastAttemptAt}`] : []),
2144
- ...(remote.sync.nextCommand ? [` next: ${remote.sync.nextCommand}`] : []),
2145
2905
  ]
2146
2906
  : []),
2147
2907
  ];
2148
2908
  })] : ["Remotes: none"]),
2149
- ...(status.next.length > 0 ? ["Next:", ...status.next.map((command) => ` ${command}`)] : []),
2909
+ ...(status.next ? [`next: ${shortenCommandRefs(status.next)}`] : []),
2150
2910
  ];
2151
2911
  return lines.join("\n");
2152
2912
  }
2153
- function formatCheck(result) {
2154
- return [
2155
- "Workbench skill is valid.",
2156
- `Cases: ${result.cases} (${result.plan.source.smokeCaseCount} smoke)`,
2157
- `Skills: ${result.skills}`,
2158
- `Agents: ${result.agents}`,
2159
- `Skill files: ${result.plan.source.skillFiles}`,
2160
- `Eval files: ${result.plan.source.evalFiles}`,
2161
- "",
2162
- "Skill plan:",
2163
- ...result.plan.skills.map((skill) => [
2164
- skill.name,
2165
- `bundle=${skill.bundleHash.slice(0, 12)}`,
2166
- `files=${skill.fileCount}`,
2167
- `includes=${skill.includedSkillCount}`,
2168
- ].join("\t")),
2169
- "",
2170
- "Agent plan:",
2171
- ...result.plan.agents.map((agent) => [
2172
- agent.name,
2173
- agent.adapter,
2174
- agent.model,
2175
- agent.providerBacked ? "provider-eval" : "local-eval",
2176
- `network=${agent.network.egress}`,
2177
- `cpu=${agent.resources.cpu}`,
2178
- `memoryGb=${agent.resources.memoryGb}`,
2179
- `timeout=${agent.resources.timeoutMinutes}m`,
2180
- `image=${agent.image}`,
2181
- agent.auth ? `auth=${agent.auth}` : undefined,
2182
- ].filter(Boolean).join("\t")),
2183
- ].join("\n");
2184
- }
2185
2913
  function formatVersion(version) {
2186
- return `${version.id}\t${version.hash.slice(0, 12)}\t${version.message}`;
2914
+ return `${displayRef(version.id)}\t${version.hash.slice(0, 12)}\t${version.message}`;
2187
2915
  }
2188
2916
  function versionSummary(version) {
2189
2917
  return {
@@ -2201,11 +2929,11 @@ function formatAgent(agent) {
2201
2929
  function formatRun(run) {
2202
2930
  const score = run.score === undefined ? "n/a" : run.score.toFixed(3);
2203
2931
  const latency = run.latencyMs === undefined ? "n/a" : `${run.latencyMs}ms`;
2204
- return `${run.id}\t${run.kind}\t${run.status}\tversion=${run.versionId}\tskill=${run.skillName}\tagent=${run.agentName}\tscore=${score}\tlatency=${latency}`;
2932
+ return `${displayRef(run.id)}\t${run.kind}\t${run.status}\tversion=${displayRef(run.versionId)}\tskill=${run.skillName}\tagent=${run.agentName}\tscore=${score}\tlatency=${latency}`;
2205
2933
  }
2206
2934
  function formatImproveResult(result) {
2207
2935
  return [
2208
- `Improved ${result.version.parentIds[0] ?? "current"} -> ${result.version.id}. ${formatRun(result.run)}`,
2936
+ `Improved ${result.version.parentIds[0] ? displayRef(result.version.parentIds[0]) : "current"} -> ${displayRef(result.version.id)}. ${formatRun(result.run)}`,
2209
2937
  result.switched
2210
2938
  ? "Switched to improved version."
2211
2939
  : `Did not switch: ${result.promotionReason}`,
@@ -2214,26 +2942,26 @@ function formatImproveResult(result) {
2214
2942
  function formatJob(job) {
2215
2943
  const score = job.score === undefined ? "n/a" : job.score.toFixed(3);
2216
2944
  const duration = job.durationMs === undefined ? "n/a" : `${job.durationMs}ms`;
2217
- return `${job.id}\trun=${job.runId}\tcase=${job.caseId}\tsample=${job.sample}\t${job.status}\tscore=${score}\tduration=${duration}`;
2945
+ return `${displayRef(job.id)}\trun=${displayRef(job.runId)}\tcase=${job.caseId}\tsample=${job.sample}\t${job.status}\tscore=${score}\tduration=${duration}`;
2218
2946
  }
2219
2947
  function formatComparison(comparison) {
2220
2948
  const lines = ["version\tskill\tagent\tstatus\tscore\tcost\tlatency\trun"];
2221
2949
  for (const cell of comparison.cells) {
2222
2950
  lines.push([
2223
- cell.versionId,
2951
+ displayRef(cell.versionId),
2224
2952
  cell.skillName,
2225
2953
  `${cell.agentName}@${shortObjectId(cell.agentHash)}`,
2226
2954
  cell.status ?? "not-run",
2227
2955
  cell.score === undefined ? "n/a" : cell.score.toFixed(3),
2228
2956
  cell.costUsd === undefined ? "n/a" : `$${cell.costUsd.toFixed(4)}`,
2229
2957
  cell.latencyMs === undefined ? "n/a" : `${cell.latencyMs}ms`,
2230
- cell.runId ?? "n/a",
2958
+ cell.runId ? displayRef(cell.runId) : "n/a",
2231
2959
  ].join("\t"));
2232
2960
  }
2233
2961
  return lines.join("\n");
2234
2962
  }
2235
2963
  function shortObjectId(id) {
2236
- return id.length > 12 ? id.slice(0, 12) : id;
2964
+ return id.length > 8 ? id.slice(0, 8) : id;
2237
2965
  }
2238
2966
  function formatTrace(trace) {
2239
2967
  const result = asRecord(trace.result);
@@ -2242,7 +2970,7 @@ function formatTrace(trace) {
2242
2970
  const error = typeof result?.error === "string" ? result.error.split(/\r?\n/u)[0] : undefined;
2243
2971
  const files = trace.files.slice(0, 5).map((file) => file.path).join(",");
2244
2972
  return [
2245
- `${trace.id}\trun=${trace.runId}\tjob=${trace.jobId ?? "n/a"}\tversion=${trace.versionId}\tskill=${trace.skillName}\tagent=${trace.agentName}`,
2973
+ `${displayRef(trace.id)}\trun=${displayRef(trace.runId)}\tjob=${trace.jobId ? displayRef(trace.jobId) : "n/a"}\tversion=${displayRef(trace.versionId)}\tskill=${trace.skillName}\tagent=${trace.agentName}`,
2246
2974
  status ? `status=${status}` : undefined,
2247
2975
  score ? `score=${score}` : undefined,
2248
2976
  error ? `error=${error}` : undefined,
@@ -2270,7 +2998,7 @@ function formatTraceDetail(detail) {
2270
2998
  return detail.executions.map((execution) => {
2271
2999
  const sessionLabels = execution.sessions.map((session) => session.label).join(",");
2272
3000
  return [
2273
- `${execution.id}\trun=${detail.runId}\tjobs=${execution.jobIds.join(",")}\tstatus=${execution.status}`,
3001
+ `${execution.id}\trun=${displayRef(detail.runId)}\tjobs=${execution.jobIds.map(displayRef).join(",")}\tstatus=${execution.status}`,
2274
3002
  `events=${execution.trace.events.length}`,
2275
3003
  `spans=${execution.trace.spans.length}`,
2276
3004
  `summaries=${execution.trace.summaries.length}`,
@@ -2279,7 +3007,7 @@ function formatTraceDetail(detail) {
2279
3007
  }).join("\n");
2280
3008
  }
2281
3009
  function formatArtifact(artifact) {
2282
- return `${artifact.id}\trun=${artifact.runId}\tjob=${artifact.jobId}\t${artifact.kind}\tfiles=${artifact.files.length}`;
3010
+ return `${displayRef(artifact.id)}\trun=${displayRef(artifact.runId)}\tjob=${displayRef(artifact.jobId)}\t${artifact.kind}\tfiles=${artifact.files.length}`;
2283
3011
  }
2284
3012
  function artifactSummary(artifact) {
2285
3013
  return {