@workbench-ai/workbench 0.0.49 → 0.0.51

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,13 @@
1
1
  const sourceDirectoryHelp = [
2
2
  "Directory:",
3
- " Run from a Workbench project containing benchmark.yaml plus subjects/<name>/subject.yaml.",
4
- " Subject manifests declare their files with files.path, usually files beside subject.yaml.",
5
- " Pass --dir DIR or pass benchmark.yaml, subjects/<name>, or subjects/<name>/subject.yaml as SOURCE.",
3
+ " Run from a Workbench project containing benchmark.yaml plus candidates/<name>/candidate.yaml.",
4
+ " Candidate manifests declare their files with files.path, usually files beside candidate.yaml.",
5
+ " Pass --dir DIR or pass benchmark.yaml, candidates/<name>, or candidates/<name>/candidate.yaml as SOURCE.",
6
6
  ];
7
7
  export const LOCAL_DEV_OPEN_LIFECYCLE_NOTE = "Keep this command running while using the local web view; Ctrl-C stops the server and the page will stop working.";
8
8
  const hostedWatchLifecycleNoteLines = [
9
9
  "Watching is client-side polling only.",
10
- "Stopping this command does not cancel the hosted run; use workbench cloud runs cancel RUN_ID to cancel it.",
10
+ "Stopping this command does not cancel the hosted run.",
11
11
  ];
12
12
  export const HOSTED_WATCH_LIFECYCLE_NOTE = hostedWatchLifecycleNoteLines.join(" ");
13
13
  const localOpenLifecycleHelp = [
@@ -34,45 +34,31 @@ const rootLines = [
34
34
  " workbench adapters inspect ID [--dir DIR] [--json]",
35
35
  " workbench adapters test ID|SOURCE [--dir DIR] [--request PATH] [--output DIR] [--json]",
36
36
  "",
37
- "Local runs:",
38
- " workbench eval [SOURCE] [--dir DIR] [--subject ID] [--samples N] [--json]",
39
- " workbench improve [SOURCE] [--dir DIR] [--from SUBJECT_ID] [--optimizer OPTIMIZER_YAML] [--budget N] [--samples N] [--json]",
40
- " workbench open [SOURCE] [--dir DIR] [--run RUN_ID] [--host HOST] [--port N] [--no-open] [--json]",
41
- " workbench restore [--dir DIR] [--subject ID] [--dry-run] [--yes] [--json]",
37
+ "Runs:",
38
+ " workbench eval [SOURCE] [--dir DIR] [--hosted] [--benchmark OWNER/BENCHMARK] [--candidate CANDIDATE_ID] [--base CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
39
+ " workbench improve [SOURCE] [--dir DIR] [--hosted] [--benchmark OWNER/BENCHMARK] [--from CANDIDATE_ID] [--base CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
40
+ " workbench retry TARGET_ID [--dir DIR] [--hosted] [--benchmark OWNER/BENCHMARK] [--watch] [--interval-ms N] [--timeout-ms N] [--json]",
41
+ " workbench open [SOURCE|OWNER/BENCHMARK|RUN_ID|CANDIDATE_ID] [--dir DIR] [--hosted] [--benchmark OWNER/BENCHMARK] [--run RUN_ID] [--host HOST] [--port N] [--no-open] [--json]",
42
+ " workbench restore [--dir DIR] [--candidate CANDIDATE_ID] [--dry-run] [--yes] [--json]",
42
43
  "",
43
44
  "Local inspection:",
44
45
  " workbench runs list [--dir DIR] [--json]",
45
46
  " workbench runs show RUN_ID [--dir DIR] [--json]",
46
- " workbench subjects list [--dir DIR] [--json]",
47
- " workbench subjects show SUBJECT_ID [--dir DIR] [--json]",
48
- " workbench subjects files [--dir DIR] [--subject ID] [--json]",
49
- " workbench subjects preview --path PATH [--dir DIR] [--subject ID] [--output PATH|-] [--json]",
47
+ " workbench candidates list [--dir DIR] [--json]",
48
+ " workbench candidates show CANDIDATE_ID [--dir DIR] [--json]",
49
+ " workbench candidates files [--dir DIR] [--candidate CANDIDATE_ID] [--json]",
50
+ " workbench candidates preview --path PATH [--dir DIR] [--candidate CANDIDATE_ID] [--output PATH|-] [--json]",
50
51
  " workbench traces collect [--providers codex,claude] [--since 30d] [--workspace DIR] [--limit N] [--json]",
51
52
  " workbench traces list [--providers codex,claude] [--since 30d] [--workspace DIR] [--limit N] [--json]",
52
53
  " workbench traces show TRACE_ID [--providers codex,claude] [--since 30d] [--workspace DIR] [--json]",
53
54
  "",
54
- "Remote sync:",
55
+ "Remote:",
55
56
  " workbench login [--base-url URL] [--no-open] [--json]",
56
57
  " workbench logout [--json]",
57
58
  " workbench whoami [--dir DIR] [--json]",
58
- " workbench clone OWNER/BENCHMARK[@REF] [DIR] [--dry-run] [--json]",
59
- " workbench remote show [--dir DIR] [--json]",
60
- " workbench remote add origin OWNER/BENCHMARK[@REF] [--dir DIR] [--json]",
61
- " workbench remote set-url origin OWNER/BENCHMARK[@REF] [--dir DIR] [--json]",
62
- " workbench remote remove origin [--dir DIR] [--json]",
63
- " workbench fetch [--dir DIR] [--json]",
59
+ " workbench clone OWNER/BENCHMARK [DIR] [--dry-run] [--json]",
64
60
  " workbench pull [--dir DIR] [--dry-run] [--json]",
65
- " workbench push [SOURCE] [--dir DIR] [--tag TAG] [--visibility public|private] [--dry-run] [--json]",
66
- "",
67
- "Hosted runs and resources:",
68
- " workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base SUBJECT_ID] [--samples N] [--watch] [--dry-run] [--json]",
69
- " workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base SUBJECT_ID] [--optimizer OPTIMIZER_YAML] [--budget N] [--samples N] [--watch] [--dry-run] [--json]",
70
- " workbench cloud open [OWNER/BENCHMARK[@REF]|RUN_ID|SUBJECT_ID] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--no-open] [--json]",
71
- " workbench cloud watch RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--interval-ms N] [--timeout-ms N] [--json]",
72
- " workbench cloud logs RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
73
- " workbench cloud star OWNER/BENCHMARK [--json]",
74
- " workbench cloud unstar OWNER/BENCHMARK [--json]",
75
- " workbench cloud benchmarks|runs|subjects <command> [options]",
61
+ " workbench push [SOURCE] [--dir DIR] [--visibility public|private] [--dry-run] [--json]",
76
62
  "",
77
63
  "Auth:",
78
64
  " workbench auth connect ADAPTER[/SLOT] [--dir DIR] [--method METHOD] [--profile PROFILE] [--profile-root DIR] [--local-only] [--json]",
@@ -80,11 +66,12 @@ const rootLines = [
80
66
  "",
81
67
  "Examples:",
82
68
  " workbench init --skill invoice-review --agent codex",
83
- " workbench eval subjects/codex --samples 1",
69
+ " workbench eval candidates/current --samples 1",
84
70
  " workbench improve --budget 2 --samples 1",
71
+ " workbench retry eval_local_123 --json",
85
72
  " workbench open --no-open --json",
86
- " workbench push --tag v1",
87
- " workbench cloud eval subjects/codex --benchmark openbench/invoice-review@v1 --watch",
73
+ " workbench push",
74
+ " workbench eval --hosted candidates/current --benchmark openbench/invoice-review --watch",
88
75
  "",
89
76
  "Environment:",
90
77
  " WORKBENCH_API_URL sets the hosted Workbench API base URL.",
@@ -118,7 +105,7 @@ const commandHelp = Object.fromEntries(Object.entries({
118
105
  " workbench init [DIR] --skill NAME --agent ADAPTER [--from PATH] [--example] [--json]",
119
106
  " workbench init [DIR] --command NAME [--from PATH] [--example] [--json]",
120
107
  "",
121
- "Scaffold a local Workbench project. benchmark.yaml selects an engine; the built-in workbench engine owns tasks, environment, and scoring under engine.with. subjects/<name>/subject.yaml owns files.path plus optional prepare and run behavior. optimizers/<name>.yaml owns improvement behavior.",
108
+ "Scaffold a local Workbench project. benchmark.yaml selects an engine; the built-in workbench engine owns tasks, environment, and scoring under engine.with. candidates/<name>/candidate.yaml owns files.path plus optional prepare and run behavior. Candidate manifests own improvement behavior.",
122
109
  "",
123
110
  "Examples:",
124
111
  " workbench init --skill invoice-review --agent codex",
@@ -128,51 +115,69 @@ const commandHelp = Object.fromEntries(Object.entries({
128
115
  "Usage:",
129
116
  " workbench check [SOURCE] [--dir DIR] [--json]",
130
117
  "",
131
- "Validate benchmark.yaml, one subject manifest, and an optional optimizer manifest.",
118
+ "Validate benchmark.yaml, one candidate manifest.",
132
119
  "",
133
120
  "Examples:",
134
121
  " workbench check",
135
- " workbench check subjects/codex --json",
122
+ " workbench check candidates/current --json",
136
123
  ]),
137
- eval: withSourceDirectoryHelp([
124
+ eval: withSourceDirectoryHelp(withLifecycleHelp([
138
125
  "Usage:",
139
- " workbench eval [SOURCE] [--dir DIR] [--subject ID] [--samples N] [--json]",
126
+ " workbench eval [SOURCE] [--dir DIR] [--candidate CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--json]",
127
+ " workbench eval --hosted [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK] [--base CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
140
128
  "",
141
- "Run the selected local subject against the current benchmark and record attempts, results, traces, artifacts, and a run record under .workbench/runtime.",
129
+ "Ensure the selected candidate run has an evaluation for the current benchmark. Without --hosted, execution writes local records. With --hosted, Workbench starts or reuses a hosted run against the configured remote or --benchmark target. Completed work is reused only when candidate, run configuration, source, adapters, benchmark, and samples match; use --rerun to intentionally spend again.",
142
130
  "",
143
131
  "Examples:",
144
132
  " workbench eval --samples 1",
145
- " workbench eval subjects/codex --samples 2 --json",
146
- ]),
147
- improve: withSourceDirectoryHelp([
133
+ " workbench eval --hosted candidates/current --samples 1 --watch --json",
134
+ " workbench eval candidates/current --samples 2 --json",
135
+ ], hostedWatchLifecycleHelp)),
136
+ improve: withSourceDirectoryHelp(withLifecycleHelp([
148
137
  "Usage:",
149
- " workbench improve [SOURCE] [--dir DIR] [--from SUBJECT_ID] [--optimizer OPTIMIZER_YAML] [--budget N] [--samples N] [--json]",
138
+ " workbench improve [SOURCE] [--dir DIR] [--from CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--json]",
139
+ " workbench improve --hosted [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK] [--base CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
150
140
  "",
151
- "Run local subject improvement. By default, Workbench improves the current subject. If it has not been evaluated yet, Workbench evaluates it first. Use --from to improve an explicit subject id.",
141
+ "Ensure a candidate improvement exists for the selected base, run, budget, and samples. Without --hosted, execution writes local records. With --hosted, Workbench starts or reuses hosted work against the configured remote or --benchmark target. Completed work is reused only when base, run configuration, source, adapters, benchmark, budget, and samples match; use --rerun to intentionally spend again.",
152
142
  "",
153
143
  "Examples:",
154
144
  " workbench improve --budget 1 --samples 1",
155
- " workbench improve subjects/codex --from subj_123 --optimizer optimizers/codex.yaml --json",
156
- ]),
145
+ " workbench improve --hosted candidates/current --budget 1 --samples 1 --watch --json",
146
+ " workbench improve candidates/current --from candidate_123 --json",
147
+ ], hostedWatchLifecycleHelp)),
148
+ retry: withSourceDirectoryHelp(withLifecycleHelp([
149
+ "Usage:",
150
+ " workbench retry TARGET_ID [--dir DIR] [--json]",
151
+ " workbench retry --hosted TARGET_ID [--dir DIR] [--benchmark OWNER/BENCHMARK] [--watch] [--interval-ms N] [--timeout-ms N] [--json]",
152
+ "",
153
+ "Retry a failed run or evaluation by replaying its recorded candidate, configuration, sample count, and improve budget. Use --hosted for hosted records.",
154
+ "",
155
+ "Examples:",
156
+ " workbench retry eval_local_123 --json",
157
+ " workbench retry --hosted run_123 --watch --json",
158
+ " workbench retry run_local_123 --dir ./my-benchmark",
159
+ ], hostedWatchLifecycleHelp)),
157
160
  open: withSourceDirectoryHelp(withLifecycleHelp([
158
161
  "Usage:",
159
162
  " workbench open [SOURCE] [--dir DIR] [--run RUN_ID] [--host HOST] [--port N] [--no-open] [--json]",
163
+ " workbench open --hosted [OWNER/BENCHMARK|RUN_ID|CANDIDATE_ID] [--dir DIR] [--benchmark OWNER/BENCHMARK] [--no-open] [--json]",
160
164
  "",
161
- "Start the local Workbench web view for the project and keep serving it until stopped. When a run is supplied, open directly to that run. Without --run, Workbench opens the latest run when one exists.",
165
+ "Start the local Workbench web view for the project and keep serving it until stopped. With --hosted, print and optionally open the hosted project URL instead.",
162
166
  "",
163
167
  "Examples:",
164
168
  " workbench open",
169
+ " workbench open --hosted --no-open --json",
165
170
  " workbench open --run eval_local_123 --port 4317 --no-open --json",
166
171
  ], localOpenLifecycleHelp)),
167
172
  restore: withSourceDirectoryHelp([
168
173
  "Usage:",
169
- " workbench restore [--dir DIR] [--subject ID] [--dry-run] [--yes] [--json]",
174
+ " workbench restore [--dir DIR] [--candidate CANDIDATE_ID] [--dry-run] [--yes] [--json]",
170
175
  "",
171
- "Restore a local subject snapshot into the subject files directory.",
176
+ "Restore a local candidate snapshot into the candidate files directory.",
172
177
  "",
173
178
  "Examples:",
174
- " workbench restore --subject subj_123 --dry-run",
175
- " workbench restore --subject subj_123 --yes",
179
+ " workbench restore --candidate candidate_123 --dry-run",
180
+ " workbench restore --candidate candidate_123 --yes",
176
181
  ]),
177
182
  runs: [
178
183
  "Usage:",
@@ -208,140 +213,77 @@ const commandHelp = Object.fromEntries(Object.entries({
208
213
  " workbench runs show eval_local_123",
209
214
  " workbench runs show eval_local_123 --json",
210
215
  ]),
211
- subjects: [
216
+ candidates: [
212
217
  "Usage:",
213
- " workbench subjects <command> [options]",
218
+ " workbench candidates <command> [options]",
214
219
  "",
215
- "Inspect local subjects.",
220
+ "Inspect local candidates.",
216
221
  "",
217
222
  "Commands:",
218
- " workbench subjects list [--dir DIR] [--json]",
219
- " workbench subjects show SUBJECT_ID [--dir DIR] [--json]",
220
- " workbench subjects files [--dir DIR] [--subject ID] [--json]",
221
- " workbench subjects preview --path PATH [--dir DIR] [--subject ID] [--output PATH|-] [--json]",
223
+ " workbench candidates list [--dir DIR] [--json]",
224
+ " workbench candidates show CANDIDATE_ID [--dir DIR] [--json]",
225
+ " workbench candidates files [--dir DIR] [--candidate CANDIDATE_ID] [--json]",
226
+ " workbench candidates preview --path PATH [--dir DIR] [--candidate CANDIDATE_ID] [--output PATH|-] [--json]",
222
227
  "",
223
228
  "Examples:",
224
- " workbench subjects list --json",
225
- " workbench subjects preview --subject subj_123 --path SKILL.md --output -",
229
+ " workbench candidates list --json",
230
+ " workbench candidates preview --candidate candidate_123 --path SKILL.md --output -",
226
231
  ],
227
- "subjects list": withSourceDirectoryHelp([
232
+ "candidates list": withSourceDirectoryHelp([
228
233
  "Usage:",
229
- " workbench subjects list [--dir DIR] [--json]",
234
+ " workbench candidates list [--dir DIR] [--json]",
230
235
  "",
231
- "List local subjects.",
236
+ "List local candidates.",
232
237
  "",
233
238
  "Examples:",
234
- " workbench subjects list",
235
- " workbench subjects list --json",
239
+ " workbench candidates list",
240
+ " workbench candidates list --json",
236
241
  ]),
237
- "subjects show": withSourceDirectoryHelp([
242
+ "candidates show": withSourceDirectoryHelp([
238
243
  "Usage:",
239
- " workbench subjects show SUBJECT_ID [--dir DIR] [--json]",
244
+ " workbench candidates show CANDIDATE_ID [--dir DIR] [--json]",
240
245
  "",
241
- "Show one local subject.",
246
+ "Show one local candidate.",
242
247
  "",
243
248
  "Examples:",
244
- " workbench subjects show subj_123",
245
- " workbench subjects show subj_123 --json",
249
+ " workbench candidates show candidate_123",
250
+ " workbench candidates show candidate_123 --json",
246
251
  ]),
247
- "subjects files": withSourceDirectoryHelp([
252
+ "candidates files": withSourceDirectoryHelp([
248
253
  "Usage:",
249
- " workbench subjects files [--dir DIR] [--subject ID] [--json]",
254
+ " workbench candidates files [--dir DIR] [--candidate CANDIDATE_ID] [--json]",
250
255
  "",
251
- "List files in a local subject snapshot.",
256
+ "List files in a local candidate snapshot.",
252
257
  "",
253
258
  "Examples:",
254
- " workbench subjects files --subject subj_123",
255
- " workbench subjects files --subject subj_123 --json",
259
+ " workbench candidates files --candidate candidate_123",
260
+ " workbench candidates files --candidate candidate_123 --json",
256
261
  ]),
257
- "subjects preview": withSourceDirectoryHelp([
262
+ "candidates preview": withSourceDirectoryHelp([
258
263
  "Usage:",
259
- " workbench subjects preview --path PATH [--dir DIR] [--subject ID] [--output PATH|-] [--json]",
264
+ " workbench candidates preview --path PATH [--dir DIR] [--candidate CANDIDATE_ID] [--output PATH|-] [--json]",
260
265
  "",
261
- "Preview a file from a local subject snapshot.",
266
+ "Preview a file from a local candidate snapshot.",
262
267
  "",
263
268
  "Examples:",
264
- " workbench subjects preview --subject subj_123 --path SKILL.md",
265
- " workbench subjects preview --subject subj_123 --path SKILL.md --output -",
269
+ " workbench candidates preview --candidate candidate_123 --path SKILL.md",
270
+ " workbench candidates preview --candidate candidate_123 --path SKILL.md --output -",
266
271
  ]),
267
272
  clone: [
268
273
  "Usage:",
269
- " workbench clone OWNER/BENCHMARK[@REF] [DIR] [--dry-run] [--json]",
274
+ " workbench clone OWNER/BENCHMARK [DIR] [--dry-run] [--json]",
270
275
  "",
271
- "Download a hosted benchmark project into a local Workbench project and write .workbench/origin.json.",
276
+ "Clone hosted source and runtime history into a local Workbench project and remember it as the remote.",
272
277
  "",
273
278
  "Examples:",
274
279
  " workbench clone openbench/invoice-review",
275
- " workbench clone openbench/invoice-review@v1 ./invoice-review --dry-run --json",
280
+ " workbench clone openbench/invoice-review ./invoice-review --dry-run --json",
276
281
  ],
277
- remote: withSourceDirectoryHelp([
278
- "Usage:",
279
- " workbench remote show [--dir DIR] [--json]",
280
- " workbench remote add origin OWNER/BENCHMARK[@REF] [--dir DIR] [--json]",
281
- " workbench remote set-url origin OWNER/BENCHMARK[@REF] [--dir DIR] [--json]",
282
- " workbench remote remove origin [--dir DIR] [--json]",
283
- "",
284
- "Manage the project origin used by fetch, pull, and push.",
285
- "",
286
- "Examples:",
287
- " workbench remote show --json",
288
- " workbench remote add origin openbench/invoice-review@v1",
289
- ]),
290
- "remote show": withSourceDirectoryHelp([
291
- "Usage:",
292
- " workbench remote show [--dir DIR] [--json]",
293
- "",
294
- "Show the configured origin.",
295
- "",
296
- "Examples:",
297
- " workbench remote show",
298
- " workbench remote show --json",
299
- ]),
300
- "remote add": withSourceDirectoryHelp([
301
- "Usage:",
302
- " workbench remote add origin OWNER/BENCHMARK[@REF] [--dir DIR] [--json]",
303
- "",
304
- "Set the project origin.",
305
- "",
306
- "Examples:",
307
- " workbench remote add origin openbench/invoice-review@v1",
308
- " workbench remote add origin openbench/invoice-review --json",
309
- ]),
310
- "remote set-url": withSourceDirectoryHelp([
311
- "Usage:",
312
- " workbench remote set-url origin OWNER/BENCHMARK[@REF] [--dir DIR] [--json]",
313
- "",
314
- "Replace the project origin.",
315
- "",
316
- "Examples:",
317
- " workbench remote set-url origin openbench/invoice-review@v2",
318
- " workbench remote set-url origin openbench/invoice-review@v2 --json",
319
- ]),
320
- "remote remove": withSourceDirectoryHelp([
321
- "Usage:",
322
- " workbench remote remove origin [--dir DIR] [--json]",
323
- "",
324
- "Remove the project origin.",
325
- "",
326
- "Examples:",
327
- " workbench remote remove origin",
328
- " workbench remote remove origin --json",
329
- ]),
330
- fetch: withSourceDirectoryHelp([
331
- "Usage:",
332
- " workbench fetch [--dir DIR] [--json]",
333
- "",
334
- "Download remote source into .workbench/fetch without changing project files.",
335
- "",
336
- "Examples:",
337
- " workbench fetch",
338
- " workbench fetch --json",
339
- ]),
340
282
  pull: withSourceDirectoryHelp([
341
283
  "Usage:",
342
284
  " workbench pull [--dir DIR] [--dry-run] [--json]",
343
285
  "",
344
- "Update managed project source files from the configured origin.",
286
+ "Pull source and runtime history from the remembered remote into the local project.",
345
287
  "",
346
288
  "Examples:",
347
289
  " workbench pull --dry-run",
@@ -349,13 +291,13 @@ const commandHelp = Object.fromEntries(Object.entries({
349
291
  ]),
350
292
  push: withSourceDirectoryHelp([
351
293
  "Usage:",
352
- " workbench push [SOURCE] [--dir DIR] [--tag TAG] [--visibility public|private] [--dry-run] [--json]",
294
+ " workbench push [SOURCE] [--dir DIR] [--visibility public|private] [--dry-run] [--json]",
353
295
  "",
354
- "Create or update the hosted benchmark version from local project source and write .workbench/origin.json.",
296
+ "Push local project source and runtime history to the remembered remote, or create one when the project has not been pushed before.",
355
297
  "",
356
298
  "Examples:",
357
- " workbench push --tag v1 --dry-run",
358
- " workbench push subjects/codex --visibility private --json",
299
+ " workbench push --dry-run",
300
+ " workbench push candidates/current --visibility private --json",
359
301
  ]),
360
302
  login: [
361
303
  "Usage:",
@@ -516,307 +458,7 @@ const commandHelp = Object.fromEntries(Object.entries({
516
458
  " workbench auth disconnect codex --local-only",
517
459
  " workbench auth disconnect claude --profile default --json",
518
460
  ],
519
- cloud: [
520
- "Usage:",
521
- " workbench cloud <command> [options]",
522
- "",
523
- "Hosted Workbench Cloud execution and resource commands.",
524
- "",
525
- "Commands:",
526
- " workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base SUBJECT_ID] [--samples N] [--watch] [--dry-run] [--json]",
527
- " workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base SUBJECT_ID] [--optimizer OPTIMIZER_YAML] [--budget N] [--samples N] [--watch] [--dry-run] [--json]",
528
- " workbench cloud open [OWNER/BENCHMARK[@REF]|RUN_ID|SUBJECT_ID] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--no-open] [--json]",
529
- " workbench cloud watch RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--interval-ms N] [--timeout-ms N] [--json]",
530
- " workbench cloud logs RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
531
- " workbench cloud star OWNER/BENCHMARK [--json]",
532
- " workbench cloud unstar OWNER/BENCHMARK [--json]",
533
- " workbench cloud benchmarks|runs|subjects <command> [options]",
534
- "",
535
- "Examples:",
536
- " workbench cloud eval subjects/codex --benchmark openbench/invoice-review@v1 --dry-run --json",
537
- " workbench cloud runs list --benchmark openbench/invoice-review --json",
538
- ],
539
- "cloud star": [
540
- "Usage:",
541
- " workbench cloud star OWNER/BENCHMARK [--json]",
542
- "",
543
- "Star a hosted benchmark.",
544
- "",
545
- "Examples:",
546
- " workbench cloud star openbench/invoice-review",
547
- " workbench cloud star openbench/invoice-review --json",
548
- ],
549
- "cloud unstar": [
550
- "Usage:",
551
- " workbench cloud unstar OWNER/BENCHMARK [--json]",
552
- "",
553
- "Remove your star from a hosted benchmark.",
554
- "",
555
- "Examples:",
556
- " workbench cloud unstar openbench/invoice-review",
557
- " workbench cloud unstar openbench/invoice-review --json",
558
- ],
559
- "cloud eval": withSourceDirectoryHelp(withLifecycleHelp([
560
- "Usage:",
561
- " workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base SUBJECT_ID] [--samples N] [--watch] [--dry-run] [--json]",
562
- "",
563
- "Submit subject files to Workbench Cloud and run hosted evaluation.",
564
- "",
565
- "Examples:",
566
- " workbench cloud eval subjects/codex --benchmark openbench/invoice-review@v1 --dry-run --json",
567
- " workbench cloud eval --benchmark openbench/invoice-review --watch",
568
- ], hostedWatchLifecycleHelp)),
569
- "cloud improve": withSourceDirectoryHelp(withLifecycleHelp([
570
- "Usage:",
571
- " workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base SUBJECT_ID] [--optimizer OPTIMIZER_YAML] [--budget N] [--samples N] [--watch] [--dry-run] [--json]",
572
- "",
573
- "Run hosted subject improvement.",
574
- "",
575
- "Examples:",
576
- " workbench cloud improve --optimizer optimizers/codex.yaml --benchmark openbench/invoice-review --dry-run",
577
- " workbench cloud improve subjects/codex --optimizer optimizers/codex.yaml --watch --json",
578
- ], hostedWatchLifecycleHelp)),
579
- "cloud open": [
580
- "Usage:",
581
- " workbench cloud open [OWNER/BENCHMARK[@REF]|RUN_ID|SUBJECT_ID] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--no-open] [--json]",
582
- "",
583
- "Print and open the hosted Workbench URL.",
584
- "",
585
- "Examples:",
586
- " workbench cloud open openbench/invoice-review --no-open",
587
- " workbench cloud open run_123 --benchmark openbench/invoice-review --json",
588
- ],
589
- "cloud watch": withSourceDirectoryHelp(withLifecycleHelp([
590
- "Usage:",
591
- " workbench cloud watch RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--interval-ms N] [--timeout-ms N] [--json]",
592
- "",
593
- "Poll a hosted run until it finishes.",
594
- "",
595
- "Examples:",
596
- " workbench cloud watch run_123 --benchmark openbench/invoice-review",
597
- " workbench cloud watch run_123 --interval-ms 5000 --timeout-ms 600000 --json",
598
- ], hostedWatchLifecycleHelp)),
599
- "cloud logs": withSourceDirectoryHelp([
600
- "Usage:",
601
- " workbench cloud logs RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
602
- "",
603
- "Show hosted run job statuses and errors.",
604
- "",
605
- "Examples:",
606
- " workbench cloud logs run_123 --benchmark openbench/invoice-review",
607
- " workbench cloud logs run_123 --json",
608
- ]),
609
- "cloud benchmarks": [
610
- "Usage:",
611
- " workbench cloud benchmarks <command> [options]",
612
- "",
613
- "Hosted benchmark resource commands.",
614
- "",
615
- "Commands:",
616
- " workbench cloud benchmarks list [--json]",
617
- " workbench cloud benchmarks show OWNER/BENCHMARK [--json]",
618
- " workbench cloud benchmarks versions OWNER/BENCHMARK [--json]",
619
- " workbench cloud benchmarks starred [--json]",
620
- " workbench cloud benchmarks delete OWNER/BENCHMARK [--dir DIR] [--dry-run] [--json]",
621
- "",
622
- "Examples:",
623
- " workbench cloud benchmarks list --json",
624
- " workbench cloud benchmarks show openbench/invoice-review",
625
- ],
626
- "cloud runs": [
627
- "Usage:",
628
- " workbench cloud runs <command> [options]",
629
- "",
630
- "Hosted run resource commands.",
631
- "",
632
- "Commands:",
633
- " workbench cloud runs list [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
634
- " workbench cloud runs show RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
635
- " workbench cloud runs cancel RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
636
- "",
637
- "Examples:",
638
- " workbench cloud runs list --benchmark openbench/invoice-review --json",
639
- " workbench cloud runs show run_123 --benchmark openbench/invoice-review",
640
- ],
641
- "cloud subjects": [
642
- "Usage:",
643
- " workbench cloud subjects <command> [options]",
644
- "",
645
- "Hosted subject resource commands.",
646
- "",
647
- "Commands:",
648
- " workbench cloud subjects list [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
649
- " workbench cloud subjects show SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
650
- " workbench cloud subjects files SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
651
- " workbench cloud subjects preview SUBJECT_ID --path PATH [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--output PATH|-] [--json]",
652
- " workbench cloud subjects pull SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--out DIR] [--json]",
653
- " workbench cloud subjects publish SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
654
- " workbench cloud subjects unpublish SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
655
- "",
656
- "Examples:",
657
- " workbench cloud subjects list --benchmark openbench/invoice-review --json",
658
- " workbench cloud subjects preview subj_123 --path SKILL.md --output -",
659
- ],
660
461
  }).map(([key, lines]) => [key, lines.join("\n")]));
661
462
  export function commandUsage(commandPath) {
662
- return commandHelp[commandPath] ?? cloudNestedCommandUsage(commandPath);
663
- }
664
- const hostedCommandHelp = Object.fromEntries(Object.entries({
665
- "benchmarks list": [
666
- "Usage:",
667
- " workbench cloud benchmarks list [--json]",
668
- "",
669
- "List public hosted benchmarks.",
670
- "",
671
- "Examples:",
672
- " workbench cloud benchmarks list",
673
- " workbench cloud benchmarks list --json",
674
- ],
675
- "benchmarks show": [
676
- "Usage:",
677
- " workbench cloud benchmarks show OWNER/BENCHMARK [--json]",
678
- "",
679
- "Show one hosted benchmark.",
680
- "",
681
- "Examples:",
682
- " workbench cloud benchmarks show openbench/invoice-review",
683
- " workbench cloud benchmarks show openbench/invoice-review --json",
684
- ],
685
- "benchmarks versions": [
686
- "Usage:",
687
- " workbench cloud benchmarks versions OWNER/BENCHMARK [--json]",
688
- "",
689
- "List hosted benchmark versions.",
690
- "",
691
- "Examples:",
692
- " workbench cloud benchmarks versions openbench/invoice-review",
693
- " workbench cloud benchmarks versions openbench/invoice-review --json",
694
- ],
695
- "benchmarks starred": [
696
- "Usage:",
697
- " workbench cloud benchmarks starred [--json]",
698
- "",
699
- "List benchmarks starred by the current user.",
700
- "",
701
- "Examples:",
702
- " workbench cloud benchmarks starred",
703
- " workbench cloud benchmarks starred --json",
704
- ],
705
- "benchmarks delete": [
706
- "Usage:",
707
- " workbench cloud benchmarks delete OWNER/BENCHMARK [--dir DIR] [--dry-run] [--json]",
708
- "",
709
- "Delete a hosted benchmark project that you own.",
710
- "",
711
- "Examples:",
712
- " workbench cloud benchmarks delete alice/invoice-review --dry-run",
713
- " workbench cloud benchmarks delete alice/invoice-review --json",
714
- ],
715
- "runs list": [
716
- "Usage:",
717
- " workbench cloud runs list [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
718
- "",
719
- "List hosted runs.",
720
- "",
721
- "Examples:",
722
- " workbench cloud runs list --benchmark openbench/invoice-review",
723
- " workbench cloud runs list --benchmark openbench/invoice-review --json",
724
- ],
725
- "runs show": [
726
- "Usage:",
727
- " workbench cloud runs show RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
728
- "",
729
- "Show one hosted run.",
730
- "",
731
- "Examples:",
732
- " workbench cloud runs show run_123 --benchmark openbench/invoice-review",
733
- " workbench cloud runs show run_123 --json",
734
- ],
735
- "runs cancel": [
736
- "Usage:",
737
- " workbench cloud runs cancel RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
738
- "",
739
- "Cancel a hosted run.",
740
- "",
741
- "Examples:",
742
- " workbench cloud runs cancel run_123 --benchmark openbench/invoice-review",
743
- " workbench cloud runs cancel run_123 --json",
744
- ],
745
- "subjects list": [
746
- "Usage:",
747
- " workbench cloud subjects list [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
748
- "",
749
- "List hosted subjects.",
750
- "",
751
- "Examples:",
752
- " workbench cloud subjects list --benchmark openbench/invoice-review",
753
- " workbench cloud subjects list --json",
754
- ],
755
- "subjects show": [
756
- "Usage:",
757
- " workbench cloud subjects show SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
758
- "",
759
- "Show one hosted subject.",
760
- "",
761
- "Examples:",
762
- " workbench cloud subjects show subj_123 --benchmark openbench/invoice-review",
763
- " workbench cloud subjects show subj_123 --json",
764
- ],
765
- "subjects files": [
766
- "Usage:",
767
- " workbench cloud subjects files SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
768
- "",
769
- "List files in a hosted subject snapshot.",
770
- "",
771
- "Examples:",
772
- " workbench cloud subjects files subj_123 --benchmark openbench/invoice-review",
773
- " workbench cloud subjects files subj_123 --json",
774
- ],
775
- "subjects preview": [
776
- "Usage:",
777
- " workbench cloud subjects preview SUBJECT_ID --path PATH [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--output PATH|-] [--json]",
778
- "",
779
- "Preview a file from a hosted subject snapshot.",
780
- "",
781
- "Examples:",
782
- " workbench cloud subjects preview subj_123 --path SKILL.md --output -",
783
- " workbench cloud subjects preview subj_123 --path SKILL.md --benchmark openbench/invoice-review",
784
- ],
785
- "subjects pull": [
786
- "Usage:",
787
- " workbench cloud subjects pull SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--out DIR] [--json]",
788
- "",
789
- "Download hosted subject files.",
790
- "",
791
- "Examples:",
792
- " workbench cloud subjects pull subj_123 --out ./subject-files",
793
- " workbench cloud subjects pull subj_123 --benchmark openbench/invoice-review --json",
794
- ],
795
- "subjects publish": [
796
- "Usage:",
797
- " workbench cloud subjects publish SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
798
- "",
799
- "Make a hosted subject public.",
800
- "",
801
- "Examples:",
802
- " workbench cloud subjects publish subj_123 --benchmark openbench/invoice-review",
803
- " workbench cloud subjects publish subj_123 --json",
804
- ],
805
- "subjects unpublish": [
806
- "Usage:",
807
- " workbench cloud subjects unpublish SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
808
- "",
809
- "Make a hosted subject private.",
810
- "",
811
- "Examples:",
812
- " workbench cloud subjects unpublish subj_123 --benchmark openbench/invoice-review",
813
- " workbench cloud subjects unpublish subj_123 --json",
814
- ],
815
- }).map(([key, lines]) => [key, lines.join("\n")]));
816
- function cloudNestedCommandUsage(commandPath) {
817
- if (!commandPath.startsWith("cloud ")) {
818
- return null;
819
- }
820
- const withoutCloud = commandPath.slice("cloud ".length);
821
- return hostedCommandHelp[withoutCloud] ?? null;
463
+ return commandHelp[commandPath] ?? null;
822
464
  }