@workbench-ai/workbench 0.0.48 → 0.0.50

This diff shows the differences between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -55,7 +55,7 @@ export async function resolveWorkbenchAdaptersForProject(dir, spec) {
55
55
  discovered = true;
56
56
  continue;
57
57
  }
58
- throw new Error(`Adapter ${id} is referenced by benchmark/subject/optimizer YAML but is not installed. List its source under adapters in the YAML file that uses it.`);
58
+ throw new Error(`Adapter ${id} is referenced by benchmark/candidate YAML but is not installed. List its source under adapters in the YAML file that uses it.`);
59
59
  }
60
60
  }
61
61
  assertWorkbenchAdapterOperationSupport(rootAdapterOperationRequirements(spec), [...adapters.values()].map((adapter) => adapter.manifest));
@@ -258,8 +258,8 @@ function rootAdapterOperationRequirements(spec) {
258
258
  return [
259
259
  { invocation: engineResolveInvocationForSpec(spec), operation: "engine.resolve" },
260
260
  { invocation: spec.engineRun, operation: "engine.run" },
261
- ...(spec.improve ? [{ invocation: spec.improve, operation: "optimizer.improve" }] : []),
262
- { invocation: spec.run, operation: "subject.run" },
261
+ ...(spec.improve ? [{ invocation: spec.improve, operation: "candidate.improve" }] : []),
262
+ { invocation: spec.run, operation: "candidate.run" },
263
263
  ];
264
264
  }
265
265
  function rootAdapterInvocations(spec) {
@@ -1,6 +1,6 @@
1
1
  import type { SurfaceSnapshotFile } from "@workbench-ai/workbench-core";
2
2
  import type { LocalProjectSource } from "./project-source.js";
3
3
  export declare function localBenchmarkFingerprint(project: LocalProjectSource): string;
4
- export declare function localSubjectFingerprint(project: LocalProjectSource): string;
4
+ export declare function localCandidateFingerprint(project: LocalProjectSource): string;
5
5
  export declare function benchmarkFingerprintForFiles(files: readonly SurfaceSnapshotFile[]): string;
6
6
  //# sourceMappingURL=benchmark-fingerprint.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"benchmark-fingerprint.d.ts","sourceRoot":"","sources":["../src/benchmark-fingerprint.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AAExE,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AAE9D,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,CAO7E;AAED,wBAAgB,uBAAuB,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,CAQ3E;AAED,wBAAgB,4BAA4B,CAC1C,KAAK,EAAE,SAAS,mBAAmB,EAAE,GACpC,MAAM,CAgBR"}
1
+ {"version":3,"file":"benchmark-fingerprint.d.ts","sourceRoot":"","sources":["../src/benchmark-fingerprint.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AAExE,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AAE9D,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,CAO7E;AAED,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,CAM7E;AAED,wBAAgB,4BAA4B,CAC1C,KAAK,EAAE,SAAS,mBAAmB,EAAE,GACpC,MAAM,CAgBR"}
@@ -7,13 +7,11 @@ export function localBenchmarkFingerprint(project) {
7
7
  ...benchmarkAdapterFiles(project),
8
8
  ]);
9
9
  }
10
- export function localSubjectFingerprint(project) {
10
+ export function localCandidateFingerprint(project) {
11
11
  const hash = createHash("sha256");
12
- hash.update("workbench-subject-v1\0");
13
- hash.update(project.subjectSource);
14
- hash.update("\0runner\0");
15
- hash.update(JSON.stringify(project.spec.run));
16
- hashSurfaceFiles(hash, project.subjectFiles);
12
+ hash.update("workbench-candidate-v1\0");
13
+ hash.update(project.candidateSource);
14
+ hashSurfaceFiles(hash, project.candidateFiles);
17
15
  return hash.digest("hex");
18
16
  }
19
17
  export function benchmarkFingerprintForFiles(files) {
@@ -1 +1 @@
1
- {"version":3,"file":"command-model.d.ts","sourceRoot":"","sources":["../src/command-model.ts"],"names":[],"mappings":"AAOA,eAAO,MAAM,6BAA6B,sHAC2E,CAAC;AAOtH,eAAO,MAAM,2BAA2B,QAA0C,CAAC;AAuFnF,eAAO,MAAM,SAAS,QAAuB,CAAC;AA8jB9C,wBAAgB,YAAY,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAE/D"}
1
+ {"version":3,"file":"command-model.d.ts","sourceRoot":"","sources":["../src/command-model.ts"],"names":[],"mappings":"AAOA,eAAO,MAAM,6BAA6B,sHAC2E,CAAC;AAOtH,eAAO,MAAM,2BAA2B,QAA0C,CAAC;AA0FnF,eAAO,MAAM,SAAS,QAAuB,CAAC;AAolB9C,wBAAgB,YAAY,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAE/D"}
@@ -1,8 +1,8 @@
1
1
  const sourceDirectoryHelp = [
2
2
  "Directory:",
3
- " Run from a Workbench project containing benchmark.yaml plus subjects/<name>/subject.yaml.",
4
- " Subject manifests declare their files with files.path, usually files beside subject.yaml.",
5
- " Pass --dir DIR or pass benchmark.yaml, subjects/<name>, or subjects/<name>/subject.yaml as SOURCE.",
3
+ " Run from a Workbench project containing benchmark.yaml plus candidates/<name>/candidate.yaml.",
4
+ " Candidate manifests declare their files with files.path, usually files beside candidate.yaml.",
5
+ " Pass --dir DIR or pass benchmark.yaml, candidates/<name>, or candidates/<name>/candidate.yaml as SOURCE.",
6
6
  ];
7
7
  export const LOCAL_DEV_OPEN_LIFECYCLE_NOTE = "Keep this command running while using the local web view; Ctrl-C stops the server and the page will stop working.";
8
8
  const hostedWatchLifecycleNoteLines = [
@@ -35,18 +35,19 @@ const rootLines = [
35
35
  " workbench adapters test ID|SOURCE [--dir DIR] [--request PATH] [--output DIR] [--json]",
36
36
  "",
37
37
  "Local runs:",
38
- " workbench eval [SOURCE] [--dir DIR] [--subject ID] [--samples N] [--json]",
39
- " workbench improve [SOURCE] [--dir DIR] [--from SUBJECT_ID] [--optimizer OPTIMIZER_YAML] [--budget N] [--samples N] [--json]",
38
+ " workbench eval [SOURCE] [--dir DIR] [--candidate CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--json]",
39
+ " workbench improve [SOURCE] [--dir DIR] [--from CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--json]",
40
+ " workbench retry TARGET_ID [--dir DIR] [--json]",
40
41
  " workbench open [SOURCE] [--dir DIR] [--run RUN_ID] [--host HOST] [--port N] [--no-open] [--json]",
41
- " workbench restore [--dir DIR] [--subject ID] [--dry-run] [--yes] [--json]",
42
+ " workbench restore [--dir DIR] [--candidate CANDIDATE_ID] [--dry-run] [--yes] [--json]",
42
43
  "",
43
44
  "Local inspection:",
44
45
  " workbench runs list [--dir DIR] [--json]",
45
46
  " workbench runs show RUN_ID [--dir DIR] [--json]",
46
- " workbench subjects list [--dir DIR] [--json]",
47
- " workbench subjects show SUBJECT_ID [--dir DIR] [--json]",
48
- " workbench subjects files [--dir DIR] [--subject ID] [--json]",
49
- " workbench subjects preview --path PATH [--dir DIR] [--subject ID] [--output PATH|-] [--json]",
47
+ " workbench candidates list [--dir DIR] [--json]",
48
+ " workbench candidates show CANDIDATE_ID [--dir DIR] [--json]",
49
+ " workbench candidates files [--dir DIR] [--candidate CANDIDATE_ID] [--json]",
50
+ " workbench candidates preview --path PATH [--dir DIR] [--candidate CANDIDATE_ID] [--output PATH|-] [--json]",
50
51
  " workbench traces collect [--providers codex,claude] [--since 30d] [--workspace DIR] [--limit N] [--json]",
51
52
  " workbench traces list [--providers codex,claude] [--since 30d] [--workspace DIR] [--limit N] [--json]",
52
53
  " workbench traces show TRACE_ID [--providers codex,claude] [--since 30d] [--workspace DIR] [--json]",
@@ -65,14 +66,15 @@ const rootLines = [
65
66
  " workbench push [SOURCE] [--dir DIR] [--tag TAG] [--visibility public|private] [--dry-run] [--json]",
66
67
  "",
67
68
  "Hosted runs and resources:",
68
- " workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base SUBJECT_ID] [--samples N] [--watch] [--dry-run] [--json]",
69
- " workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base SUBJECT_ID] [--optimizer OPTIMIZER_YAML] [--budget N] [--samples N] [--watch] [--dry-run] [--json]",
70
- " workbench cloud open [OWNER/BENCHMARK[@REF]|RUN_ID|SUBJECT_ID] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--no-open] [--json]",
69
+ " workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
70
+ " workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
71
+ " workbench cloud retry TARGET_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--watch] [--interval-ms N] [--timeout-ms N] [--json]",
72
+ " workbench cloud open [OWNER/BENCHMARK[@REF]|RUN_ID|CANDIDATE_ID] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--no-open] [--json]",
71
73
  " workbench cloud watch RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--interval-ms N] [--timeout-ms N] [--json]",
72
74
  " workbench cloud logs RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
73
75
  " workbench cloud star OWNER/BENCHMARK [--json]",
74
76
  " workbench cloud unstar OWNER/BENCHMARK [--json]",
75
- " workbench cloud benchmarks|runs|subjects <command> [options]",
77
+ " workbench cloud benchmarks|runs|candidates <command> [options]",
76
78
  "",
77
79
  "Auth:",
78
80
  " workbench auth connect ADAPTER[/SLOT] [--dir DIR] [--method METHOD] [--profile PROFILE] [--profile-root DIR] [--local-only] [--json]",
@@ -80,11 +82,12 @@ const rootLines = [
80
82
  "",
81
83
  "Examples:",
82
84
  " workbench init --skill invoice-review --agent codex",
83
- " workbench eval subjects/codex --samples 1",
85
+ " workbench eval candidates/current --samples 1",
84
86
  " workbench improve --budget 2 --samples 1",
87
+ " workbench retry eval_local_123 --json",
85
88
  " workbench open --no-open --json",
86
89
  " workbench push --tag v1",
87
- " workbench cloud eval subjects/codex --benchmark openbench/invoice-review@v1 --watch",
90
+ " workbench cloud eval candidates/current --benchmark openbench/invoice-review@v1 --watch",
88
91
  "",
89
92
  "Environment:",
90
93
  " WORKBENCH_API_URL sets the hosted Workbench API base URL.",
@@ -118,7 +121,7 @@ const commandHelp = Object.fromEntries(Object.entries({
118
121
  " workbench init [DIR] --skill NAME --agent ADAPTER [--from PATH] [--example] [--json]",
119
122
  " workbench init [DIR] --command NAME [--from PATH] [--example] [--json]",
120
123
  "",
121
- "Scaffold a local Workbench project. benchmark.yaml selects an engine; the built-in workbench engine owns tasks, environment, and scoring under engine.with. subjects/<name>/subject.yaml owns files.path plus optional prepare and run behavior. optimizers/<name>.yaml owns improvement behavior.",
124
+ "Scaffold a local Workbench project. benchmark.yaml selects an engine; the built-in workbench engine owns tasks, environment, and scoring under engine.with. candidates/<name>/candidate.yaml owns files.path plus optional prepare and run behavior. Candidate manifests own improvement behavior.",
122
125
  "",
123
126
  "Examples:",
124
127
  " workbench init --skill invoice-review --agent codex",
@@ -128,31 +131,41 @@ const commandHelp = Object.fromEntries(Object.entries({
128
131
  "Usage:",
129
132
  " workbench check [SOURCE] [--dir DIR] [--json]",
130
133
  "",
131
- "Validate benchmark.yaml, one subject manifest, and an optional optimizer manifest.",
134
+ "Validate benchmark.yaml, one candidate manifest.",
132
135
  "",
133
136
  "Examples:",
134
137
  " workbench check",
135
- " workbench check subjects/codex --json",
138
+ " workbench check candidates/current --json",
136
139
  ]),
137
140
  eval: withSourceDirectoryHelp([
138
141
  "Usage:",
139
- " workbench eval [SOURCE] [--dir DIR] [--subject ID] [--samples N] [--json]",
142
+ " workbench eval [SOURCE] [--dir DIR] [--candidate CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--json]",
140
143
  "",
141
- "Run the selected local subject against the current benchmark and record attempts, results, traces, artifacts, and a run record under .workbench/runtime.",
144
+ "Ensure the selected local candidate run has an evaluation for the current benchmark. Completed work is reused only when candidate, run configuration, source, adapters, benchmark, and samples match; use --rerun to intentionally spend again.",
142
145
  "",
143
146
  "Examples:",
144
147
  " workbench eval --samples 1",
145
- " workbench eval subjects/codex --samples 2 --json",
148
+ " workbench eval candidates/current --samples 2 --json",
146
149
  ]),
147
150
  improve: withSourceDirectoryHelp([
148
151
  "Usage:",
149
- " workbench improve [SOURCE] [--dir DIR] [--from SUBJECT_ID] [--optimizer OPTIMIZER_YAML] [--budget N] [--samples N] [--json]",
152
+ " workbench improve [SOURCE] [--dir DIR] [--from CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--json]",
150
153
  "",
151
- "Run local subject improvement. By default, Workbench improves the current subject. If it has not been evaluated yet, Workbench evaluates it first. Use --from to improve an explicit subject id.",
154
+ "Ensure a local candidate improvement exists for the selected base, run, budget, and samples. Completed work is reused only when base, run configuration, source, adapters, benchmark, budget, and samples match; use --rerun to intentionally spend again.",
152
155
  "",
153
156
  "Examples:",
154
157
  " workbench improve --budget 1 --samples 1",
155
- " workbench improve subjects/codex --from subj_123 --optimizer optimizers/codex.yaml --json",
158
+ " workbench improve candidates/current --from candidate_123 --json",
159
+ ]),
160
+ retry: withSourceDirectoryHelp([
161
+ "Usage:",
162
+ " workbench retry TARGET_ID [--dir DIR] [--json]",
163
+ "",
164
+ "Retry a failed local run or evaluation by replaying its recorded candidate, configuration, sample count, and improve budget.",
165
+ "",
166
+ "Examples:",
167
+ " workbench retry eval_local_123 --json",
168
+ " workbench retry run_local_123 --dir ./my-benchmark",
156
169
  ]),
157
170
  open: withSourceDirectoryHelp(withLifecycleHelp([
158
171
  "Usage:",
@@ -166,13 +179,13 @@ const commandHelp = Object.fromEntries(Object.entries({
166
179
  ], localOpenLifecycleHelp)),
167
180
  restore: withSourceDirectoryHelp([
168
181
  "Usage:",
169
- " workbench restore [--dir DIR] [--subject ID] [--dry-run] [--yes] [--json]",
182
+ " workbench restore [--dir DIR] [--candidate CANDIDATE_ID] [--dry-run] [--yes] [--json]",
170
183
  "",
171
- "Restore a local subject snapshot into the subject files directory.",
184
+ "Restore a local candidate snapshot into the candidate files directory.",
172
185
  "",
173
186
  "Examples:",
174
- " workbench restore --subject subj_123 --dry-run",
175
- " workbench restore --subject subj_123 --yes",
187
+ " workbench restore --candidate candidate_123 --dry-run",
188
+ " workbench restore --candidate candidate_123 --yes",
176
189
  ]),
177
190
  runs: [
178
191
  "Usage:",
@@ -208,61 +221,61 @@ const commandHelp = Object.fromEntries(Object.entries({
208
221
  " workbench runs show eval_local_123",
209
222
  " workbench runs show eval_local_123 --json",
210
223
  ]),
211
- subjects: [
224
+ candidates: [
212
225
  "Usage:",
213
- " workbench subjects <command> [options]",
226
+ " workbench candidates <command> [options]",
214
227
  "",
215
- "Inspect local subjects.",
228
+ "Inspect local candidates.",
216
229
  "",
217
230
  "Commands:",
218
- " workbench subjects list [--dir DIR] [--json]",
219
- " workbench subjects show SUBJECT_ID [--dir DIR] [--json]",
220
- " workbench subjects files [--dir DIR] [--subject ID] [--json]",
221
- " workbench subjects preview --path PATH [--dir DIR] [--subject ID] [--output PATH|-] [--json]",
231
+ " workbench candidates list [--dir DIR] [--json]",
232
+ " workbench candidates show CANDIDATE_ID [--dir DIR] [--json]",
233
+ " workbench candidates files [--dir DIR] [--candidate CANDIDATE_ID] [--json]",
234
+ " workbench candidates preview --path PATH [--dir DIR] [--candidate CANDIDATE_ID] [--output PATH|-] [--json]",
222
235
  "",
223
236
  "Examples:",
224
- " workbench subjects list --json",
225
- " workbench subjects preview --subject subj_123 --path SKILL.md --output -",
237
+ " workbench candidates list --json",
238
+ " workbench candidates preview --candidate candidate_123 --path SKILL.md --output -",
226
239
  ],
227
- "subjects list": withSourceDirectoryHelp([
240
+ "candidates list": withSourceDirectoryHelp([
228
241
  "Usage:",
229
- " workbench subjects list [--dir DIR] [--json]",
242
+ " workbench candidates list [--dir DIR] [--json]",
230
243
  "",
231
- "List local subjects.",
244
+ "List local candidates.",
232
245
  "",
233
246
  "Examples:",
234
- " workbench subjects list",
235
- " workbench subjects list --json",
247
+ " workbench candidates list",
248
+ " workbench candidates list --json",
236
249
  ]),
237
- "subjects show": withSourceDirectoryHelp([
250
+ "candidates show": withSourceDirectoryHelp([
238
251
  "Usage:",
239
- " workbench subjects show SUBJECT_ID [--dir DIR] [--json]",
252
+ " workbench candidates show CANDIDATE_ID [--dir DIR] [--json]",
240
253
  "",
241
- "Show one local subject.",
254
+ "Show one local candidate.",
242
255
  "",
243
256
  "Examples:",
244
- " workbench subjects show subj_123",
245
- " workbench subjects show subj_123 --json",
257
+ " workbench candidates show candidate_123",
258
+ " workbench candidates show candidate_123 --json",
246
259
  ]),
247
- "subjects files": withSourceDirectoryHelp([
260
+ "candidates files": withSourceDirectoryHelp([
248
261
  "Usage:",
249
- " workbench subjects files [--dir DIR] [--subject ID] [--json]",
262
+ " workbench candidates files [--dir DIR] [--candidate CANDIDATE_ID] [--json]",
250
263
  "",
251
- "List files in a local subject snapshot.",
264
+ "List files in a local candidate snapshot.",
252
265
  "",
253
266
  "Examples:",
254
- " workbench subjects files --subject subj_123",
255
- " workbench subjects files --subject subj_123 --json",
267
+ " workbench candidates files --candidate candidate_123",
268
+ " workbench candidates files --candidate candidate_123 --json",
256
269
  ]),
257
- "subjects preview": withSourceDirectoryHelp([
270
+ "candidates preview": withSourceDirectoryHelp([
258
271
  "Usage:",
259
- " workbench subjects preview --path PATH [--dir DIR] [--subject ID] [--output PATH|-] [--json]",
272
+ " workbench candidates preview --path PATH [--dir DIR] [--candidate CANDIDATE_ID] [--output PATH|-] [--json]",
260
273
  "",
261
- "Preview a file from a local subject snapshot.",
274
+ "Preview a file from a local candidate snapshot.",
262
275
  "",
263
276
  "Examples:",
264
- " workbench subjects preview --subject subj_123 --path SKILL.md",
265
- " workbench subjects preview --subject subj_123 --path SKILL.md --output -",
277
+ " workbench candidates preview --candidate candidate_123 --path SKILL.md",
278
+ " workbench candidates preview --candidate candidate_123 --path SKILL.md --output -",
266
279
  ]),
267
280
  clone: [
268
281
  "Usage:",
@@ -355,7 +368,7 @@ const commandHelp = Object.fromEntries(Object.entries({
355
368
  "",
356
369
  "Examples:",
357
370
  " workbench push --tag v1 --dry-run",
358
- " workbench push subjects/codex --visibility private --json",
371
+ " workbench push candidates/current --visibility private --json",
359
372
  ]),
360
373
  login: [
361
374
  "Usage:",
@@ -523,17 +536,19 @@ const commandHelp = Object.fromEntries(Object.entries({
523
536
  "Hosted Workbench Cloud execution and resource commands.",
524
537
  "",
525
538
  "Commands:",
526
- " workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base SUBJECT_ID] [--samples N] [--watch] [--dry-run] [--json]",
527
- " workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base SUBJECT_ID] [--optimizer OPTIMIZER_YAML] [--budget N] [--samples N] [--watch] [--dry-run] [--json]",
528
- " workbench cloud open [OWNER/BENCHMARK[@REF]|RUN_ID|SUBJECT_ID] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--no-open] [--json]",
539
+ " workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
540
+ " workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
541
+ " workbench cloud retry TARGET_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--watch] [--interval-ms N] [--timeout-ms N] [--json]",
542
+ " workbench cloud open [OWNER/BENCHMARK[@REF]|RUN_ID|CANDIDATE_ID] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--no-open] [--json]",
529
543
  " workbench cloud watch RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--interval-ms N] [--timeout-ms N] [--json]",
530
544
  " workbench cloud logs RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
531
545
  " workbench cloud star OWNER/BENCHMARK [--json]",
532
546
  " workbench cloud unstar OWNER/BENCHMARK [--json]",
533
- " workbench cloud benchmarks|runs|subjects <command> [options]",
547
+ " workbench cloud benchmarks|runs|candidates <command> [options]",
534
548
  "",
535
549
  "Examples:",
536
- " workbench cloud eval subjects/codex --benchmark openbench/invoice-review@v1 --dry-run --json",
550
+ " workbench cloud eval candidates/current --benchmark openbench/invoice-review@v1 --dry-run --json",
551
+ " workbench cloud retry run_123 --benchmark openbench/invoice-review --watch",
537
552
  " workbench cloud runs list --benchmark openbench/invoice-review --json",
538
553
  ],
539
554
  "cloud star": [
@@ -558,27 +573,37 @@ const commandHelp = Object.fromEntries(Object.entries({
558
573
  ],
559
574
  "cloud eval": withSourceDirectoryHelp(withLifecycleHelp([
560
575
  "Usage:",
561
- " workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base SUBJECT_ID] [--samples N] [--watch] [--dry-run] [--json]",
576
+ " workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
562
577
  "",
563
- "Submit subject files to Workbench Cloud and run hosted evaluation.",
578
+ "Ensure a hosted evaluation exists for the selected candidate run. Completed hosted work is reused only when candidate, run configuration, source, adapters, benchmark, and samples match; use --rerun to intentionally spend again.",
564
579
  "",
565
580
  "Examples:",
566
- " workbench cloud eval subjects/codex --benchmark openbench/invoice-review@v1 --dry-run --json",
581
+ " workbench cloud eval candidates/current --benchmark openbench/invoice-review@v1 --dry-run --json",
567
582
  " workbench cloud eval --benchmark openbench/invoice-review --watch",
568
583
  ], hostedWatchLifecycleHelp)),
569
584
  "cloud improve": withSourceDirectoryHelp(withLifecycleHelp([
570
585
  "Usage:",
571
- " workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base SUBJECT_ID] [--optimizer OPTIMIZER_YAML] [--budget N] [--samples N] [--watch] [--dry-run] [--json]",
586
+ " workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
587
+ "",
588
+ "Ensure a hosted candidate improvement exists for the selected base, run, budget, and samples. Completed work is reused only when base, run configuration, source, adapters, benchmark, budget, and samples match; use --rerun to intentionally spend again.",
589
+ "",
590
+ "Examples:",
591
+ " workbench cloud improve --benchmark openbench/invoice-review --dry-run",
592
+ " workbench cloud improve candidates/current --watch --json",
593
+ ], hostedWatchLifecycleHelp)),
594
+ "cloud retry": withSourceDirectoryHelp(withLifecycleHelp([
595
+ "Usage:",
596
+ " workbench cloud retry TARGET_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--watch] [--interval-ms N] [--timeout-ms N] [--json]",
572
597
  "",
573
- "Run hosted subject improvement.",
598
+ "Retry a failed hosted run or evaluation by replaying its recorded candidate, configuration, sample count, and improve budget.",
574
599
  "",
575
600
  "Examples:",
576
- " workbench cloud improve --optimizer optimizers/codex.yaml --benchmark openbench/invoice-review --dry-run",
577
- " workbench cloud improve subjects/codex --optimizer optimizers/codex.yaml --watch --json",
601
+ " workbench cloud retry run_123 --benchmark openbench/invoice-review --watch",
602
+ " workbench cloud retry eval_123 --json",
578
603
  ], hostedWatchLifecycleHelp)),
579
604
  "cloud open": [
580
605
  "Usage:",
581
- " workbench cloud open [OWNER/BENCHMARK[@REF]|RUN_ID|SUBJECT_ID] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--no-open] [--json]",
606
+ " workbench cloud open [OWNER/BENCHMARK[@REF]|RUN_ID|CANDIDATE_ID] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--no-open] [--json]",
582
607
  "",
583
608
  "Print and open the hosted Workbench URL.",
584
609
  "",
@@ -638,24 +663,24 @@ const commandHelp = Object.fromEntries(Object.entries({
638
663
  " workbench cloud runs list --benchmark openbench/invoice-review --json",
639
664
  " workbench cloud runs show run_123 --benchmark openbench/invoice-review",
640
665
  ],
641
- "cloud subjects": [
666
+ "cloud candidates": [
642
667
  "Usage:",
643
- " workbench cloud subjects <command> [options]",
668
+ " workbench cloud candidates <command> [options]",
644
669
  "",
645
- "Hosted subject resource commands.",
670
+ "Hosted candidate resource commands.",
646
671
  "",
647
672
  "Commands:",
648
- " workbench cloud subjects list [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
649
- " workbench cloud subjects show SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
650
- " workbench cloud subjects files SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
651
- " workbench cloud subjects preview SUBJECT_ID --path PATH [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--output PATH|-] [--json]",
652
- " workbench cloud subjects pull SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--out DIR] [--json]",
653
- " workbench cloud subjects publish SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
654
- " workbench cloud subjects unpublish SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
673
+ " workbench cloud candidates list [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
674
+ " workbench cloud candidates show CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
675
+ " workbench cloud candidates files CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
676
+ " workbench cloud candidates preview CANDIDATE_ID --path PATH [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--output PATH|-] [--json]",
677
+ " workbench cloud candidates pull CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--out DIR] [--json]",
678
+ " workbench cloud candidates publish CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
679
+ " workbench cloud candidates unpublish CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
655
680
  "",
656
681
  "Examples:",
657
- " workbench cloud subjects list --benchmark openbench/invoice-review --json",
658
- " workbench cloud subjects preview subj_123 --path SKILL.md --output -",
682
+ " workbench cloud candidates list --benchmark openbench/invoice-review --json",
683
+ " workbench cloud candidates preview candidate_123 --path SKILL.md --output -",
659
684
  ],
660
685
  }).map(([key, lines]) => [key, lines.join("\n")]));
661
686
  export function commandUsage(commandPath) {
@@ -742,75 +767,75 @@ const hostedCommandHelp = Object.fromEntries(Object.entries({
742
767
  " workbench cloud runs cancel run_123 --benchmark openbench/invoice-review",
743
768
  " workbench cloud runs cancel run_123 --json",
744
769
  ],
745
- "subjects list": [
770
+ "candidates list": [
746
771
  "Usage:",
747
- " workbench cloud subjects list [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
772
+ " workbench cloud candidates list [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
748
773
  "",
749
- "List hosted subjects.",
774
+ "List hosted candidates.",
750
775
  "",
751
776
  "Examples:",
752
- " workbench cloud subjects list --benchmark openbench/invoice-review",
753
- " workbench cloud subjects list --json",
777
+ " workbench cloud candidates list --benchmark openbench/invoice-review",
778
+ " workbench cloud candidates list --json",
754
779
  ],
755
- "subjects show": [
780
+ "candidates show": [
756
781
  "Usage:",
757
- " workbench cloud subjects show SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
782
+ " workbench cloud candidates show CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
758
783
  "",
759
- "Show one hosted subject.",
784
+ "Show one hosted candidate.",
760
785
  "",
761
786
  "Examples:",
762
- " workbench cloud subjects show subj_123 --benchmark openbench/invoice-review",
763
- " workbench cloud subjects show subj_123 --json",
787
+ " workbench cloud candidates show candidate_123 --benchmark openbench/invoice-review",
788
+ " workbench cloud candidates show candidate_123 --json",
764
789
  ],
765
- "subjects files": [
790
+ "candidates files": [
766
791
  "Usage:",
767
- " workbench cloud subjects files SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
792
+ " workbench cloud candidates files CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
768
793
  "",
769
- "List files in a hosted subject snapshot.",
794
+ "List files in a hosted candidate snapshot.",
770
795
  "",
771
796
  "Examples:",
772
- " workbench cloud subjects files subj_123 --benchmark openbench/invoice-review",
773
- " workbench cloud subjects files subj_123 --json",
797
+ " workbench cloud candidates files candidate_123 --benchmark openbench/invoice-review",
798
+ " workbench cloud candidates files candidate_123 --json",
774
799
  ],
775
- "subjects preview": [
800
+ "candidates preview": [
776
801
  "Usage:",
777
- " workbench cloud subjects preview SUBJECT_ID --path PATH [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--output PATH|-] [--json]",
802
+ " workbench cloud candidates preview CANDIDATE_ID --path PATH [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--output PATH|-] [--json]",
778
803
  "",
779
- "Preview a file from a hosted subject snapshot.",
804
+ "Preview a file from a hosted candidate snapshot.",
780
805
  "",
781
806
  "Examples:",
782
- " workbench cloud subjects preview subj_123 --path SKILL.md --output -",
783
- " workbench cloud subjects preview subj_123 --path SKILL.md --benchmark openbench/invoice-review",
807
+ " workbench cloud candidates preview candidate_123 --path SKILL.md --output -",
808
+ " workbench cloud candidates preview candidate_123 --path SKILL.md --benchmark openbench/invoice-review",
784
809
  ],
785
- "subjects pull": [
810
+ "candidates pull": [
786
811
  "Usage:",
787
- " workbench cloud subjects pull SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--out DIR] [--json]",
812
+ " workbench cloud candidates pull CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--out DIR] [--json]",
788
813
  "",
789
- "Download hosted subject files.",
814
+ "Download hosted candidate files.",
790
815
  "",
791
816
  "Examples:",
792
- " workbench cloud subjects pull subj_123 --out ./subject-files",
793
- " workbench cloud subjects pull subj_123 --benchmark openbench/invoice-review --json",
817
+ " workbench cloud candidates pull candidate_123 --out ./candidate-files",
818
+ " workbench cloud candidates pull candidate_123 --benchmark openbench/invoice-review --json",
794
819
  ],
795
- "subjects publish": [
820
+ "candidates publish": [
796
821
  "Usage:",
797
- " workbench cloud subjects publish SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
822
+ " workbench cloud candidates publish CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
798
823
  "",
799
- "Make a hosted subject public.",
824
+ "Make a hosted candidate public.",
800
825
  "",
801
826
  "Examples:",
802
- " workbench cloud subjects publish subj_123 --benchmark openbench/invoice-review",
803
- " workbench cloud subjects publish subj_123 --json",
827
+ " workbench cloud candidates publish candidate_123 --benchmark openbench/invoice-review",
828
+ " workbench cloud candidates publish candidate_123 --json",
804
829
  ],
805
- "subjects unpublish": [
830
+ "candidates unpublish": [
806
831
  "Usage:",
807
- " workbench cloud subjects unpublish SUBJECT_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
832
+ " workbench cloud candidates unpublish CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
808
833
  "",
809
- "Make a hosted subject private.",
834
+ "Make a hosted candidate private.",
810
835
  "",
811
836
  "Examples:",
812
- " workbench cloud subjects unpublish subj_123 --benchmark openbench/invoice-review",
813
- " workbench cloud subjects unpublish subj_123 --json",
837
+ " workbench cloud candidates unpublish candidate_123 --benchmark openbench/invoice-review",
838
+ " workbench cloud candidates unpublish candidate_123 --json",
814
839
  ],
815
840
  }).map(([key, lines]) => [key, lines.join("\n")]));
816
841
  function cloudNestedCommandUsage(commandPath) {