@workbench-ai/workbench 0.0.49 → 0.0.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapter-project.js +3 -3
- package/dist/benchmark-fingerprint.d.ts +1 -1
- package/dist/benchmark-fingerprint.d.ts.map +1 -1
- package/dist/benchmark-fingerprint.js +4 -6
- package/dist/command-model.d.ts.map +1 -1
- package/dist/command-model.js +144 -119
- package/dist/dev-open/client.css +28 -0
- package/dist/dev-open/client.js +146 -146
- package/dist/dev-open-server.d.ts +9 -22
- package/dist/dev-open-server.d.ts.map +1 -1
- package/dist/dev-open-server.js +42 -38
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1472 -505
- package/dist/init-scaffold.d.ts +4 -4
- package/dist/init-scaffold.d.ts.map +1 -1
- package/dist/init-scaffold.js +2 -2
- package/dist/init-template-pack.d.ts +4 -4
- package/dist/init-template-pack.d.ts.map +1 -1
- package/dist/init-template-pack.js +47 -59
- package/dist/local-archive.d.ts +11 -11
- package/dist/local-archive.d.ts.map +1 -1
- package/dist/local-archive.js +87 -74
- package/dist/project-source.d.ts +14 -17
- package/dist/project-source.d.ts.map +1 -1
- package/dist/project-source.js +80 -151
- package/package.json +4 -4
package/dist/adapter-project.js
CHANGED
|
@@ -55,7 +55,7 @@ export async function resolveWorkbenchAdaptersForProject(dir, spec) {
|
|
|
55
55
|
discovered = true;
|
|
56
56
|
continue;
|
|
57
57
|
}
|
|
58
|
-
throw new Error(`Adapter ${id} is referenced by benchmark/
|
|
58
|
+
throw new Error(`Adapter ${id} is referenced by benchmark/candidate YAML but is not installed. List its source under adapters in the YAML file that uses it.`);
|
|
59
59
|
}
|
|
60
60
|
}
|
|
61
61
|
assertWorkbenchAdapterOperationSupport(rootAdapterOperationRequirements(spec), [...adapters.values()].map((adapter) => adapter.manifest));
|
|
@@ -258,8 +258,8 @@ function rootAdapterOperationRequirements(spec) {
|
|
|
258
258
|
return [
|
|
259
259
|
{ invocation: engineResolveInvocationForSpec(spec), operation: "engine.resolve" },
|
|
260
260
|
{ invocation: spec.engineRun, operation: "engine.run" },
|
|
261
|
-
...(spec.improve ? [{ invocation: spec.improve, operation: "
|
|
262
|
-
{ invocation: spec.run, operation: "
|
|
261
|
+
...(spec.improve ? [{ invocation: spec.improve, operation: "candidate.improve" }] : []),
|
|
262
|
+
{ invocation: spec.run, operation: "candidate.run" },
|
|
263
263
|
];
|
|
264
264
|
}
|
|
265
265
|
function rootAdapterInvocations(spec) {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { SurfaceSnapshotFile } from "@workbench-ai/workbench-core";
|
|
2
2
|
import type { LocalProjectSource } from "./project-source.js";
|
|
3
3
|
export declare function localBenchmarkFingerprint(project: LocalProjectSource): string;
|
|
4
|
-
export declare function
|
|
4
|
+
export declare function localCandidateFingerprint(project: LocalProjectSource): string;
|
|
5
5
|
export declare function benchmarkFingerprintForFiles(files: readonly SurfaceSnapshotFile[]): string;
|
|
6
6
|
//# sourceMappingURL=benchmark-fingerprint.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"benchmark-fingerprint.d.ts","sourceRoot":"","sources":["../src/benchmark-fingerprint.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AAExE,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AAE9D,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,CAO7E;AAED,wBAAgB,
|
|
1
|
+
{"version":3,"file":"benchmark-fingerprint.d.ts","sourceRoot":"","sources":["../src/benchmark-fingerprint.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AAExE,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AAE9D,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,CAO7E;AAED,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,CAM7E;AAED,wBAAgB,4BAA4B,CAC1C,KAAK,EAAE,SAAS,mBAAmB,EAAE,GACpC,MAAM,CAgBR"}
|
|
@@ -7,13 +7,11 @@ export function localBenchmarkFingerprint(project) {
|
|
|
7
7
|
...benchmarkAdapterFiles(project),
|
|
8
8
|
]);
|
|
9
9
|
}
|
|
10
|
-
export function
|
|
10
|
+
export function localCandidateFingerprint(project) {
|
|
11
11
|
const hash = createHash("sha256");
|
|
12
|
-
hash.update("workbench-
|
|
13
|
-
hash.update(project.
|
|
14
|
-
hash.
|
|
15
|
-
hash.update(JSON.stringify(project.spec.run));
|
|
16
|
-
hashSurfaceFiles(hash, project.subjectFiles);
|
|
12
|
+
hash.update("workbench-candidate-v1\0");
|
|
13
|
+
hash.update(project.candidateSource);
|
|
14
|
+
hashSurfaceFiles(hash, project.candidateFiles);
|
|
17
15
|
return hash.digest("hex");
|
|
18
16
|
}
|
|
19
17
|
export function benchmarkFingerprintForFiles(files) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"command-model.d.ts","sourceRoot":"","sources":["../src/command-model.ts"],"names":[],"mappings":"AAOA,eAAO,MAAM,6BAA6B,sHAC2E,CAAC;AAOtH,eAAO,MAAM,2BAA2B,QAA0C,CAAC;
|
|
1
|
+
{"version":3,"file":"command-model.d.ts","sourceRoot":"","sources":["../src/command-model.ts"],"names":[],"mappings":"AAOA,eAAO,MAAM,6BAA6B,sHAC2E,CAAC;AAOtH,eAAO,MAAM,2BAA2B,QAA0C,CAAC;AA0FnF,eAAO,MAAM,SAAS,QAAuB,CAAC;AAolB9C,wBAAgB,YAAY,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAE/D"}
|
package/dist/command-model.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
const sourceDirectoryHelp = [
|
|
2
2
|
"Directory:",
|
|
3
|
-
" Run from a Workbench project containing benchmark.yaml plus
|
|
4
|
-
"
|
|
5
|
-
" Pass --dir DIR or pass benchmark.yaml,
|
|
3
|
+
" Run from a Workbench project containing benchmark.yaml plus candidates/<name>/candidate.yaml.",
|
|
4
|
+
" Candidate manifests declare their files with files.path, usually files beside candidate.yaml.",
|
|
5
|
+
" Pass --dir DIR or pass benchmark.yaml, candidates/<name>, or candidates/<name>/candidate.yaml as SOURCE.",
|
|
6
6
|
];
|
|
7
7
|
export const LOCAL_DEV_OPEN_LIFECYCLE_NOTE = "Keep this command running while using the local web view; Ctrl-C stops the server and the page will stop working.";
|
|
8
8
|
const hostedWatchLifecycleNoteLines = [
|
|
@@ -35,18 +35,19 @@ const rootLines = [
|
|
|
35
35
|
" workbench adapters test ID|SOURCE [--dir DIR] [--request PATH] [--output DIR] [--json]",
|
|
36
36
|
"",
|
|
37
37
|
"Local runs:",
|
|
38
|
-
" workbench eval [SOURCE] [--dir DIR] [--
|
|
39
|
-
" workbench improve [SOURCE] [--dir DIR] [--from
|
|
38
|
+
" workbench eval [SOURCE] [--dir DIR] [--candidate CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--json]",
|
|
39
|
+
" workbench improve [SOURCE] [--dir DIR] [--from CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--json]",
|
|
40
|
+
" workbench retry TARGET_ID [--dir DIR] [--json]",
|
|
40
41
|
" workbench open [SOURCE] [--dir DIR] [--run RUN_ID] [--host HOST] [--port N] [--no-open] [--json]",
|
|
41
|
-
" workbench restore [--dir DIR] [--
|
|
42
|
+
" workbench restore [--dir DIR] [--candidate CANDIDATE_ID] [--dry-run] [--yes] [--json]",
|
|
42
43
|
"",
|
|
43
44
|
"Local inspection:",
|
|
44
45
|
" workbench runs list [--dir DIR] [--json]",
|
|
45
46
|
" workbench runs show RUN_ID [--dir DIR] [--json]",
|
|
46
|
-
" workbench
|
|
47
|
-
" workbench
|
|
48
|
-
" workbench
|
|
49
|
-
" workbench
|
|
47
|
+
" workbench candidates list [--dir DIR] [--json]",
|
|
48
|
+
" workbench candidates show CANDIDATE_ID [--dir DIR] [--json]",
|
|
49
|
+
" workbench candidates files [--dir DIR] [--candidate CANDIDATE_ID] [--json]",
|
|
50
|
+
" workbench candidates preview --path PATH [--dir DIR] [--candidate CANDIDATE_ID] [--output PATH|-] [--json]",
|
|
50
51
|
" workbench traces collect [--providers codex,claude] [--since 30d] [--workspace DIR] [--limit N] [--json]",
|
|
51
52
|
" workbench traces list [--providers codex,claude] [--since 30d] [--workspace DIR] [--limit N] [--json]",
|
|
52
53
|
" workbench traces show TRACE_ID [--providers codex,claude] [--since 30d] [--workspace DIR] [--json]",
|
|
@@ -65,14 +66,15 @@ const rootLines = [
|
|
|
65
66
|
" workbench push [SOURCE] [--dir DIR] [--tag TAG] [--visibility public|private] [--dry-run] [--json]",
|
|
66
67
|
"",
|
|
67
68
|
"Hosted runs and resources:",
|
|
68
|
-
" workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base
|
|
69
|
-
" workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base
|
|
70
|
-
" workbench cloud
|
|
69
|
+
" workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
|
|
70
|
+
" workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
|
|
71
|
+
" workbench cloud retry TARGET_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--watch] [--interval-ms N] [--timeout-ms N] [--json]",
|
|
72
|
+
" workbench cloud open [OWNER/BENCHMARK[@REF]|RUN_ID|CANDIDATE_ID] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--no-open] [--json]",
|
|
71
73
|
" workbench cloud watch RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--interval-ms N] [--timeout-ms N] [--json]",
|
|
72
74
|
" workbench cloud logs RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
|
|
73
75
|
" workbench cloud star OWNER/BENCHMARK [--json]",
|
|
74
76
|
" workbench cloud unstar OWNER/BENCHMARK [--json]",
|
|
75
|
-
" workbench cloud benchmarks|runs|
|
|
77
|
+
" workbench cloud benchmarks|runs|candidates <command> [options]",
|
|
76
78
|
"",
|
|
77
79
|
"Auth:",
|
|
78
80
|
" workbench auth connect ADAPTER[/SLOT] [--dir DIR] [--method METHOD] [--profile PROFILE] [--profile-root DIR] [--local-only] [--json]",
|
|
@@ -80,11 +82,12 @@ const rootLines = [
|
|
|
80
82
|
"",
|
|
81
83
|
"Examples:",
|
|
82
84
|
" workbench init --skill invoice-review --agent codex",
|
|
83
|
-
" workbench eval
|
|
85
|
+
" workbench eval candidates/current --samples 1",
|
|
84
86
|
" workbench improve --budget 2 --samples 1",
|
|
87
|
+
" workbench retry eval_local_123 --json",
|
|
85
88
|
" workbench open --no-open --json",
|
|
86
89
|
" workbench push --tag v1",
|
|
87
|
-
" workbench cloud eval
|
|
90
|
+
" workbench cloud eval candidates/current --benchmark openbench/invoice-review@v1 --watch",
|
|
88
91
|
"",
|
|
89
92
|
"Environment:",
|
|
90
93
|
" WORKBENCH_API_URL sets the hosted Workbench API base URL.",
|
|
@@ -118,7 +121,7 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
118
121
|
" workbench init [DIR] --skill NAME --agent ADAPTER [--from PATH] [--example] [--json]",
|
|
119
122
|
" workbench init [DIR] --command NAME [--from PATH] [--example] [--json]",
|
|
120
123
|
"",
|
|
121
|
-
"Scaffold a local Workbench project. benchmark.yaml selects an engine; the built-in workbench engine owns tasks, environment, and scoring under engine.with.
|
|
124
|
+
"Scaffold a local Workbench project. benchmark.yaml selects an engine; the built-in workbench engine owns tasks, environment, and scoring under engine.with. candidates/<name>/candidate.yaml owns files.path plus optional prepare and run behavior. Candidate manifests own improvement behavior.",
|
|
122
125
|
"",
|
|
123
126
|
"Examples:",
|
|
124
127
|
" workbench init --skill invoice-review --agent codex",
|
|
@@ -128,31 +131,41 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
128
131
|
"Usage:",
|
|
129
132
|
" workbench check [SOURCE] [--dir DIR] [--json]",
|
|
130
133
|
"",
|
|
131
|
-
"Validate benchmark.yaml, one
|
|
134
|
+
"Validate benchmark.yaml, one candidate manifest.",
|
|
132
135
|
"",
|
|
133
136
|
"Examples:",
|
|
134
137
|
" workbench check",
|
|
135
|
-
" workbench check
|
|
138
|
+
" workbench check candidates/current --json",
|
|
136
139
|
]),
|
|
137
140
|
eval: withSourceDirectoryHelp([
|
|
138
141
|
"Usage:",
|
|
139
|
-
" workbench eval [SOURCE] [--dir DIR] [--
|
|
142
|
+
" workbench eval [SOURCE] [--dir DIR] [--candidate CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--json]",
|
|
140
143
|
"",
|
|
141
|
-
"
|
|
144
|
+
"Ensure the selected local candidate run has an evaluation for the current benchmark. Completed work is reused only when candidate, run configuration, source, adapters, benchmark, and samples match; use --rerun to intentionally spend again.",
|
|
142
145
|
"",
|
|
143
146
|
"Examples:",
|
|
144
147
|
" workbench eval --samples 1",
|
|
145
|
-
" workbench eval
|
|
148
|
+
" workbench eval candidates/current --samples 2 --json",
|
|
146
149
|
]),
|
|
147
150
|
improve: withSourceDirectoryHelp([
|
|
148
151
|
"Usage:",
|
|
149
|
-
" workbench improve [SOURCE] [--dir DIR] [--from
|
|
152
|
+
" workbench improve [SOURCE] [--dir DIR] [--from CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--json]",
|
|
150
153
|
"",
|
|
151
|
-
"
|
|
154
|
+
"Ensure a local candidate improvement exists for the selected base, run, budget, and samples. Completed work is reused only when base, run configuration, source, adapters, benchmark, budget, and samples match; use --rerun to intentionally spend again.",
|
|
152
155
|
"",
|
|
153
156
|
"Examples:",
|
|
154
157
|
" workbench improve --budget 1 --samples 1",
|
|
155
|
-
" workbench improve
|
|
158
|
+
" workbench improve candidates/current --from candidate_123 --json",
|
|
159
|
+
]),
|
|
160
|
+
retry: withSourceDirectoryHelp([
|
|
161
|
+
"Usage:",
|
|
162
|
+
" workbench retry TARGET_ID [--dir DIR] [--json]",
|
|
163
|
+
"",
|
|
164
|
+
"Retry a failed local run or evaluation by replaying its recorded candidate, configuration, sample count, and improve budget.",
|
|
165
|
+
"",
|
|
166
|
+
"Examples:",
|
|
167
|
+
" workbench retry eval_local_123 --json",
|
|
168
|
+
" workbench retry run_local_123 --dir ./my-benchmark",
|
|
156
169
|
]),
|
|
157
170
|
open: withSourceDirectoryHelp(withLifecycleHelp([
|
|
158
171
|
"Usage:",
|
|
@@ -166,13 +179,13 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
166
179
|
], localOpenLifecycleHelp)),
|
|
167
180
|
restore: withSourceDirectoryHelp([
|
|
168
181
|
"Usage:",
|
|
169
|
-
" workbench restore [--dir DIR] [--
|
|
182
|
+
" workbench restore [--dir DIR] [--candidate CANDIDATE_ID] [--dry-run] [--yes] [--json]",
|
|
170
183
|
"",
|
|
171
|
-
"Restore a local
|
|
184
|
+
"Restore a local candidate snapshot into the candidate files directory.",
|
|
172
185
|
"",
|
|
173
186
|
"Examples:",
|
|
174
|
-
" workbench restore --
|
|
175
|
-
" workbench restore --
|
|
187
|
+
" workbench restore --candidate candidate_123 --dry-run",
|
|
188
|
+
" workbench restore --candidate candidate_123 --yes",
|
|
176
189
|
]),
|
|
177
190
|
runs: [
|
|
178
191
|
"Usage:",
|
|
@@ -208,61 +221,61 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
208
221
|
" workbench runs show eval_local_123",
|
|
209
222
|
" workbench runs show eval_local_123 --json",
|
|
210
223
|
]),
|
|
211
|
-
|
|
224
|
+
candidates: [
|
|
212
225
|
"Usage:",
|
|
213
|
-
" workbench
|
|
226
|
+
" workbench candidates <command> [options]",
|
|
214
227
|
"",
|
|
215
|
-
"Inspect local
|
|
228
|
+
"Inspect local candidates.",
|
|
216
229
|
"",
|
|
217
230
|
"Commands:",
|
|
218
|
-
" workbench
|
|
219
|
-
" workbench
|
|
220
|
-
" workbench
|
|
221
|
-
" workbench
|
|
231
|
+
" workbench candidates list [--dir DIR] [--json]",
|
|
232
|
+
" workbench candidates show CANDIDATE_ID [--dir DIR] [--json]",
|
|
233
|
+
" workbench candidates files [--dir DIR] [--candidate CANDIDATE_ID] [--json]",
|
|
234
|
+
" workbench candidates preview --path PATH [--dir DIR] [--candidate CANDIDATE_ID] [--output PATH|-] [--json]",
|
|
222
235
|
"",
|
|
223
236
|
"Examples:",
|
|
224
|
-
" workbench
|
|
225
|
-
" workbench
|
|
237
|
+
" workbench candidates list --json",
|
|
238
|
+
" workbench candidates preview --candidate candidate_123 --path SKILL.md --output -",
|
|
226
239
|
],
|
|
227
|
-
"
|
|
240
|
+
"candidates list": withSourceDirectoryHelp([
|
|
228
241
|
"Usage:",
|
|
229
|
-
" workbench
|
|
242
|
+
" workbench candidates list [--dir DIR] [--json]",
|
|
230
243
|
"",
|
|
231
|
-
"List local
|
|
244
|
+
"List local candidates.",
|
|
232
245
|
"",
|
|
233
246
|
"Examples:",
|
|
234
|
-
" workbench
|
|
235
|
-
" workbench
|
|
247
|
+
" workbench candidates list",
|
|
248
|
+
" workbench candidates list --json",
|
|
236
249
|
]),
|
|
237
|
-
"
|
|
250
|
+
"candidates show": withSourceDirectoryHelp([
|
|
238
251
|
"Usage:",
|
|
239
|
-
" workbench
|
|
252
|
+
" workbench candidates show CANDIDATE_ID [--dir DIR] [--json]",
|
|
240
253
|
"",
|
|
241
|
-
"Show one local
|
|
254
|
+
"Show one local candidate.",
|
|
242
255
|
"",
|
|
243
256
|
"Examples:",
|
|
244
|
-
" workbench
|
|
245
|
-
" workbench
|
|
257
|
+
" workbench candidates show candidate_123",
|
|
258
|
+
" workbench candidates show candidate_123 --json",
|
|
246
259
|
]),
|
|
247
|
-
"
|
|
260
|
+
"candidates files": withSourceDirectoryHelp([
|
|
248
261
|
"Usage:",
|
|
249
|
-
" workbench
|
|
262
|
+
" workbench candidates files [--dir DIR] [--candidate CANDIDATE_ID] [--json]",
|
|
250
263
|
"",
|
|
251
|
-
"List files in a local
|
|
264
|
+
"List files in a local candidate snapshot.",
|
|
252
265
|
"",
|
|
253
266
|
"Examples:",
|
|
254
|
-
" workbench
|
|
255
|
-
" workbench
|
|
267
|
+
" workbench candidates files --candidate candidate_123",
|
|
268
|
+
" workbench candidates files --candidate candidate_123 --json",
|
|
256
269
|
]),
|
|
257
|
-
"
|
|
270
|
+
"candidates preview": withSourceDirectoryHelp([
|
|
258
271
|
"Usage:",
|
|
259
|
-
" workbench
|
|
272
|
+
" workbench candidates preview --path PATH [--dir DIR] [--candidate CANDIDATE_ID] [--output PATH|-] [--json]",
|
|
260
273
|
"",
|
|
261
|
-
"Preview a file from a local
|
|
274
|
+
"Preview a file from a local candidate snapshot.",
|
|
262
275
|
"",
|
|
263
276
|
"Examples:",
|
|
264
|
-
" workbench
|
|
265
|
-
" workbench
|
|
277
|
+
" workbench candidates preview --candidate candidate_123 --path SKILL.md",
|
|
278
|
+
" workbench candidates preview --candidate candidate_123 --path SKILL.md --output -",
|
|
266
279
|
]),
|
|
267
280
|
clone: [
|
|
268
281
|
"Usage:",
|
|
@@ -355,7 +368,7 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
355
368
|
"",
|
|
356
369
|
"Examples:",
|
|
357
370
|
" workbench push --tag v1 --dry-run",
|
|
358
|
-
" workbench push
|
|
371
|
+
" workbench push candidates/current --visibility private --json",
|
|
359
372
|
]),
|
|
360
373
|
login: [
|
|
361
374
|
"Usage:",
|
|
@@ -523,17 +536,19 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
523
536
|
"Hosted Workbench Cloud execution and resource commands.",
|
|
524
537
|
"",
|
|
525
538
|
"Commands:",
|
|
526
|
-
" workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base
|
|
527
|
-
" workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base
|
|
528
|
-
" workbench cloud
|
|
539
|
+
" workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
|
|
540
|
+
" workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
|
|
541
|
+
" workbench cloud retry TARGET_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--watch] [--interval-ms N] [--timeout-ms N] [--json]",
|
|
542
|
+
" workbench cloud open [OWNER/BENCHMARK[@REF]|RUN_ID|CANDIDATE_ID] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--no-open] [--json]",
|
|
529
543
|
" workbench cloud watch RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--interval-ms N] [--timeout-ms N] [--json]",
|
|
530
544
|
" workbench cloud logs RUN_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
|
|
531
545
|
" workbench cloud star OWNER/BENCHMARK [--json]",
|
|
532
546
|
" workbench cloud unstar OWNER/BENCHMARK [--json]",
|
|
533
|
-
" workbench cloud benchmarks|runs|
|
|
547
|
+
" workbench cloud benchmarks|runs|candidates <command> [options]",
|
|
534
548
|
"",
|
|
535
549
|
"Examples:",
|
|
536
|
-
" workbench cloud eval
|
|
550
|
+
" workbench cloud eval candidates/current --benchmark openbench/invoice-review@v1 --dry-run --json",
|
|
551
|
+
" workbench cloud retry run_123 --benchmark openbench/invoice-review --watch",
|
|
537
552
|
" workbench cloud runs list --benchmark openbench/invoice-review --json",
|
|
538
553
|
],
|
|
539
554
|
"cloud star": [
|
|
@@ -558,27 +573,37 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
558
573
|
],
|
|
559
574
|
"cloud eval": withSourceDirectoryHelp(withLifecycleHelp([
|
|
560
575
|
"Usage:",
|
|
561
|
-
" workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base
|
|
576
|
+
" workbench cloud eval [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
|
|
562
577
|
"",
|
|
563
|
-
"
|
|
578
|
+
"Ensure a hosted evaluation exists for the selected candidate run. Completed hosted work is reused only when candidate, run configuration, source, adapters, benchmark, and samples match; use --rerun to intentionally spend again.",
|
|
564
579
|
"",
|
|
565
580
|
"Examples:",
|
|
566
|
-
" workbench cloud eval
|
|
581
|
+
" workbench cloud eval candidates/current --benchmark openbench/invoice-review@v1 --dry-run --json",
|
|
567
582
|
" workbench cloud eval --benchmark openbench/invoice-review --watch",
|
|
568
583
|
], hostedWatchLifecycleHelp)),
|
|
569
584
|
"cloud improve": withSourceDirectoryHelp(withLifecycleHelp([
|
|
570
585
|
"Usage:",
|
|
571
|
-
" workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base
|
|
586
|
+
" workbench cloud improve [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--base CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
|
|
587
|
+
"",
|
|
588
|
+
"Ensure a hosted candidate improvement exists for the selected base, run, budget, and samples. Completed work is reused only when base, run configuration, source, adapters, benchmark, budget, and samples match; use --rerun to intentionally spend again.",
|
|
589
|
+
"",
|
|
590
|
+
"Examples:",
|
|
591
|
+
" workbench cloud improve --benchmark openbench/invoice-review --dry-run",
|
|
592
|
+
" workbench cloud improve candidates/current --watch --json",
|
|
593
|
+
], hostedWatchLifecycleHelp)),
|
|
594
|
+
"cloud retry": withSourceDirectoryHelp(withLifecycleHelp([
|
|
595
|
+
"Usage:",
|
|
596
|
+
" workbench cloud retry TARGET_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--watch] [--interval-ms N] [--timeout-ms N] [--json]",
|
|
572
597
|
"",
|
|
573
|
-
"
|
|
598
|
+
"Retry a failed hosted run or evaluation by replaying its recorded candidate, configuration, sample count, and improve budget.",
|
|
574
599
|
"",
|
|
575
600
|
"Examples:",
|
|
576
|
-
" workbench cloud
|
|
577
|
-
" workbench cloud
|
|
601
|
+
" workbench cloud retry run_123 --benchmark openbench/invoice-review --watch",
|
|
602
|
+
" workbench cloud retry eval_123 --json",
|
|
578
603
|
], hostedWatchLifecycleHelp)),
|
|
579
604
|
"cloud open": [
|
|
580
605
|
"Usage:",
|
|
581
|
-
" workbench cloud open [OWNER/BENCHMARK[@REF]|RUN_ID|
|
|
606
|
+
" workbench cloud open [OWNER/BENCHMARK[@REF]|RUN_ID|CANDIDATE_ID] [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--no-open] [--json]",
|
|
582
607
|
"",
|
|
583
608
|
"Print and open the hosted Workbench URL.",
|
|
584
609
|
"",
|
|
@@ -638,24 +663,24 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
638
663
|
" workbench cloud runs list --benchmark openbench/invoice-review --json",
|
|
639
664
|
" workbench cloud runs show run_123 --benchmark openbench/invoice-review",
|
|
640
665
|
],
|
|
641
|
-
"cloud
|
|
666
|
+
"cloud candidates": [
|
|
642
667
|
"Usage:",
|
|
643
|
-
" workbench cloud
|
|
668
|
+
" workbench cloud candidates <command> [options]",
|
|
644
669
|
"",
|
|
645
|
-
"Hosted
|
|
670
|
+
"Hosted candidate resource commands.",
|
|
646
671
|
"",
|
|
647
672
|
"Commands:",
|
|
648
|
-
" workbench cloud
|
|
649
|
-
" workbench cloud
|
|
650
|
-
" workbench cloud
|
|
651
|
-
" workbench cloud
|
|
652
|
-
" workbench cloud
|
|
653
|
-
" workbench cloud
|
|
654
|
-
" workbench cloud
|
|
673
|
+
" workbench cloud candidates list [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
|
|
674
|
+
" workbench cloud candidates show CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
|
|
675
|
+
" workbench cloud candidates files CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
|
|
676
|
+
" workbench cloud candidates preview CANDIDATE_ID --path PATH [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--output PATH|-] [--json]",
|
|
677
|
+
" workbench cloud candidates pull CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--out DIR] [--json]",
|
|
678
|
+
" workbench cloud candidates publish CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
|
|
679
|
+
" workbench cloud candidates unpublish CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
|
|
655
680
|
"",
|
|
656
681
|
"Examples:",
|
|
657
|
-
" workbench cloud
|
|
658
|
-
" workbench cloud
|
|
682
|
+
" workbench cloud candidates list --benchmark openbench/invoice-review --json",
|
|
683
|
+
" workbench cloud candidates preview candidate_123 --path SKILL.md --output -",
|
|
659
684
|
],
|
|
660
685
|
}).map(([key, lines]) => [key, lines.join("\n")]));
|
|
661
686
|
export function commandUsage(commandPath) {
|
|
@@ -742,75 +767,75 @@ const hostedCommandHelp = Object.fromEntries(Object.entries({
|
|
|
742
767
|
" workbench cloud runs cancel run_123 --benchmark openbench/invoice-review",
|
|
743
768
|
" workbench cloud runs cancel run_123 --json",
|
|
744
769
|
],
|
|
745
|
-
"
|
|
770
|
+
"candidates list": [
|
|
746
771
|
"Usage:",
|
|
747
|
-
" workbench cloud
|
|
772
|
+
" workbench cloud candidates list [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
|
|
748
773
|
"",
|
|
749
|
-
"List hosted
|
|
774
|
+
"List hosted candidates.",
|
|
750
775
|
"",
|
|
751
776
|
"Examples:",
|
|
752
|
-
" workbench cloud
|
|
753
|
-
" workbench cloud
|
|
777
|
+
" workbench cloud candidates list --benchmark openbench/invoice-review",
|
|
778
|
+
" workbench cloud candidates list --json",
|
|
754
779
|
],
|
|
755
|
-
"
|
|
780
|
+
"candidates show": [
|
|
756
781
|
"Usage:",
|
|
757
|
-
" workbench cloud
|
|
782
|
+
" workbench cloud candidates show CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
|
|
758
783
|
"",
|
|
759
|
-
"Show one hosted
|
|
784
|
+
"Show one hosted candidate.",
|
|
760
785
|
"",
|
|
761
786
|
"Examples:",
|
|
762
|
-
" workbench cloud
|
|
763
|
-
" workbench cloud
|
|
787
|
+
" workbench cloud candidates show candidate_123 --benchmark openbench/invoice-review",
|
|
788
|
+
" workbench cloud candidates show candidate_123 --json",
|
|
764
789
|
],
|
|
765
|
-
"
|
|
790
|
+
"candidates files": [
|
|
766
791
|
"Usage:",
|
|
767
|
-
" workbench cloud
|
|
792
|
+
" workbench cloud candidates files CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
|
|
768
793
|
"",
|
|
769
|
-
"List files in a hosted
|
|
794
|
+
"List files in a hosted candidate snapshot.",
|
|
770
795
|
"",
|
|
771
796
|
"Examples:",
|
|
772
|
-
" workbench cloud
|
|
773
|
-
" workbench cloud
|
|
797
|
+
" workbench cloud candidates files candidate_123 --benchmark openbench/invoice-review",
|
|
798
|
+
" workbench cloud candidates files candidate_123 --json",
|
|
774
799
|
],
|
|
775
|
-
"
|
|
800
|
+
"candidates preview": [
|
|
776
801
|
"Usage:",
|
|
777
|
-
" workbench cloud
|
|
802
|
+
" workbench cloud candidates preview CANDIDATE_ID --path PATH [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--output PATH|-] [--json]",
|
|
778
803
|
"",
|
|
779
|
-
"Preview a file from a hosted
|
|
804
|
+
"Preview a file from a hosted candidate snapshot.",
|
|
780
805
|
"",
|
|
781
806
|
"Examples:",
|
|
782
|
-
" workbench cloud
|
|
783
|
-
" workbench cloud
|
|
807
|
+
" workbench cloud candidates preview candidate_123 --path SKILL.md --output -",
|
|
808
|
+
" workbench cloud candidates preview candidate_123 --path SKILL.md --benchmark openbench/invoice-review",
|
|
784
809
|
],
|
|
785
|
-
"
|
|
810
|
+
"candidates pull": [
|
|
786
811
|
"Usage:",
|
|
787
|
-
" workbench cloud
|
|
812
|
+
" workbench cloud candidates pull CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--out DIR] [--json]",
|
|
788
813
|
"",
|
|
789
|
-
"Download hosted
|
|
814
|
+
"Download hosted candidate files.",
|
|
790
815
|
"",
|
|
791
816
|
"Examples:",
|
|
792
|
-
" workbench cloud
|
|
793
|
-
" workbench cloud
|
|
817
|
+
" workbench cloud candidates pull candidate_123 --out ./candidate-files",
|
|
818
|
+
" workbench cloud candidates pull candidate_123 --benchmark openbench/invoice-review --json",
|
|
794
819
|
],
|
|
795
|
-
"
|
|
820
|
+
"candidates publish": [
|
|
796
821
|
"Usage:",
|
|
797
|
-
" workbench cloud
|
|
822
|
+
" workbench cloud candidates publish CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
|
|
798
823
|
"",
|
|
799
|
-
"Make a hosted
|
|
824
|
+
"Make a hosted candidate public.",
|
|
800
825
|
"",
|
|
801
826
|
"Examples:",
|
|
802
|
-
" workbench cloud
|
|
803
|
-
" workbench cloud
|
|
827
|
+
" workbench cloud candidates publish candidate_123 --benchmark openbench/invoice-review",
|
|
828
|
+
" workbench cloud candidates publish candidate_123 --json",
|
|
804
829
|
],
|
|
805
|
-
"
|
|
830
|
+
"candidates unpublish": [
|
|
806
831
|
"Usage:",
|
|
807
|
-
" workbench cloud
|
|
832
|
+
" workbench cloud candidates unpublish CANDIDATE_ID [--dir DIR] [--benchmark OWNER/BENCHMARK[@REF]] [--json]",
|
|
808
833
|
"",
|
|
809
|
-
"Make a hosted
|
|
834
|
+
"Make a hosted candidate private.",
|
|
810
835
|
"",
|
|
811
836
|
"Examples:",
|
|
812
|
-
" workbench cloud
|
|
813
|
-
" workbench cloud
|
|
837
|
+
" workbench cloud candidates unpublish candidate_123 --benchmark openbench/invoice-review",
|
|
838
|
+
" workbench cloud candidates unpublish candidate_123 --json",
|
|
814
839
|
],
|
|
815
840
|
}).map(([key, lines]) => [key, lines.join("\n")]));
|
|
816
841
|
function cloudNestedCommandUsage(commandPath) {
|
package/dist/dev-open/client.css
CHANGED
|
@@ -867,6 +867,12 @@
|
|
|
867
867
|
.max-w-\[10rem\] {
|
|
868
868
|
max-width: 10rem;
|
|
869
869
|
}
|
|
870
|
+
.max-w-\[11rem\] {
|
|
871
|
+
max-width: 11rem;
|
|
872
|
+
}
|
|
873
|
+
.max-w-\[14rem\] {
|
|
874
|
+
max-width: 14rem;
|
|
875
|
+
}
|
|
870
876
|
.max-w-\[32rem\] {
|
|
871
877
|
max-width: 32rem;
|
|
872
878
|
}
|
|
@@ -1180,6 +1186,9 @@
|
|
|
1180
1186
|
.gap-x-3 {
|
|
1181
1187
|
column-gap: calc(var(--spacing) * 3);
|
|
1182
1188
|
}
|
|
1189
|
+
.gap-x-4 {
|
|
1190
|
+
column-gap: calc(var(--spacing) * 4);
|
|
1191
|
+
}
|
|
1183
1192
|
.-space-x-2 {
|
|
1184
1193
|
:where(& > :not(:last-child)) {
|
|
1185
1194
|
--tw-space-x-reverse: 0;
|
|
@@ -1200,6 +1209,9 @@
|
|
|
1200
1209
|
.gap-y-1 {
|
|
1201
1210
|
row-gap: calc(var(--spacing) * 1);
|
|
1202
1211
|
}
|
|
1212
|
+
.gap-y-1\.5 {
|
|
1213
|
+
row-gap: calc(var(--spacing) * 1.5);
|
|
1214
|
+
}
|
|
1203
1215
|
.divide-y {
|
|
1204
1216
|
:where(& > :not(:last-child)) {
|
|
1205
1217
|
--tw-divide-y-reverse: 0;
|
|
@@ -1770,6 +1782,9 @@
|
|
|
1770
1782
|
.pl-4 {
|
|
1771
1783
|
padding-left: calc(var(--spacing) * 4);
|
|
1772
1784
|
}
|
|
1785
|
+
.pl-8 {
|
|
1786
|
+
padding-left: calc(var(--spacing) * 8);
|
|
1787
|
+
}
|
|
1773
1788
|
.text-center {
|
|
1774
1789
|
text-align: center;
|
|
1775
1790
|
}
|
|
@@ -2099,6 +2114,9 @@
|
|
|
2099
2114
|
.ring-background {
|
|
2100
2115
|
--tw-ring-color: var(--background);
|
|
2101
2116
|
}
|
|
2117
|
+
.ring-border {
|
|
2118
|
+
--tw-ring-color: var(--border);
|
|
2119
|
+
}
|
|
2102
2120
|
.ring-foreground\/10 {
|
|
2103
2121
|
--tw-ring-color: var(--foreground);
|
|
2104
2122
|
@supports (color: color-mix(in lab, red, red)) {
|
|
@@ -4920,6 +4938,11 @@
|
|
|
4920
4938
|
}
|
|
4921
4939
|
}
|
|
4922
4940
|
}
|
|
4941
|
+
.lg\:inline {
|
|
4942
|
+
@media (width >= 64rem) {
|
|
4943
|
+
display: inline;
|
|
4944
|
+
}
|
|
4945
|
+
}
|
|
4923
4946
|
.lg\:h-\[clamp\(36rem\,68vh\,52rem\)\] {
|
|
4924
4947
|
@media (width >= 64rem) {
|
|
4925
4948
|
height: clamp(36rem, 68vh, 52rem);
|
|
@@ -4960,6 +4983,11 @@
|
|
|
4960
4983
|
grid-template-columns: repeat(4, minmax(0, 1fr));
|
|
4961
4984
|
}
|
|
4962
4985
|
}
|
|
4986
|
+
.xl\:grid-cols-7 {
|
|
4987
|
+
@media (width >= 80rem) {
|
|
4988
|
+
grid-template-columns: repeat(7, minmax(0, 1fr));
|
|
4989
|
+
}
|
|
4990
|
+
}
|
|
4963
4991
|
.xl\:grid-cols-\[repeat\(auto-fit\,minmax\(18rem\,1fr\)\)\] {
|
|
4964
4992
|
@media (width >= 80rem) {
|
|
4965
4993
|
grid-template-columns: repeat(auto-fit,minmax(18rem,1fr));
|