@workbench-ai/workbench 0.0.64 → 0.0.66
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark-fingerprint.js +2 -2
- package/dist/command-model.d.ts +1 -1
- package/dist/command-model.d.ts.map +1 -1
- package/dist/command-model.js +106 -35
- package/dist/dev-open/client.js +109 -109
- package/dist/dev-open-server.d.ts +2 -37
- package/dist/dev-open-server.d.ts.map +1 -1
- package/dist/dev-open-server.js +39 -322
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +364 -263
- package/dist/local-archive.d.ts +4 -4
- package/dist/local-archive.d.ts.map +1 -1
- package/dist/local-archive.js +4 -90
- package/dist/local-inspection.d.ts +9 -0
- package/dist/local-inspection.d.ts.map +1 -0
- package/dist/local-inspection.js +317 -0
- package/dist/project-source.d.ts +6 -6
- package/dist/project-source.js +6 -6
- package/package.json +9 -4
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { engineResolveBindingForSpec, workbenchBenchmarkContentFingerprint, workbenchCandidateContentFingerprint, } from "@workbench-ai/workbench-core";
|
|
2
|
-
import {
|
|
2
|
+
import { remoteEngineResolveFiles, } from "./project-source.js";
|
|
3
3
|
export function localBenchmarkFingerprint(project) {
|
|
4
4
|
return workbenchBenchmarkContentFingerprint({
|
|
5
5
|
sourceYaml: project.specSource,
|
|
6
|
-
engineResolveFiles:
|
|
6
|
+
engineResolveFiles: remoteEngineResolveFiles(project).map(toSurfaceFile),
|
|
7
7
|
engineResolveBinding: engineResolveBindingForSpec(project.spec),
|
|
8
8
|
adapterFiles: project.adapterFiles.map(toSurfaceFile),
|
|
9
9
|
adapterManifests: project.adapters.map((adapter) => adapter.manifest),
|
package/dist/command-model.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export declare const LOCAL_DEV_OPEN_LIFECYCLE_NOTE = "Keep this command running while using the local web view; Ctrl-C stops the server and the page will stop working.";
|
|
2
|
-
export declare const
|
|
2
|
+
export declare const REMOTE_WATCH_LIFECYCLE_NOTE: string;
|
|
3
3
|
export declare const rootUsage: string;
|
|
4
4
|
export declare function commandUsage(commandPath: string): string | null;
|
|
5
5
|
//# sourceMappingURL=command-model.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"command-model.d.ts","sourceRoot":"","sources":["../src/command-model.ts"],"names":[],"mappings":"AAOA,eAAO,MAAM,6BAA6B,sHAC2E,CAAC;AAOtH,eAAO,MAAM,2BAA2B,QAA0C,CAAC;
|
|
1
|
+
{"version":3,"file":"command-model.d.ts","sourceRoot":"","sources":["../src/command-model.ts"],"names":[],"mappings":"AAOA,eAAO,MAAM,6BAA6B,sHAC2E,CAAC;AAOtH,eAAO,MAAM,2BAA2B,QAA0C,CAAC;AAiFnF,eAAO,MAAM,SAAS,QAAuB,CAAC;AAsc9C,wBAAgB,YAAY,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAE/D"}
|
package/dist/command-model.js
CHANGED
|
@@ -5,19 +5,19 @@ const sourceDirectoryHelp = [
|
|
|
5
5
|
" Pass --dir DIR or pass benchmark.yaml, candidates/<name>, or candidates/<name>/candidate.yaml as SOURCE.",
|
|
6
6
|
];
|
|
7
7
|
export const LOCAL_DEV_OPEN_LIFECYCLE_NOTE = "Keep this command running while using the local web view; Ctrl-C stops the server and the page will stop working.";
|
|
8
|
-
const
|
|
8
|
+
const remoteWatchLifecycleNoteLines = [
|
|
9
9
|
"Watching is client-side polling only.",
|
|
10
|
-
"Stopping this command does not cancel the
|
|
10
|
+
"Stopping this command does not cancel the remote run.",
|
|
11
11
|
];
|
|
12
|
-
export const
|
|
12
|
+
export const REMOTE_WATCH_LIFECYCLE_NOTE = remoteWatchLifecycleNoteLines.join(" ");
|
|
13
13
|
const localOpenLifecycleHelp = [
|
|
14
14
|
"Lifecycle:",
|
|
15
15
|
" workbench open starts a long-running local web server.",
|
|
16
16
|
` ${LOCAL_DEV_OPEN_LIFECYCLE_NOTE}`,
|
|
17
17
|
];
|
|
18
|
-
const
|
|
18
|
+
const remoteWatchLifecycleHelp = [
|
|
19
19
|
"Lifecycle:",
|
|
20
|
-
...
|
|
20
|
+
...remoteWatchLifecycleNoteLines.map((line) => ` ${line}`),
|
|
21
21
|
];
|
|
22
22
|
const rootLines = [
|
|
23
23
|
"Usage:",
|
|
@@ -36,20 +36,25 @@ const rootLines = [
|
|
|
36
36
|
"",
|
|
37
37
|
"Runs:",
|
|
38
38
|
" workbench eval [SOURCE] [--dir DIR] [--candidate CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--json]",
|
|
39
|
-
" workbench eval --
|
|
39
|
+
" workbench eval --remote [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK] [--candidate CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
|
|
40
40
|
" workbench improve [SOURCE] [--dir DIR] [--from CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--json]",
|
|
41
|
-
" workbench improve --
|
|
42
|
-
" workbench retry TARGET_ID [--dir DIR] [--
|
|
43
|
-
" workbench open [SOURCE|OWNER/BENCHMARK|RUN_ID|CANDIDATE_ID] [--dir DIR] [--
|
|
41
|
+
" workbench improve --remote [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK] [--base CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
|
|
42
|
+
" workbench retry TARGET_ID [--dir DIR] [--remote] [--benchmark OWNER/BENCHMARK] [--watch] [--interval-ms N] [--timeout-ms N] [--json]",
|
|
43
|
+
" workbench open [SOURCE|OWNER/BENCHMARK|RUN_ID|CANDIDATE_ID] [--dir DIR] [--remote] [--benchmark OWNER/BENCHMARK] [--run RUN_ID] [--host HOST] [--port N] [--no-open] [--json]",
|
|
44
44
|
" workbench restore [--dir DIR] [--candidate CANDIDATE_ID] [--dry-run] [--yes] [--json]",
|
|
45
45
|
"",
|
|
46
46
|
"Local inspection:",
|
|
47
47
|
" workbench runs list [--dir DIR] [--json]",
|
|
48
|
-
" workbench runs show RUN_ID [--dir DIR] [--json]",
|
|
48
|
+
" workbench runs show RUN_ID [--dir DIR] [--jobs] [--failures] [--json]",
|
|
49
|
+
" workbench evaluations list [--dir DIR] [--json]",
|
|
50
|
+
" workbench evaluations show EVALUATION_ID [--dir DIR] [--json]",
|
|
51
|
+
" workbench executions trace --run RUN_ID --job JOB_ID [--dir DIR] [--json]",
|
|
52
|
+
" workbench diagnose [RUN_OR_EVALUATION_ID] [--dir DIR] [--json]",
|
|
49
53
|
" workbench candidates list [--dir DIR] [--json]",
|
|
50
54
|
" workbench candidates show CANDIDATE_ID [--dir DIR] [--json]",
|
|
51
55
|
" workbench candidates files [--dir DIR] [--candidate CANDIDATE_ID] [--json]",
|
|
52
56
|
" workbench candidates preview --path PATH [--dir DIR] [--candidate CANDIDATE_ID] [--output PATH|-] [--json]",
|
|
57
|
+
" # Installed-agent traces, not Workbench run execution traces:",
|
|
53
58
|
" workbench traces collect [--providers codex,claude] [--since 30d] [--workspace DIR] [--limit N] [--json]",
|
|
54
59
|
" workbench traces list [--providers codex,claude] [--since 30d] [--workspace DIR] [--limit N] [--json]",
|
|
55
60
|
" workbench traces show TRACE_ID [--providers codex,claude] [--since 30d] [--workspace DIR] [--json]",
|
|
@@ -73,10 +78,10 @@ const rootLines = [
|
|
|
73
78
|
" workbench retry eval_local_123 --json",
|
|
74
79
|
" workbench open --no-open --json",
|
|
75
80
|
" workbench push",
|
|
76
|
-
" workbench eval --
|
|
81
|
+
" workbench eval --remote candidates/current --benchmark openbench/invoice-review --watch",
|
|
77
82
|
"",
|
|
78
83
|
"Environment:",
|
|
79
|
-
" WORKBENCH_API_URL sets the
|
|
84
|
+
" WORKBENCH_API_URL sets the remote Workbench API base URL.",
|
|
80
85
|
"",
|
|
81
86
|
"Default API URL:",
|
|
82
87
|
" https://v2.workbench.ai",
|
|
@@ -126,49 +131,49 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
126
131
|
eval: withSourceDirectoryHelp(withLifecycleHelp([
|
|
127
132
|
"Usage:",
|
|
128
133
|
" workbench eval [SOURCE] [--dir DIR] [--candidate CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--json]",
|
|
129
|
-
" workbench eval --
|
|
134
|
+
" workbench eval --remote [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK] [--candidate CANDIDATE_ID] [--runs RUNS|all] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
|
|
130
135
|
"",
|
|
131
|
-
"Ensure the selected candidate run has an evaluation for the current benchmark. Without --
|
|
136
|
+
"Ensure the selected candidate run has an evaluation for the current benchmark. Without --remote, execution writes local records. With --remote, Workbench starts or reuses a remote run against the configured remote or --benchmark target. Completed work is reused only when candidate, run configuration, source, adapters, benchmark, and samples match; use --rerun to intentionally spend again.",
|
|
132
137
|
"",
|
|
133
138
|
"Examples:",
|
|
134
139
|
" workbench eval --samples 1",
|
|
135
|
-
" workbench eval --
|
|
140
|
+
" workbench eval --remote candidates/current --samples 1 --watch --json",
|
|
136
141
|
" workbench eval candidates/current --samples 2 --json",
|
|
137
|
-
],
|
|
142
|
+
], remoteWatchLifecycleHelp)),
|
|
138
143
|
improve: withSourceDirectoryHelp(withLifecycleHelp([
|
|
139
144
|
"Usage:",
|
|
140
145
|
" workbench improve [SOURCE] [--dir DIR] [--from CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--json]",
|
|
141
|
-
" workbench improve --
|
|
146
|
+
" workbench improve --remote [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK] [--base CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
|
|
142
147
|
"",
|
|
143
|
-
"Ensure a candidate improvement exists for the selected base, run, budget, and samples. Improve defaults to the evaluated active candidate when it belongs to the current benchmark fingerprint; otherwise it evaluates and uses the authored current candidate. Without --
|
|
148
|
+
"Ensure a candidate improvement exists for the selected base, run, budget, and samples. Improve defaults to the evaluated active candidate when it belongs to the current benchmark fingerprint; otherwise it evaluates and uses the authored current candidate. Without --remote, execution writes local records. With --remote, Workbench starts or reuses remote work against the configured remote or --benchmark target. Completed work is reused only when base, run configuration, source, adapters, benchmark, budget, and samples match; use --rerun to intentionally spend again.",
|
|
144
149
|
"",
|
|
145
150
|
"Examples:",
|
|
146
151
|
" workbench improve --budget 1 --samples 1",
|
|
147
|
-
" workbench improve --
|
|
152
|
+
" workbench improve --remote candidates/current --budget 1 --samples 1 --watch --json",
|
|
148
153
|
" workbench improve candidates/current --from candidate_123 --json",
|
|
149
|
-
],
|
|
154
|
+
], remoteWatchLifecycleHelp)),
|
|
150
155
|
retry: withSourceDirectoryHelp(withLifecycleHelp([
|
|
151
156
|
"Usage:",
|
|
152
157
|
" workbench retry TARGET_ID [--dir DIR] [--json]",
|
|
153
|
-
" workbench retry --
|
|
158
|
+
" workbench retry --remote TARGET_ID [--dir DIR] [--benchmark OWNER/BENCHMARK] [--watch] [--interval-ms N] [--timeout-ms N] [--json]",
|
|
154
159
|
"",
|
|
155
|
-
"Retry a failed run or evaluation by replaying its recorded candidate, configuration, sample count, and improve budget. Use --
|
|
160
|
+
"Retry a failed run or evaluation by replaying its recorded candidate, configuration, sample count, and improve budget. Use --remote for remote records.",
|
|
156
161
|
"",
|
|
157
162
|
"Examples:",
|
|
158
163
|
" workbench retry eval_local_123 --json",
|
|
159
|
-
" workbench retry --
|
|
164
|
+
" workbench retry --remote run_123 --watch --json",
|
|
160
165
|
" workbench retry run_local_123 --dir ./my-benchmark",
|
|
161
|
-
],
|
|
166
|
+
], remoteWatchLifecycleHelp)),
|
|
162
167
|
open: withSourceDirectoryHelp(withLifecycleHelp([
|
|
163
168
|
"Usage:",
|
|
164
169
|
" workbench open [SOURCE] [--dir DIR] [--run RUN_ID] [--host HOST] [--port N] [--no-open] [--json]",
|
|
165
|
-
" workbench open --
|
|
170
|
+
" workbench open --remote [OWNER/BENCHMARK|RUN_ID|CANDIDATE_ID] [--dir DIR] [--benchmark OWNER/BENCHMARK] [--no-open] [--json]",
|
|
166
171
|
"",
|
|
167
|
-
"Start the local Workbench web view for the project and keep serving it until stopped. With --
|
|
172
|
+
"Start the local Workbench web view for the project and keep serving it until stopped. With --remote, print and optionally open the remote project URL instead.",
|
|
168
173
|
"",
|
|
169
174
|
"Examples:",
|
|
170
175
|
" workbench open",
|
|
171
|
-
" workbench open --
|
|
176
|
+
" workbench open --remote --no-open --json",
|
|
172
177
|
" workbench open --run eval_local_123 --port 4317 --no-open --json",
|
|
173
178
|
], localOpenLifecycleHelp)),
|
|
174
179
|
restore: withSourceDirectoryHelp([
|
|
@@ -189,7 +194,7 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
189
194
|
"",
|
|
190
195
|
"Commands:",
|
|
191
196
|
" workbench runs list [--dir DIR] [--json]",
|
|
192
|
-
" workbench runs show RUN_ID [--dir DIR] [--json]",
|
|
197
|
+
" workbench runs show RUN_ID [--dir DIR] [--jobs] [--failures] [--json]",
|
|
193
198
|
"",
|
|
194
199
|
"Examples:",
|
|
195
200
|
" workbench runs list --json",
|
|
@@ -207,13 +212,79 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
207
212
|
]),
|
|
208
213
|
"runs show": withSourceDirectoryHelp([
|
|
209
214
|
"Usage:",
|
|
210
|
-
" workbench runs show RUN_ID [--dir DIR] [--json]",
|
|
215
|
+
" workbench runs show RUN_ID [--dir DIR] [--jobs] [--failures] [--json]",
|
|
211
216
|
"",
|
|
212
|
-
"Show one local run record.",
|
|
217
|
+
"Show one local run record. Use --jobs for execution jobs and --failures for the generic failure diagnosis.",
|
|
213
218
|
"",
|
|
214
219
|
"Examples:",
|
|
215
220
|
" workbench runs show eval_local_123",
|
|
216
|
-
" workbench runs show eval_local_123 --json",
|
|
221
|
+
" workbench runs show eval_local_123 --failures --json",
|
|
222
|
+
]),
|
|
223
|
+
evaluations: [
|
|
224
|
+
"Usage:",
|
|
225
|
+
" workbench evaluations <command> [options]",
|
|
226
|
+
"",
|
|
227
|
+
"Inspect local evaluation comparisons through the same read model used by the browser.",
|
|
228
|
+
"",
|
|
229
|
+
"Commands:",
|
|
230
|
+
" workbench evaluations list [--dir DIR] [--json]",
|
|
231
|
+
" workbench evaluations show EVALUATION_ID [--dir DIR] [--json]",
|
|
232
|
+
"",
|
|
233
|
+
"Examples:",
|
|
234
|
+
" workbench evaluations list",
|
|
235
|
+
" workbench evaluations show eval_local_123 --json",
|
|
236
|
+
],
|
|
237
|
+
"evaluations list": withSourceDirectoryHelp([
|
|
238
|
+
"Usage:",
|
|
239
|
+
" workbench evaluations list [--dir DIR] [--json]",
|
|
240
|
+
"",
|
|
241
|
+
"List local evaluations with candidate, configuration, score, and run id.",
|
|
242
|
+
"",
|
|
243
|
+
"Examples:",
|
|
244
|
+
" workbench evaluations list",
|
|
245
|
+
" workbench evaluations list --json",
|
|
246
|
+
]),
|
|
247
|
+
"evaluations show": withSourceDirectoryHelp([
|
|
248
|
+
"Usage:",
|
|
249
|
+
" workbench evaluations show EVALUATION_ID [--dir DIR] [--json]",
|
|
250
|
+
"",
|
|
251
|
+
"Show one local evaluation scorecard and its case summaries.",
|
|
252
|
+
"",
|
|
253
|
+
"Examples:",
|
|
254
|
+
" workbench evaluations show eval_local_123",
|
|
255
|
+
" workbench evaluations show eval_local_123 --json",
|
|
256
|
+
]),
|
|
257
|
+
executions: [
|
|
258
|
+
"Usage:",
|
|
259
|
+
" workbench executions <command> [options]",
|
|
260
|
+
"",
|
|
261
|
+
"Inspect Workbench run execution artifacts. These are runtime traces tied to a Workbench run and job.",
|
|
262
|
+
"",
|
|
263
|
+
"Commands:",
|
|
264
|
+
" workbench executions trace --run RUN_ID --job JOB_ID [--dir DIR] [--json]",
|
|
265
|
+
"",
|
|
266
|
+
"Examples:",
|
|
267
|
+
" workbench executions trace --run run_local_123 --job job_123 --json",
|
|
268
|
+
],
|
|
269
|
+
"executions trace": withSourceDirectoryHelp([
|
|
270
|
+
"Usage:",
|
|
271
|
+
" workbench executions trace --run RUN_ID --job JOB_ID [--dir DIR] [--json]",
|
|
272
|
+
"",
|
|
273
|
+
"Show the Workbench execution trace for a run job.",
|
|
274
|
+
"",
|
|
275
|
+
"Examples:",
|
|
276
|
+
" workbench executions trace --run run_local_123 --job job_123",
|
|
277
|
+
" workbench executions trace --run run_local_123 --job job_123 --json",
|
|
278
|
+
]),
|
|
279
|
+
diagnose: withSourceDirectoryHelp([
|
|
280
|
+
"Usage:",
|
|
281
|
+
" workbench diagnose [RUN_OR_EVALUATION_ID] [--dir DIR] [--json]",
|
|
282
|
+
"",
|
|
283
|
+
"Summarize generic Workbench failure modes from runs, evaluations, samples, cases, and jobs.",
|
|
284
|
+
"",
|
|
285
|
+
"Examples:",
|
|
286
|
+
" workbench diagnose",
|
|
287
|
+
" workbench diagnose eval_local_123 --json",
|
|
217
288
|
]),
|
|
218
289
|
candidates: [
|
|
219
290
|
"Usage:",
|
|
@@ -275,7 +346,7 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
275
346
|
"Usage:",
|
|
276
347
|
" workbench clone OWNER/BENCHMARK [DIR] [--dry-run] [--json]",
|
|
277
348
|
"",
|
|
278
|
-
"Clone
|
|
349
|
+
"Clone remote source and runtime history into a local Workbench project and remember it as the remote.",
|
|
279
350
|
"",
|
|
280
351
|
"Examples:",
|
|
281
352
|
" workbench clone openbench/invoice-review",
|
|
@@ -390,7 +461,7 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
390
461
|
" workbench traces list [--providers codex,claude] [--since 30d] [--workspace DIR] [--limit N] [--json]",
|
|
391
462
|
" workbench traces show TRACE_ID [--providers codex,claude] [--since 30d] [--workspace DIR] [--json]",
|
|
392
463
|
"",
|
|
393
|
-
"Inspect local installed-agent traces without calling an LLM.",
|
|
464
|
+
"Inspect local installed-agent traces without calling an LLM. These are not Workbench run execution traces; use workbench executions trace for runtime traces.",
|
|
394
465
|
"",
|
|
395
466
|
"Examples:",
|
|
396
467
|
" workbench traces list --limit 10 --json",
|
|
@@ -430,7 +501,7 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
430
501
|
"Usage:",
|
|
431
502
|
" workbench auth <command> [options]",
|
|
432
503
|
"",
|
|
433
|
-
"Connect adapter auth for local and
|
|
504
|
+
"Connect adapter auth for local and remote runs.",
|
|
434
505
|
"",
|
|
435
506
|
"Commands:",
|
|
436
507
|
" workbench auth connect ADAPTER[/SLOT] [--dir DIR] [--method METHOD] [--profile PROFILE] [--profile-root DIR] [--local-only] [--json]",
|
|
@@ -454,7 +525,7 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
454
525
|
"Usage:",
|
|
455
526
|
" workbench auth disconnect ADAPTER[/SLOT] [--profile PROFILE] [--local-only] [--json]",
|
|
456
527
|
"",
|
|
457
|
-
"Disconnect adapter auth locally and, when logged in, in
|
|
528
|
+
"Disconnect adapter auth locally and, when logged in, in remote Workbench.",
|
|
458
529
|
"",
|
|
459
530
|
"Examples:",
|
|
460
531
|
" workbench auth disconnect codex --local-only",
|