@workbench-ai/workbench 0.0.70 → 0.0.71
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +446 -174
- package/package.json +6 -6
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AA2DA,MAAM,WAAW,KAAK;IACpB,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;CAC/B;AAkUD,wBAAsB,MAAM,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,EAAE,EAAE,GAAE,KAGzD,GAAG,OAAO,CAAC,MAAM,CAAC,CAwMlB"}
|
package/dist/index.js
CHANGED
|
@@ -4,10 +4,10 @@ import { createRequire } from "node:module";
|
|
|
4
4
|
import os from "node:os";
|
|
5
5
|
import path from "node:path";
|
|
6
6
|
import { gzipSync } from "node:zlib";
|
|
7
|
-
import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill,
|
|
7
|
+
import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchInspectionSnapshot, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, publishWorkbenchVersion, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchSkillImproveCanUseQueuedAdapter, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
|
|
8
8
|
import { normalizeWorkbenchSkillName } from "@workbench-ai/workbench-contract";
|
|
9
9
|
import { emitError, emitResult } from "./output.js";
|
|
10
|
-
import { installSnapshotToTargets,
|
|
10
|
+
import { installSnapshotToTargets, normalizeInstallSnapshotPath, resolveInstallTargets, supportedInstallTargets, } from "./install-targets.js";
|
|
11
11
|
import { startWorkbenchOpenServer } from "./open-server.js";
|
|
12
12
|
const require = createRequire(import.meta.url);
|
|
13
13
|
const HELP = [
|
|
@@ -23,7 +23,7 @@ const HELP = [
|
|
|
23
23
|
" workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
24
24
|
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
25
25
|
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
|
|
26
|
-
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--
|
|
26
|
+
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
|
|
27
27
|
"",
|
|
28
28
|
"More:",
|
|
29
29
|
" workbench help --all",
|
|
@@ -36,7 +36,7 @@ const HELP_ALL = [
|
|
|
36
36
|
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
37
37
|
" workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
38
38
|
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
|
|
39
|
-
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--
|
|
39
|
+
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
|
|
40
40
|
"",
|
|
41
41
|
"Inspect:",
|
|
42
42
|
" workbench status [--dir DIR] [--json]",
|
|
@@ -47,7 +47,7 @@ const HELP_ALL = [
|
|
|
47
47
|
" workbench open [--host HOST] [--port PORT] [--no-open] [--json]",
|
|
48
48
|
"",
|
|
49
49
|
"Configure:",
|
|
50
|
-
" workbench case add
|
|
50
|
+
" workbench case add RUN_ID [--json]",
|
|
51
51
|
" workbench agent add NAME --adapter X [--model M] [--with k=v]... | list | rm NAME [--json]",
|
|
52
52
|
"",
|
|
53
53
|
"Share and auth:",
|
|
@@ -65,28 +65,40 @@ const COMMAND_HELP = {
|
|
|
65
65
|
" workbench new [DIR] [--json]",
|
|
66
66
|
"",
|
|
67
67
|
"Creates a Workbench skill project.",
|
|
68
|
+
"",
|
|
69
|
+
"Example:",
|
|
70
|
+
" workbench new earnings-prep",
|
|
68
71
|
].join("\n"),
|
|
69
72
|
eval: [
|
|
70
73
|
"Usage:",
|
|
71
74
|
" workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
72
75
|
"",
|
|
73
76
|
"Runs eval jobs for the selected version, measured skills, and agents. Omitted selectors use manifest defaults.",
|
|
77
|
+
"",
|
|
78
|
+
"Example:",
|
|
79
|
+
" workbench eval -n 5",
|
|
74
80
|
].join("\n"),
|
|
75
81
|
improve: [
|
|
76
82
|
"Usage:",
|
|
77
83
|
" workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
78
84
|
"",
|
|
79
85
|
"Creates one improved child version from evidence. The selected skills and agents must resolve to exactly one entry each.",
|
|
86
|
+
"",
|
|
87
|
+
"Example:",
|
|
88
|
+
" workbench improve --budget 1 -n 1",
|
|
80
89
|
].join("\n"),
|
|
81
90
|
compare: [
|
|
82
91
|
"Usage:",
|
|
83
92
|
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
84
93
|
"",
|
|
85
94
|
"Compares recorded eval evidence across selected skills, agents, and versions.",
|
|
95
|
+
"",
|
|
96
|
+
"Example:",
|
|
97
|
+
" workbench compare --agents all",
|
|
86
98
|
].join("\n"),
|
|
87
99
|
install: [
|
|
88
100
|
"Usage:",
|
|
89
|
-
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--
|
|
101
|
+
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
|
|
90
102
|
"",
|
|
91
103
|
"Installs published Workbench Cloud source into local agent targets.",
|
|
92
104
|
"",
|
|
@@ -98,12 +110,18 @@ const COMMAND_HELP = {
|
|
|
98
110
|
" workbench status [--dir DIR] [--json]",
|
|
99
111
|
"",
|
|
100
112
|
"Reports project, worktree, run, per-remote sync/publication, and auth state. --json emits the workbench.status.v1 dashboard.",
|
|
113
|
+
"",
|
|
114
|
+
"Example:",
|
|
115
|
+
" workbench status --json",
|
|
101
116
|
].join("\n"),
|
|
102
117
|
logout: [
|
|
103
118
|
"Usage:",
|
|
104
119
|
" workbench logout [PROVIDER] [--json]",
|
|
105
120
|
"",
|
|
106
121
|
"With no provider, logs out of Workbench Cloud. With a provider such as codex or claude, removes local adapter auth.",
|
|
122
|
+
"",
|
|
123
|
+
"Example:",
|
|
124
|
+
" workbench logout claude",
|
|
107
125
|
].join("\n"),
|
|
108
126
|
show: [
|
|
109
127
|
"Usage:",
|
|
@@ -111,38 +129,54 @@ const COMMAND_HELP = {
|
|
|
111
129
|
" workbench show REF:PATH [--json]",
|
|
112
130
|
"",
|
|
113
131
|
"Shows a Workbench object, lists files for file-backed objects, or prints one file.",
|
|
132
|
+
"",
|
|
133
|
+
"Example:",
|
|
134
|
+
" workbench show run_abc12345:result.json",
|
|
114
135
|
].join("\n"),
|
|
115
136
|
log: [
|
|
116
137
|
"Usage:",
|
|
117
138
|
" workbench log [--runs|--versions] [--json]",
|
|
118
139
|
"",
|
|
119
140
|
"Shows one reverse-chronological timeline of versions and runs.",
|
|
141
|
+
"",
|
|
142
|
+
"Example:",
|
|
143
|
+
" workbench log --runs",
|
|
120
144
|
].join("\n"),
|
|
121
145
|
diff: [
|
|
122
146
|
"Usage:",
|
|
123
147
|
" workbench diff [A..B] [--json]",
|
|
124
148
|
"",
|
|
125
149
|
"Shows changed files between two Workbench source versions.",
|
|
150
|
+
"",
|
|
151
|
+
"Example:",
|
|
152
|
+
" workbench diff 26059f9a..eac5699c",
|
|
126
153
|
].join("\n"),
|
|
127
154
|
switch: [
|
|
128
155
|
"Usage:",
|
|
129
156
|
" workbench switch VERSION [--json]",
|
|
130
157
|
"",
|
|
131
158
|
"Switches the working skill source to a recorded Workbench version.",
|
|
159
|
+
"",
|
|
160
|
+
"Example:",
|
|
161
|
+
" workbench switch 26059f9a",
|
|
132
162
|
].join("\n"),
|
|
133
163
|
open: [
|
|
134
164
|
"Usage:",
|
|
135
165
|
" workbench open [--host HOST] [--port PORT] [--no-open] [--json]",
|
|
136
166
|
"",
|
|
137
167
|
"Serves or emits the read-only Workbench inspection snapshot.",
|
|
168
|
+
"",
|
|
169
|
+
"Example:",
|
|
170
|
+
" workbench open --no-open",
|
|
138
171
|
].join("\n"),
|
|
139
172
|
case: [
|
|
140
173
|
"Usage:",
|
|
141
|
-
" workbench case
|
|
142
|
-
" workbench case add [RUN_ID] [--json]",
|
|
143
|
-
" workbench case rm ID [--json]",
|
|
174
|
+
" workbench case add RUN_ID [--json]",
|
|
144
175
|
"",
|
|
145
|
-
"
|
|
176
|
+
"Captures a regression case from a recorded run.",
|
|
177
|
+
"",
|
|
178
|
+
"Example:",
|
|
179
|
+
" workbench case add run_abc12345",
|
|
146
180
|
].join("\n"),
|
|
147
181
|
agent: [
|
|
148
182
|
"Usage:",
|
|
@@ -151,18 +185,27 @@ const COMMAND_HELP = {
|
|
|
151
185
|
" workbench agent rm NAME [--json]",
|
|
152
186
|
"",
|
|
153
187
|
"Lists, adds, or removes eval agent configurations.",
|
|
188
|
+
"",
|
|
189
|
+
"Example:",
|
|
190
|
+
" workbench agent add claude --adapter claude --model sonnet",
|
|
154
191
|
].join("\n"),
|
|
155
192
|
sync: [
|
|
156
193
|
"Usage:",
|
|
157
194
|
" workbench sync [REMOTE] [--dry-run] [--dir DIR] [--json]",
|
|
158
195
|
"",
|
|
159
196
|
"Plumbing command: synchronizes local evidence and version objects with a Workbench remote.",
|
|
197
|
+
"",
|
|
198
|
+
"Example:",
|
|
199
|
+
" workbench sync cloud --dry-run",
|
|
160
200
|
].join("\n"),
|
|
161
201
|
publish: [
|
|
162
202
|
"Usage:",
|
|
163
203
|
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--dir DIR] [--json]",
|
|
164
204
|
"",
|
|
165
205
|
"Publishes installable skill source to Workbench Cloud. --as sets the linked OWNER/SKILL handle.",
|
|
206
|
+
"",
|
|
207
|
+
"Example:",
|
|
208
|
+
" workbench publish --as acme/earnings-prep --dry-run",
|
|
166
209
|
].join("\n"),
|
|
167
210
|
login: [
|
|
168
211
|
"Usage:",
|
|
@@ -170,6 +213,9 @@ const COMMAND_HELP = {
|
|
|
170
213
|
" workbench logout [PROVIDER] [--json]",
|
|
171
214
|
"",
|
|
172
215
|
"Connects the CLI to Workbench Cloud or captures local adapter auth for a provider.",
|
|
216
|
+
"",
|
|
217
|
+
"Example:",
|
|
218
|
+
" workbench login --start-only --no-open",
|
|
173
219
|
].join("\n"),
|
|
174
220
|
};
|
|
175
221
|
const COMMON_FLAGS = {
|
|
@@ -207,7 +253,7 @@ const COMMAND_FLAGS = {
|
|
|
207
253
|
samples: "positive-integer",
|
|
208
254
|
skills: "string",
|
|
209
255
|
},
|
|
210
|
-
install: { ...COMMON_FLAGS, ...HELP_FLAG, "dry-run": "boolean",
|
|
256
|
+
install: { ...COMMON_FLAGS, ...HELP_FLAG, "dry-run": "boolean", to: "repeat-string", yes: "boolean" },
|
|
211
257
|
log: { ...PROJECT_FLAGS, ...HELP_FLAG, runs: "boolean", versions: "boolean" },
|
|
212
258
|
login: {
|
|
213
259
|
...COMMON_FLAGS,
|
|
@@ -243,9 +289,7 @@ const COMMAND_FLAGS = {
|
|
|
243
289
|
const SUBCOMMAND_FLAGS = {
|
|
244
290
|
case: {
|
|
245
291
|
flags: {
|
|
246
|
-
list: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
247
292
|
add: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
248
|
-
rm: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
249
293
|
},
|
|
250
294
|
},
|
|
251
295
|
agent: {
|
|
@@ -315,26 +359,28 @@ export async function runCli(argv, io = {
|
|
|
315
359
|
return emitEvalFailure(runs, failedRuns, artifactIds, parsed, io);
|
|
316
360
|
}
|
|
317
361
|
const deltas = await evalDeltas(core, runs);
|
|
318
|
-
const
|
|
362
|
+
const next = await evalSuccessNextCommand(core, runs);
|
|
319
363
|
return emitResult("workbench.cli.eval.v1", {
|
|
320
364
|
result: runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
|
|
321
365
|
deltas: deltas,
|
|
322
|
-
|
|
366
|
+
next: next,
|
|
323
367
|
}, parsed, io, () => [
|
|
324
368
|
runs.map(formatRun).join("\n"),
|
|
325
369
|
...deltas.map(formatEvalDelta),
|
|
326
|
-
...(
|
|
370
|
+
...(next ? [`next: ${next}`] : []),
|
|
327
371
|
].filter(Boolean).join("\n"));
|
|
328
372
|
}
|
|
329
373
|
if (command === "improve") {
|
|
330
374
|
if (parsed.flags.cloud === true) {
|
|
331
375
|
return await handleCloudImprove(parsed, io);
|
|
332
376
|
}
|
|
377
|
+
const improverAgent = await resolveLocalImproverAgent(parsed, core);
|
|
333
378
|
const result = await improveWorkbenchSkill({
|
|
334
379
|
...core,
|
|
335
380
|
version: optionalPositional(parsed, 1),
|
|
336
381
|
skill: stringFlag(parsed, "skills"),
|
|
337
382
|
agent: stringFlag(parsed, "agents"),
|
|
383
|
+
...(improverAgent ? { improverAgent } : {}),
|
|
338
384
|
budget: intFlag(parsed, "budget"),
|
|
339
385
|
samples: intFlag(parsed, "samples"),
|
|
340
386
|
});
|
|
@@ -350,12 +396,12 @@ export async function runCli(argv, io = {
|
|
|
350
396
|
skills: stringFlag(parsed, "skills"),
|
|
351
397
|
agents: stringFlag(parsed, "agents"),
|
|
352
398
|
});
|
|
353
|
-
return output(comparison, parsed, io, () => formatComparison(comparison));
|
|
399
|
+
return output(manifestOnly(comparison), parsed, io, () => formatComparison(comparison));
|
|
354
400
|
}
|
|
355
401
|
if (command === "switch") {
|
|
356
402
|
const versionRef = requiredPositional(parsed, 1, "workbench switch requires VERSION.");
|
|
357
403
|
const version = await switchWorkbenchVersion(versionRef, core);
|
|
358
|
-
return output(versionSummary(version), parsed, io, () => `Switched to ${version.id}.`);
|
|
404
|
+
return output(versionSummary(version), parsed, io, () => `Switched to ${displayRef(version.id)}.`);
|
|
359
405
|
}
|
|
360
406
|
if (command === "diff") {
|
|
361
407
|
const range = optionalPositional(parsed, 1) ?? await defaultDiffRange(core);
|
|
@@ -390,7 +436,7 @@ export async function runCli(argv, io = {
|
|
|
390
436
|
}, parsed, io, () => `${result.dryRun ? "Would sync" : "Synced"} ${result.remote.name}: pushed ${result.pushed}, pulled ${result.pulled}${result.upToDate ? " (up to date)" : ""}.`);
|
|
391
437
|
}
|
|
392
438
|
if (command === "publish") {
|
|
393
|
-
const preview = parsed.flags["dry-run"] === true
|
|
439
|
+
const preview = parsed.flags["dry-run"] === true
|
|
394
440
|
? await previewPublishWithDerivedRemote(parsed)
|
|
395
441
|
: undefined;
|
|
396
442
|
if (preview) {
|
|
@@ -403,7 +449,7 @@ export async function runCli(argv, io = {
|
|
|
403
449
|
pinnedInstallUrl: preview.pinnedInstallUrl,
|
|
404
450
|
dryRun: true,
|
|
405
451
|
}, parsed, io, () => [
|
|
406
|
-
`Would publish ${preview.version.id} to remote ${preview.remote.name}.`,
|
|
452
|
+
`Would publish ${displayRef(preview.version.id)} to remote ${preview.remote.name}.`,
|
|
407
453
|
`Visibility: ${preview.visibility}`,
|
|
408
454
|
`Install: ${preview.installUrl}`,
|
|
409
455
|
`Pinned: ${preview.pinnedInstallUrl}`,
|
|
@@ -427,7 +473,7 @@ export async function runCli(argv, io = {
|
|
|
427
473
|
pinnedInstallUrl: result.pinnedInstallUrl,
|
|
428
474
|
...(result.dryRun ? { dryRun: true } : {}),
|
|
429
475
|
}, parsed, io, () => [
|
|
430
|
-
`${result.dryRun ? "Would publish" : "Published"} ${result.version.id} to remote ${result.remote.name}.`,
|
|
476
|
+
`${result.dryRun ? "Would publish" : "Published"} ${displayRef(result.version.id)} to remote ${result.remote.name}.`,
|
|
431
477
|
`Visibility: ${result.visibility}`,
|
|
432
478
|
`Install: ${result.installUrl}`,
|
|
433
479
|
`Pinned: ${result.pinnedInstallUrl}`,
|
|
@@ -437,7 +483,7 @@ export async function runCli(argv, io = {
|
|
|
437
483
|
if (command === "open") {
|
|
438
484
|
if (parsed.flags.json === true) {
|
|
439
485
|
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
440
|
-
return output(snapshot, parsed, io, () => "Read-only Workbench inspection data is available with --json.");
|
|
486
|
+
return output(manifestOnly(snapshot), parsed, io, () => "Read-only Workbench inspection data is available with --json.");
|
|
441
487
|
}
|
|
442
488
|
// The browser server serves committed object state through a read-only
|
|
443
489
|
// snapshot path, so long-running commands do not block page loads.
|
|
@@ -462,14 +508,15 @@ export async function runCli(argv, io = {
|
|
|
462
508
|
async function handleStatus(parsed, io) {
|
|
463
509
|
const status = await workbenchStatusSnapshot(await coreOptions(parsed));
|
|
464
510
|
const auth = await workbenchCliAuthStatus();
|
|
511
|
+
const cliStatus = statusWithCausalNext(status, auth);
|
|
465
512
|
return emitResult("workbench.status.v1", {
|
|
466
|
-
project:
|
|
467
|
-
worktree:
|
|
468
|
-
runs:
|
|
469
|
-
remotes:
|
|
513
|
+
project: cliStatus.project,
|
|
514
|
+
worktree: cliStatus.worktree,
|
|
515
|
+
runs: cliStatus.runs,
|
|
516
|
+
remotes: cliStatus.remotes,
|
|
470
517
|
auth: auth,
|
|
471
|
-
next:
|
|
472
|
-
}, parsed, io, () => formatStatusSnapshot({ ...
|
|
518
|
+
next: cliStatus.next,
|
|
519
|
+
}, parsed, io, () => formatStatusSnapshot({ ...cliStatus, auth }));
|
|
473
520
|
}
|
|
474
521
|
async function handleLog(parsed, io) {
|
|
475
522
|
if (parsed.flags.runs === true && parsed.flags.versions === true) {
|
|
@@ -491,7 +538,7 @@ async function handleLog(parsed, io) {
|
|
|
491
538
|
remediation: "Run workbench log, workbench log --runs, or workbench log --versions.",
|
|
492
539
|
});
|
|
493
540
|
}
|
|
494
|
-
const snapshot = await
|
|
541
|
+
const snapshot = await createWorkbenchInspectionSnapshot(await coreOptions(parsed));
|
|
495
542
|
const includeRuns = parsed.flags.versions !== true;
|
|
496
543
|
const includeVersions = parsed.flags.runs !== true;
|
|
497
544
|
const entries = [
|
|
@@ -534,21 +581,25 @@ async function handleShow(parsed, io) {
|
|
|
534
581
|
return output(value, parsed, io, () => formatShow(value));
|
|
535
582
|
}
|
|
536
583
|
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
537
|
-
const version = snapshot
|
|
584
|
+
const version = snapshotVersionByRef(snapshot, objectRef);
|
|
538
585
|
if (version) {
|
|
539
586
|
return output(fileListing("version", version.id, version.files), parsed, io, () => formatFileListing("version", version.id, version.files));
|
|
540
587
|
}
|
|
541
|
-
const trace = snapshot.traces
|
|
588
|
+
const trace = snapshotObjectByRef(snapshot.traces, objectRef, "trace");
|
|
542
589
|
if (trace) {
|
|
543
590
|
return output(fileListing("trace", trace.id, trace.files), parsed, io, () => formatFileListing("trace", trace.id, trace.files));
|
|
544
591
|
}
|
|
545
|
-
const artifact = snapshot.artifacts
|
|
592
|
+
const artifact = snapshotObjectByRef(snapshot.artifacts, objectRef, "artifact");
|
|
546
593
|
if (artifact) {
|
|
547
594
|
return output(fileListing("artifact", artifact.id, artifact.files), parsed, io, () => formatFileListing("artifact", artifact.id, artifact.files));
|
|
548
595
|
}
|
|
549
596
|
const details = evidenceDetailsForRunOrJob(snapshot, objectRef);
|
|
550
|
-
|
|
551
|
-
|
|
597
|
+
const evidenceFiles = evidenceFilesForRunOrJob(snapshot, objectRef);
|
|
598
|
+
if (details.length > 0 || evidenceFiles.length > 0) {
|
|
599
|
+
return output({
|
|
600
|
+
details: details,
|
|
601
|
+
files: evidenceFiles.map(fileSummary),
|
|
602
|
+
}, parsed, io, () => formatRunOrJobEvidence(details, evidenceFiles));
|
|
552
603
|
}
|
|
553
604
|
const value = await showWorkbenchRef(ref, core);
|
|
554
605
|
return output(value, parsed, io, () => formatShow(value));
|
|
@@ -581,22 +632,19 @@ async function handleAgent(parsed, io) {
|
|
|
581
632
|
throw new WorkbenchUserError(`Unsupported agent command: ${subcommand}`);
|
|
582
633
|
}
|
|
583
634
|
async function handleCase(parsed, io) {
|
|
584
|
-
const subcommand = requiredPositional(parsed, 1, "workbench case requires
|
|
585
|
-
if (subcommand === "list") {
|
|
586
|
-
const cases = await listWorkbenchCases(await coreOptions(parsed));
|
|
587
|
-
return output(cases, parsed, io, () => cases.map((entry) => `${entry.id}\t${entry.path}`).join("\n") || "No cases.");
|
|
588
|
-
}
|
|
635
|
+
const subcommand = requiredPositional(parsed, 1, "workbench case requires add.");
|
|
589
636
|
if (subcommand === "add") {
|
|
590
637
|
const core = await coreOptions(parsed);
|
|
591
|
-
const sourceRef =
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
638
|
+
const sourceRef = requiredPositional(parsed, 2, "workbench case add requires RUN_ID.");
|
|
639
|
+
rejectExtraInput(parsed, {
|
|
640
|
+
maxPositionals: 3,
|
|
641
|
+
message: "workbench case add accepts one RUN_ID argument.",
|
|
642
|
+
remediation: "Run workbench case add RUN_ID.",
|
|
643
|
+
});
|
|
644
|
+
const record = await addWorkbenchCase({ ...core, fromTraceId: await traceIdForCaseSource(core, sourceRef) });
|
|
645
|
+
return output(record, parsed, io, () => `Added draft case ${record.id}. Edit .workbench/cases/${record.path}/case.yaml before using it as score evidence.`);
|
|
598
646
|
}
|
|
599
|
-
throw new WorkbenchUserError(`
|
|
647
|
+
throw new WorkbenchUserError(`Unknown command: workbench case ${subcommand}`);
|
|
600
648
|
}
|
|
601
649
|
async function handleAdapterLogin(provider, parsed, io) {
|
|
602
650
|
const target = parseAuthTarget(provider, authProfileFlag(parsed));
|
|
@@ -714,7 +762,7 @@ async function handleLogin(parsed, io) {
|
|
|
714
762
|
}
|
|
715
763
|
if (parsed.flags["start-only"] === true && parsed.flags.wait === true) {
|
|
716
764
|
throw new WorkbenchCodedError("usage", "workbench login accepts only one of --start-only or --wait.", {
|
|
717
|
-
remediation: "Run workbench login --start-only or workbench login --wait
|
|
765
|
+
remediation: "Run workbench login --start-only or workbench login --wait.",
|
|
718
766
|
exitCode: 2,
|
|
719
767
|
});
|
|
720
768
|
}
|
|
@@ -723,22 +771,17 @@ async function handleLogin(parsed, io) {
|
|
|
723
771
|
const timeoutSeconds = intFlag(parsed, "timeout");
|
|
724
772
|
if (startOnly && timeoutSeconds !== undefined) {
|
|
725
773
|
throw new WorkbenchCodedError("usage", "workbench login --timeout only applies with --wait.", {
|
|
726
|
-
remediation: "Run workbench login --start-only, then workbench login --wait
|
|
727
|
-
exitCode: 2,
|
|
728
|
-
});
|
|
729
|
-
}
|
|
730
|
-
if (waitOnly && timeoutSeconds === undefined) {
|
|
731
|
-
throw new WorkbenchCodedError("usage", "workbench login --wait requires --timeout N.", {
|
|
732
|
-
remediation: "Run workbench login --wait --timeout 120.",
|
|
774
|
+
remediation: "Run workbench login --start-only, then workbench login --wait.",
|
|
733
775
|
exitCode: 2,
|
|
734
776
|
});
|
|
735
777
|
}
|
|
736
778
|
const config = await loadConfig();
|
|
737
|
-
const
|
|
738
|
-
|
|
779
|
+
const explicitBaseUrl = stringFlag(parsed, "base-url");
|
|
780
|
+
const pending = waitOnly ? await readPendingDeviceAuthorization(explicitBaseUrl) : null;
|
|
781
|
+
const baseUrl = pending?.baseUrl ?? selectWorkbenchBaseUrl({
|
|
782
|
+
explicitBaseUrl,
|
|
739
783
|
configBaseUrl: config.baseUrl,
|
|
740
784
|
});
|
|
741
|
-
const pending = waitOnly ? await readPendingDeviceAuthorization(baseUrl) : null;
|
|
742
785
|
const record = pending ?? await startDeviceAuthorization(baseUrl);
|
|
743
786
|
const freshAuthorization = pending === null;
|
|
744
787
|
if (startOnly) {
|
|
@@ -753,8 +796,8 @@ async function handleLogin(parsed, io) {
|
|
|
753
796
|
verificationUriComplete: record.verification_uri_complete,
|
|
754
797
|
userCode: record.user_code,
|
|
755
798
|
expiresAt: record.expiresAt,
|
|
756
|
-
resume: "workbench login --wait
|
|
757
|
-
}, parsed, io, () => `Open ${record.verification_uri_complete}\nCode: ${record.user_code}\nResume: workbench login --wait
|
|
799
|
+
resume: "workbench login --wait",
|
|
800
|
+
}, parsed, io, () => `Open ${record.verification_uri_complete}\nCode: ${record.user_code}\nResume: workbench login --wait`);
|
|
758
801
|
}
|
|
759
802
|
await writePendingDeviceAuthorization(record);
|
|
760
803
|
if (freshAuthorization && !parsed.flags.json) {
|
|
@@ -801,9 +844,6 @@ async function handleLogout(parsed, io) {
|
|
|
801
844
|
const config = await loadConfig();
|
|
802
845
|
const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
|
|
803
846
|
const tokenPresent = Boolean(config.accessToken);
|
|
804
|
-
if (tokenPresent && !baseUrl) {
|
|
805
|
-
throw new WorkbenchUserError("Missing Workbench API URL. Set WORKBENCH_API_URL or run `workbench login --base-url URL`.");
|
|
806
|
-
}
|
|
807
847
|
let revoke = "skipped";
|
|
808
848
|
if (config.accessToken && baseUrl) {
|
|
809
849
|
try {
|
|
@@ -856,18 +896,6 @@ async function handleInstall(parsed, io) {
|
|
|
856
896
|
const snapshot = await fetchWorkbenchInstallSourceSnapshot(workbenchSource, source);
|
|
857
897
|
const sourceSummary = workbenchInstallSourceSummary(workbenchSource, snapshot);
|
|
858
898
|
const config = await loadConfig();
|
|
859
|
-
if (parsed.flags.list === true) {
|
|
860
|
-
return emitResult("workbench.cli.install.v1", {
|
|
861
|
-
source: sourceSummary,
|
|
862
|
-
skills: [snapshot.name],
|
|
863
|
-
fileCount: snapshot.files.length,
|
|
864
|
-
targets: installTargetsToJson(supportedInstallTargets()),
|
|
865
|
-
}, parsed, io, () => [
|
|
866
|
-
`${snapshot.name}\t${snapshot.versionId}\tfiles=${snapshot.files.length}`,
|
|
867
|
-
"Targets:",
|
|
868
|
-
...supportedInstallTargets().map((target) => ` ${target.agent}\t${target.destination}`),
|
|
869
|
-
].join("\n"));
|
|
870
|
-
}
|
|
871
899
|
const toTargets = stringsFlag(parsed, "to");
|
|
872
900
|
const selectedTargets = toTargets.length > 0 ? normalizeInstallTargetNames(toTargets) : await defaultInstallTargetNames(config);
|
|
873
901
|
const targets = resolveInstallTargets({
|
|
@@ -905,17 +933,17 @@ async function handleCloudEval(parsed, io) {
|
|
|
905
933
|
return emitEvalFailure(started.runs, failedRuns, artifactIds, parsed, io);
|
|
906
934
|
}
|
|
907
935
|
const deltas = await evalDeltas(started.core, started.runs);
|
|
908
|
-
const
|
|
936
|
+
const next = await evalSuccessNextCommand(started.core, started.runs);
|
|
909
937
|
return emitResult("workbench.cli.eval.v1", {
|
|
910
938
|
result: started.runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
|
|
911
939
|
deltas: deltas,
|
|
912
|
-
|
|
940
|
+
next: next,
|
|
913
941
|
cloud: cloudExecutionSummary(started),
|
|
914
942
|
}, parsed, io, () => [
|
|
915
943
|
`Completed hosted eval on ${started.remote.url}.`,
|
|
916
944
|
started.runs.map(formatRun).join("\n"),
|
|
917
945
|
...deltas.map(formatEvalDelta),
|
|
918
|
-
...(
|
|
946
|
+
...(next ? [`next: ${next}`] : []),
|
|
919
947
|
].filter(Boolean).join("\n"));
|
|
920
948
|
}
|
|
921
949
|
async function handleCloudImprove(parsed, io) {
|
|
@@ -934,17 +962,17 @@ async function handleCloudImprove(parsed, io) {
|
|
|
934
962
|
});
|
|
935
963
|
}
|
|
936
964
|
const switchedVersionId = await switchHostedImproveVersionIfPromoted(started);
|
|
937
|
-
const
|
|
965
|
+
const next = cloudImproveNextCommand(started.runs);
|
|
938
966
|
return emitResult("workbench.cli.improve.v1", {
|
|
939
967
|
result: started.runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
|
|
940
|
-
|
|
968
|
+
next: next,
|
|
941
969
|
cloud: cloudExecutionSummary(started),
|
|
942
970
|
...(switchedVersionId ? { switchedVersionId } : {}),
|
|
943
971
|
}, parsed, io, () => [
|
|
944
972
|
`Completed hosted improve on ${started.remote.url}.`,
|
|
945
973
|
started.runs.map(formatRun).join("\n"),
|
|
946
|
-
...(switchedVersionId ? [`Switched local source to ${switchedVersionId}.`] : []),
|
|
947
|
-
...(
|
|
974
|
+
...(switchedVersionId ? [`Switched local source to ${displayRef(switchedVersionId)}.`] : []),
|
|
975
|
+
...(next ? [`next: ${next}`] : []),
|
|
948
976
|
].filter(Boolean).join("\n"));
|
|
949
977
|
}
|
|
950
978
|
async function defaultInstallTargetNames(config) {
|
|
@@ -1197,21 +1225,18 @@ function cloudExecutionRequestBody(command, parsed) {
|
|
|
1197
1225
|
...(command === "improve" ? { budget: intFlag(parsed, "budget") } : {}),
|
|
1198
1226
|
};
|
|
1199
1227
|
}
|
|
1200
|
-
function
|
|
1201
|
-
return
|
|
1202
|
-
}
|
|
1203
|
-
function cloudImproveNextCommands(runs) {
|
|
1204
|
-
return cloudExecutionNextCommands(runs, "workbench eval");
|
|
1228
|
+
function cloudImproveNextCommand(runs) {
|
|
1229
|
+
return cloudExecutionNextCommand(runs, "workbench eval");
|
|
1205
1230
|
}
|
|
1206
|
-
function
|
|
1231
|
+
function cloudExecutionNextCommand(runs, successCommand) {
|
|
1207
1232
|
const first = runs[0];
|
|
1208
1233
|
if (!first) {
|
|
1209
|
-
return
|
|
1234
|
+
return "workbench log --runs";
|
|
1210
1235
|
}
|
|
1211
1236
|
if (first.status === "running" || first.status === "failed" || first.status === "canceled") {
|
|
1212
|
-
return
|
|
1237
|
+
return `workbench show ${displayRef(first.id)}`;
|
|
1213
1238
|
}
|
|
1214
|
-
return
|
|
1239
|
+
return successCommand;
|
|
1215
1240
|
}
|
|
1216
1241
|
function cloudExecutionSummary(started) {
|
|
1217
1242
|
return {
|
|
@@ -1294,12 +1319,13 @@ async function fetchWorkbenchInstallSourceSnapshot(source, displaySource) {
|
|
|
1294
1319
|
throw new WorkbenchCodedError("auth_required", token
|
|
1295
1320
|
? `Workbench Cloud rejected the provided token while installing ${displaySource}.`
|
|
1296
1321
|
: `Authentication is required to install ${displaySource}.`, {
|
|
1297
|
-
remediation:
|
|
1322
|
+
remediation: "Run workbench login.",
|
|
1298
1323
|
exitCode: 1,
|
|
1299
1324
|
});
|
|
1300
1325
|
}
|
|
1301
1326
|
if (!response.ok) {
|
|
1302
|
-
|
|
1327
|
+
const excerpt = readResponseError(text);
|
|
1328
|
+
throw new WorkbenchCodedError("install_failed", `Unable to download Workbench source ${displaySource}: ${response.status}${excerpt ? ` ${excerpt}` : response.statusText ? ` ${response.statusText}` : ""}`, {
|
|
1303
1329
|
subject: { source: displaySource, status: response.status },
|
|
1304
1330
|
exitCode: 1,
|
|
1305
1331
|
});
|
|
@@ -1420,18 +1446,15 @@ function deviceAuthPath() {
|
|
|
1420
1446
|
return process.env.WORKBENCH_DEVICE_AUTH?.trim() || path.join(path.dirname(configPath()), "device-auth.json");
|
|
1421
1447
|
}
|
|
1422
1448
|
function selectWorkbenchBaseUrl(input = {}) {
|
|
1423
|
-
|
|
1424
|
-
if (!baseUrl) {
|
|
1425
|
-
throw new WorkbenchUserError("Missing Workbench API URL. Pass --base-url URL, set WORKBENCH_API_URL, or run `workbench login --base-url URL`.");
|
|
1426
|
-
}
|
|
1427
|
-
return baseUrl;
|
|
1449
|
+
return optionalWorkbenchBaseUrl(input);
|
|
1428
1450
|
}
|
|
1429
1451
|
function optionalWorkbenchBaseUrl(input = {}) {
|
|
1430
1452
|
const value = input.explicitBaseUrl ??
|
|
1431
1453
|
input.originBaseUrl ??
|
|
1432
1454
|
process.env.WORKBENCH_API_URL ??
|
|
1433
|
-
input.configBaseUrl
|
|
1434
|
-
|
|
1455
|
+
input.configBaseUrl ??
|
|
1456
|
+
DEFAULT_WORKBENCH_CLOUD_BASE_URL;
|
|
1457
|
+
return normalizeBaseUrl(value);
|
|
1435
1458
|
}
|
|
1436
1459
|
function normalizeBaseUrl(value) {
|
|
1437
1460
|
return value.trim().replace(/\/+$/u, "");
|
|
@@ -1449,7 +1472,8 @@ async function requestDeviceAuthorization(baseUrl) {
|
|
|
1449
1472
|
});
|
|
1450
1473
|
}
|
|
1451
1474
|
if (!response.ok) {
|
|
1452
|
-
|
|
1475
|
+
const excerpt = readResponseError(text);
|
|
1476
|
+
throw new WorkbenchCodedError("login_denied", `Device login failed: ${response.status}${excerpt ? ` ${excerpt}` : response.statusText ? ` ${response.statusText}` : ""}`, {
|
|
1453
1477
|
exitCode: 1,
|
|
1454
1478
|
});
|
|
1455
1479
|
}
|
|
@@ -1500,7 +1524,7 @@ async function pollDeviceToken(baseUrl, authorization, timeoutSeconds) {
|
|
|
1500
1524
|
}
|
|
1501
1525
|
throw new WorkbenchCodedError("login_pending", "Device login is still waiting for browser authorization.", {
|
|
1502
1526
|
retryable: true,
|
|
1503
|
-
remediation: "Authorize the device in the browser, then run workbench login --wait
|
|
1527
|
+
remediation: "Authorize the device in the browser, then run workbench login --wait.",
|
|
1504
1528
|
subject: {
|
|
1505
1529
|
retryAfterSeconds: Math.max(1, Math.ceil(intervalMs / 1000)),
|
|
1506
1530
|
verificationUri: authorization.verification_uri,
|
|
@@ -1524,7 +1548,8 @@ async function fetchWorkbenchUsername(baseUrl, accessToken) {
|
|
|
1524
1548
|
}
|
|
1525
1549
|
async function readPendingDeviceAuthorization(baseUrl) {
|
|
1526
1550
|
const record = await readDeviceAuthorizationJson(deviceAuthPath());
|
|
1527
|
-
|
|
1551
|
+
const expectedBaseUrl = baseUrl ? normalizeBaseUrl(baseUrl) : undefined;
|
|
1552
|
+
if (!record || (expectedBaseUrl && record.baseUrl !== expectedBaseUrl) || Date.parse(record.expiresAt) <= Date.now()) {
|
|
1528
1553
|
return null;
|
|
1529
1554
|
}
|
|
1530
1555
|
return record;
|
|
@@ -1614,7 +1639,8 @@ async function apiRequest(apiPath, options = {}, baseUrlOverride) {
|
|
|
1614
1639
|
}
|
|
1615
1640
|
throw requestError;
|
|
1616
1641
|
}
|
|
1617
|
-
const
|
|
1642
|
+
const excerpt = readResponseError(text);
|
|
1643
|
+
const requestError = new WorkbenchApiRequestError(response.status, `Request failed with status ${response.status}${response.statusText ? ` ${response.statusText}` : ""}${excerpt ? `: ${excerpt}` : ""}.`, text);
|
|
1618
1644
|
lastError = requestError;
|
|
1619
1645
|
if (canRetry && attempt < API_REQUEST_MAX_ATTEMPTS && isTransientApiRequestError(requestError)) {
|
|
1620
1646
|
await sleep(250 * attempt);
|
|
@@ -1707,12 +1733,22 @@ function readResponseError(text) {
|
|
|
1707
1733
|
const parsed = JSON.parse(text);
|
|
1708
1734
|
const record = asRecord(parsed);
|
|
1709
1735
|
const error = record?.error ?? record?.message;
|
|
1710
|
-
return typeof error === "string" && error.trim() ? error : null;
|
|
1736
|
+
return typeof error === "string" && error.trim() ? oneLineExcerpt(error) : null;
|
|
1711
1737
|
}
|
|
1712
1738
|
catch {
|
|
1713
|
-
|
|
1739
|
+
if (/<(?:!doctype|html|head|body)\b/iu.test(text)) {
|
|
1740
|
+
return null;
|
|
1741
|
+
}
|
|
1742
|
+
return oneLineExcerpt(text);
|
|
1714
1743
|
}
|
|
1715
1744
|
}
|
|
1745
|
+
function oneLineExcerpt(text) {
|
|
1746
|
+
const line = text.replace(/\s+/gu, " ").trim();
|
|
1747
|
+
if (!line) {
|
|
1748
|
+
return null;
|
|
1749
|
+
}
|
|
1750
|
+
return line.length > 180 ? `${line.slice(0, 177)}...` : line;
|
|
1751
|
+
}
|
|
1716
1752
|
function parseWorkbenchCloudErrorBody(text) {
|
|
1717
1753
|
try {
|
|
1718
1754
|
const record = asRecord(JSON.parse(text));
|
|
@@ -2227,19 +2263,15 @@ function parsePublishVisibilityFlags(parsed) {
|
|
|
2227
2263
|
}
|
|
2228
2264
|
async function previewPublishWithDerivedRemote(parsed) {
|
|
2229
2265
|
const root = path.resolve(dirFlag(parsed) ?? process.cwd());
|
|
2230
|
-
const core = await coreOptions(parsed);
|
|
2231
|
-
await listWorkbenchVersions(core);
|
|
2232
2266
|
const reconciledSnapshot = await createWorkbenchReadOnlyInspectionSnapshot({ dir: root });
|
|
2233
2267
|
const link = cloudRemoteLinkTargetFromRemotes(reconciledSnapshot.remotes);
|
|
2234
|
-
|
|
2235
|
-
|
|
2236
|
-
|
|
2237
|
-
const remote = await derivePublishCloudRemote(parsed, "workbench publish", link.name);
|
|
2268
|
+
const remote = stringFlag(parsed, "as") || !link.existing
|
|
2269
|
+
? await derivePublishCloudRemote(parsed, "workbench publish", link.name)
|
|
2270
|
+
: link.existing;
|
|
2238
2271
|
const requestedVersion = optionalPositional(parsed, 1);
|
|
2239
|
-
const
|
|
2240
|
-
? requestedVersion
|
|
2241
|
-
: reconciledSnapshot.status.currentVersionId ?? reconciledSnapshot.refs.current;
|
|
2242
|
-
const version = reconciledSnapshot.versions.find((entry) => entry.id === versionId);
|
|
2272
|
+
const version = requestedVersion && requestedVersion !== "current"
|
|
2273
|
+
? snapshotVersionByRef(reconciledSnapshot, requestedVersion)
|
|
2274
|
+
: snapshotVersionByRef(reconciledSnapshot, reconciledSnapshot.status.currentVersionId ?? reconciledSnapshot.refs.current ?? "");
|
|
2243
2275
|
if (!version) {
|
|
2244
2276
|
throw new WorkbenchCodedError("version_not_found", `Version not found: ${requestedVersion ?? "current"}`, {
|
|
2245
2277
|
remediation: "Run workbench log --versions.",
|
|
@@ -2390,7 +2422,7 @@ async function artifactIdsByRunId(core, runs) {
|
|
|
2390
2422
|
return byRun;
|
|
2391
2423
|
}
|
|
2392
2424
|
function emitEvalFailure(runs, failedRuns, artifactIds, parsed, io) {
|
|
2393
|
-
const
|
|
2425
|
+
const next = evalFailureNextCommand(failedRuns);
|
|
2394
2426
|
if (parsed.flags.json === true) {
|
|
2395
2427
|
io.stdout.write(`${JSON.stringify({
|
|
2396
2428
|
schema: "workbench.cli.eval.v1",
|
|
@@ -2401,14 +2433,14 @@ function emitEvalFailure(runs, failedRuns, artifactIds, parsed, io) {
|
|
|
2401
2433
|
evidenceSaved: true,
|
|
2402
2434
|
runs: runs.map((run) => runFailureSummary(run, artifactIds.get(run.id) ?? [])),
|
|
2403
2435
|
failedRuns: failedRuns.map((run) => runFailureSummary(run, artifactIds.get(run.id) ?? [])),
|
|
2404
|
-
|
|
2436
|
+
next,
|
|
2405
2437
|
}, null, 2)}\n`);
|
|
2406
2438
|
return 1;
|
|
2407
2439
|
}
|
|
2408
2440
|
io.stdout.write([
|
|
2409
2441
|
"Eval failed; evidence was saved.",
|
|
2410
2442
|
...failedRuns.map(formatRun),
|
|
2411
|
-
...(
|
|
2443
|
+
...(next ? [`next: ${next}`] : []),
|
|
2412
2444
|
].join("\n") + "\n");
|
|
2413
2445
|
return 1;
|
|
2414
2446
|
}
|
|
@@ -2441,17 +2473,12 @@ function runFailureSummary(run, artifactIds) {
|
|
|
2441
2473
|
artifactIds: [...artifactIds],
|
|
2442
2474
|
};
|
|
2443
2475
|
}
|
|
2444
|
-
function
|
|
2476
|
+
function evalFailureNextCommand(failedRuns) {
|
|
2445
2477
|
const first = failedRuns[0];
|
|
2446
2478
|
if (!first) {
|
|
2447
|
-
return
|
|
2479
|
+
return "workbench log --runs";
|
|
2448
2480
|
}
|
|
2449
|
-
return
|
|
2450
|
-
`workbench show ${first.id}`,
|
|
2451
|
-
`workbench show ${first.id}:stderr.log`,
|
|
2452
|
-
`workbench case add ${first.id}`,
|
|
2453
|
-
`workbench improve --agents ${first.agentName} --budget 1 -n 1`,
|
|
2454
|
-
];
|
|
2481
|
+
return `workbench show ${displayRef(first.id)}`;
|
|
2455
2482
|
}
|
|
2456
2483
|
function output(value, parsed, io, text) {
|
|
2457
2484
|
return emitResult(commandSchema(parsed), { result: value }, parsed, io, text);
|
|
@@ -2484,12 +2511,207 @@ async function workbenchCliAuthStatus() {
|
|
|
2484
2511
|
})),
|
|
2485
2512
|
};
|
|
2486
2513
|
}
|
|
2514
|
+
function statusWithCausalNext(status, auth) {
|
|
2515
|
+
const cloudAuthMissing = auth.workbenchCloud.status !== "authenticated";
|
|
2516
|
+
const needsCloudAuth = cloudAuthMissing && status.remotes.some((remote) => remote.kind === "workbench-cloud" &&
|
|
2517
|
+
(remote.sync.status !== "up_to_date" || remote.publication.status === "unpublished"));
|
|
2518
|
+
if (!needsCloudAuth) {
|
|
2519
|
+
return status;
|
|
2520
|
+
}
|
|
2521
|
+
return {
|
|
2522
|
+
...status,
|
|
2523
|
+
next: "workbench login",
|
|
2524
|
+
};
|
|
2525
|
+
}
|
|
2526
|
+
function displayRef(id) {
|
|
2527
|
+
const version = /^v_([0-9a-f]{8,})$/iu.exec(id);
|
|
2528
|
+
if (version?.[1]) {
|
|
2529
|
+
return version[1].slice(0, 8);
|
|
2530
|
+
}
|
|
2531
|
+
const separator = id.indexOf("_");
|
|
2532
|
+
if (separator > 0 && separator < id.length - 1) {
|
|
2533
|
+
const prefix = id.slice(0, separator);
|
|
2534
|
+
const suffix = id.slice(separator + 1);
|
|
2535
|
+
return `${prefix}_${suffix.slice(0, 8)}`;
|
|
2536
|
+
}
|
|
2537
|
+
return id.length > 8 ? id.slice(0, 8) : id;
|
|
2538
|
+
}
|
|
2539
|
+
function shortenCommandRefs(command) {
|
|
2540
|
+
return command.replace(/\b(?:v_[0-9a-f]{8,}|(?:run|job|trace|artifact)_[a-z0-9_-]+)/giu, (match) => displayRef(match));
|
|
2541
|
+
}
|
|
2542
|
+
function snapshotVersionByRef(snapshot, ref) {
|
|
2543
|
+
const requested = ref.trim();
|
|
2544
|
+
const normalized = requested === "current" ? snapshot.refs.current ?? "" : requested;
|
|
2545
|
+
if (!normalized) {
|
|
2546
|
+
return undefined;
|
|
2547
|
+
}
|
|
2548
|
+
const candidates = snapshot.versions.filter((version) => snapshotVersionRefMatches(version, normalized));
|
|
2549
|
+
if (candidates.length > 1) {
|
|
2550
|
+
throw new WorkbenchCodedError("ref_ambiguous", `Version ref is ambiguous: ${ref}. Candidates: ${candidates.map((version) => displayRef(version.id)).join(", ")}.`, {
|
|
2551
|
+
subject: { ref, candidates: candidates.map((version) => version.id) },
|
|
2552
|
+
exitCode: 2,
|
|
2553
|
+
});
|
|
2554
|
+
}
|
|
2555
|
+
return candidates[0];
|
|
2556
|
+
}
|
|
2557
|
+
function snapshotVersionRefMatches(version, ref) {
|
|
2558
|
+
const withoutVersionPrefix = ref.startsWith("v_") ? ref.slice(2) : ref;
|
|
2559
|
+
return version.id === ref ||
|
|
2560
|
+
version.hash === ref ||
|
|
2561
|
+
version.id.startsWith(ref) ||
|
|
2562
|
+
version.hash.startsWith(ref) ||
|
|
2563
|
+
version.hash.startsWith(withoutVersionPrefix) ||
|
|
2564
|
+
version.id.startsWith(`v_${withoutVersionPrefix}`);
|
|
2565
|
+
}
|
|
2566
|
+
function snapshotObjectByRef(entries, ref, kind) {
|
|
2567
|
+
const normalized = ref.trim();
|
|
2568
|
+
if (!normalized) {
|
|
2569
|
+
return undefined;
|
|
2570
|
+
}
|
|
2571
|
+
const candidates = entries.filter((entry) => objectRefMatches(entry.id, normalized));
|
|
2572
|
+
if (candidates.length > 1) {
|
|
2573
|
+
throw new WorkbenchCodedError("ref_ambiguous", `${capitalize(kind)} ref is ambiguous: ${ref}. Candidates: ${candidates.map((entry) => displayRef(entry.id)).slice(0, 8).join(", ")}.`, {
|
|
2574
|
+
subject: { ref, candidates: candidates.map((entry) => entry.id).slice(0, 20) },
|
|
2575
|
+
exitCode: 2,
|
|
2576
|
+
});
|
|
2577
|
+
}
|
|
2578
|
+
return candidates[0];
|
|
2579
|
+
}
|
|
2580
|
+
function objectRefMatches(id, ref) {
|
|
2581
|
+
if (id === ref || id.startsWith(ref)) {
|
|
2582
|
+
return true;
|
|
2583
|
+
}
|
|
2584
|
+
const separator = id.indexOf("_");
|
|
2585
|
+
return separator > 0 && id.slice(separator + 1).startsWith(ref);
|
|
2586
|
+
}
|
|
2587
|
+
function capitalize(value) {
|
|
2588
|
+
return value.length > 0 ? `${value[0].toUpperCase()}${value.slice(1)}` : value;
|
|
2589
|
+
}
|
|
2590
|
+
function runOrJobEvidenceSelection(snapshot, ref) {
|
|
2591
|
+
const run = snapshotObjectByRef(snapshot.runs, ref, "run");
|
|
2592
|
+
const job = snapshotObjectByRef(snapshot.jobs, ref, "job");
|
|
2593
|
+
if (run && job) {
|
|
2594
|
+
throw new WorkbenchCodedError("ref_ambiguous", `Run/job ref is ambiguous: ${ref}. Candidates: ${displayRef(run.id)}, ${displayRef(job.id)}.`, {
|
|
2595
|
+
subject: { ref, candidates: [run.id, job.id] },
|
|
2596
|
+
exitCode: 2,
|
|
2597
|
+
});
|
|
2598
|
+
}
|
|
2599
|
+
if (run) {
|
|
2600
|
+
return {
|
|
2601
|
+
run,
|
|
2602
|
+
jobs: snapshot.jobs.filter((entry) => entry.runId === run.id),
|
|
2603
|
+
};
|
|
2604
|
+
}
|
|
2605
|
+
return job ? { jobs: [job] } : { jobs: [] };
|
|
2606
|
+
}
|
|
2607
|
+
function evidenceFilesForRunOrJob(snapshot, ref) {
|
|
2608
|
+
const selection = runOrJobEvidenceSelection(snapshot, ref);
|
|
2609
|
+
if (!selection.run && selection.jobs.length === 0) {
|
|
2610
|
+
return [];
|
|
2611
|
+
}
|
|
2612
|
+
const traceById = new Map(snapshot.traces.map((trace) => [trace.id, trace]));
|
|
2613
|
+
const artifactById = new Map(snapshot.artifacts.map((artifact) => [artifact.id, artifact]));
|
|
2614
|
+
const files = selection.jobs.flatMap((job) => [
|
|
2615
|
+
...job.traceIds.flatMap((traceId) => {
|
|
2616
|
+
const trace = traceById.get(traceId);
|
|
2617
|
+
return trace
|
|
2618
|
+
? trace.files.map((file) => evidenceFileWithPath(file, `cases/${evidencePathSegment(job.caseId)}/jobs/${evidencePathSegment(job.id)}/traces/${evidencePathSegment(trace.id)}/${file.path}`))
|
|
2619
|
+
: [];
|
|
2620
|
+
}),
|
|
2621
|
+
...job.artifactIds.flatMap((artifactId) => {
|
|
2622
|
+
const artifact = artifactById.get(artifactId);
|
|
2623
|
+
return artifact
|
|
2624
|
+
? artifact.files.map((file) => evidenceFileWithPath(file, `cases/${evidencePathSegment(job.caseId)}/jobs/${evidencePathSegment(job.id)}/artifacts/${evidencePathSegment(artifact.id)}/${file.path}`))
|
|
2625
|
+
: [];
|
|
2626
|
+
}),
|
|
2627
|
+
]);
|
|
2628
|
+
const seen = new Set();
|
|
2629
|
+
return files.filter((file) => {
|
|
2630
|
+
if (seen.has(file.path)) {
|
|
2631
|
+
return false;
|
|
2632
|
+
}
|
|
2633
|
+
seen.add(file.path);
|
|
2634
|
+
return true;
|
|
2635
|
+
});
|
|
2636
|
+
}
|
|
2637
|
+
function evidenceFileWithPath(file, filePath) {
|
|
2638
|
+
return {
|
|
2639
|
+
...file,
|
|
2640
|
+
path: filePath.replace(/\\/gu, "/").replace(/^\/+/u, ""),
|
|
2641
|
+
};
|
|
2642
|
+
}
|
|
2643
|
+
function evidencePathSegment(value) {
|
|
2644
|
+
return value.replace(/[^A-Za-z0-9._-]+/gu, "-") || "_";
|
|
2645
|
+
}
|
|
2646
|
+
function formatRunOrJobEvidence(details, files) {
|
|
2647
|
+
const detailLines = details.map(formatTraceDetail).filter(Boolean);
|
|
2648
|
+
const fileLines = files.length > 0 ? ["Files:", ...files.map((file) => file.path)] : [];
|
|
2649
|
+
return [...detailLines, ...fileLines].join("\n") || "No evidence.";
|
|
2650
|
+
}
|
|
2651
|
+
function manifestOnly(value) {
|
|
2652
|
+
if (value === null || typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
|
|
2653
|
+
return value;
|
|
2654
|
+
}
|
|
2655
|
+
if (Array.isArray(value)) {
|
|
2656
|
+
return value.map(manifestOnly);
|
|
2657
|
+
}
|
|
2658
|
+
if (!value || typeof value !== "object") {
|
|
2659
|
+
return null;
|
|
2660
|
+
}
|
|
2661
|
+
const record = value;
|
|
2662
|
+
if (typeof record.path === "string" && typeof record.content === "string") {
|
|
2663
|
+
return fileSummary(record);
|
|
2664
|
+
}
|
|
2665
|
+
const out = {};
|
|
2666
|
+
for (const [key, child] of Object.entries(record)) {
|
|
2667
|
+
if (child === undefined) {
|
|
2668
|
+
continue;
|
|
2669
|
+
}
|
|
2670
|
+
out[key] = manifestOnly(child);
|
|
2671
|
+
}
|
|
2672
|
+
return out;
|
|
2673
|
+
}
|
|
2674
|
+
async function resolveLocalImproverAgent(parsed, core) {
|
|
2675
|
+
if (stringFlag(parsed, "agents")) {
|
|
2676
|
+
return undefined;
|
|
2677
|
+
}
|
|
2678
|
+
const agents = await listWorkbenchAgents(core).catch(() => []);
|
|
2679
|
+
const status = await workbenchStatusSnapshot(core).catch(() => undefined);
|
|
2680
|
+
const defaultAgentName = status?.project.defaultAgent ?? agents[0]?.name;
|
|
2681
|
+
const defaultAgent = agents.find((agent) => agent.name === defaultAgentName);
|
|
2682
|
+
if (defaultAgent && workbenchSkillImproveCanUseQueuedAdapter(defaultAgent)) {
|
|
2683
|
+
return undefined;
|
|
2684
|
+
}
|
|
2685
|
+
const connected = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).listStatus().catch(() => []);
|
|
2686
|
+
const candidates = connected
|
|
2687
|
+
.filter((entry) => entry.status === "connected" &&
|
|
2688
|
+
(entry.adapterId === "claude" || entry.adapterId === "codex"))
|
|
2689
|
+
.sort((left, right) => {
|
|
2690
|
+
const adapterRank = (adapter) => adapter === "claude" ? 0 : adapter === "codex" ? 1 : 2;
|
|
2691
|
+
return adapterRank(left.adapterId) - adapterRank(right.adapterId) ||
|
|
2692
|
+
(Date.parse(right.updatedAt ?? "") || 0) - (Date.parse(left.updatedAt ?? "") || 0);
|
|
2693
|
+
});
|
|
2694
|
+
const selected = candidates[0];
|
|
2695
|
+
if (!selected) {
|
|
2696
|
+
throw new WorkbenchCodedError("auth_required", "workbench improve needs a connected improver.", {
|
|
2697
|
+
remediation: "Run workbench login claude (or codex) to connect an improver.",
|
|
2698
|
+
exitCode: 1,
|
|
2699
|
+
});
|
|
2700
|
+
}
|
|
2701
|
+
return {
|
|
2702
|
+
name: selected.adapterId,
|
|
2703
|
+
adapter: selected.adapterId,
|
|
2704
|
+
config: {
|
|
2705
|
+
auth: selected.slot ? { [selected.slot]: selected.profile } : selected.profile,
|
|
2706
|
+
},
|
|
2707
|
+
};
|
|
2708
|
+
}
|
|
2487
2709
|
function formatLogEntry(entry) {
|
|
2488
2710
|
if (entry.kind === "version") {
|
|
2489
|
-
return `${entry.createdAt}\tversion\t${entry.id}\tfiles=${entry.fileCount}\t${entry.message}`;
|
|
2711
|
+
return `${entry.createdAt}\tversion\t${displayRef(entry.id)}\tfiles=${entry.fileCount}\t${entry.message}`;
|
|
2490
2712
|
}
|
|
2491
2713
|
const score = entry.score === undefined ? "n/a" : entry.score.toFixed(3);
|
|
2492
|
-
return `${entry.createdAt}\trun\t${entry.id}\t${entry.status}\tversion=${entry.versionId}\tskill=${entry.skillName}\tagent=${entry.agentName}\tscore=${score}`;
|
|
2714
|
+
return `${entry.createdAt}\trun\t${displayRef(entry.id)}\t${entry.status}\tversion=${displayRef(entry.versionId)}\tskill=${entry.skillName}\tagent=${entry.agentName}\tscore=${score}`;
|
|
2493
2715
|
}
|
|
2494
2716
|
function splitShowRef(ref) {
|
|
2495
2717
|
const index = ref.indexOf(":");
|
|
@@ -2500,18 +2722,14 @@ function splitShowRef(ref) {
|
|
|
2500
2722
|
}
|
|
2501
2723
|
async function fileForRunOrJobRef(core, objectRef, requestedPath) {
|
|
2502
2724
|
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
2503
|
-
const
|
|
2504
|
-
|
|
2505
|
-
if (!run && !job) {
|
|
2725
|
+
const selection = runOrJobEvidenceSelection(snapshot, objectRef);
|
|
2726
|
+
if (!selection.run && selection.jobs.length === 0) {
|
|
2506
2727
|
return null;
|
|
2507
2728
|
}
|
|
2508
|
-
const
|
|
2509
|
-
const
|
|
2510
|
-
|
|
2511
|
-
|
|
2512
|
-
if (file) {
|
|
2513
|
-
return file;
|
|
2514
|
-
}
|
|
2729
|
+
const files = evidenceFilesForRunOrJob(snapshot, objectRef);
|
|
2730
|
+
const file = findShowFile(files, requestedPath, objectRef);
|
|
2731
|
+
if (file) {
|
|
2732
|
+
return file;
|
|
2515
2733
|
}
|
|
2516
2734
|
throw new WorkbenchCodedError("ref_not_found", `File not found in ${objectRef}: ${requestedPath}`, {
|
|
2517
2735
|
remediation: `Run workbench show ${objectRef}.`,
|
|
@@ -2520,12 +2738,8 @@ async function fileForRunOrJobRef(core, objectRef, requestedPath) {
|
|
|
2520
2738
|
});
|
|
2521
2739
|
}
|
|
2522
2740
|
function evidenceDetailsForRunOrJob(snapshot, ref) {
|
|
2523
|
-
const
|
|
2524
|
-
|
|
2525
|
-
const jobs = run
|
|
2526
|
-
? snapshot.jobs.filter((entry) => entry.runId === run.id)
|
|
2527
|
-
: job ? [job] : [];
|
|
2528
|
-
return jobs.flatMap((entry) => {
|
|
2741
|
+
const selection = runOrJobEvidenceSelection(snapshot, ref);
|
|
2742
|
+
return selection.jobs.flatMap((entry) => {
|
|
2529
2743
|
const detail = workbenchJobEvidenceForSnapshot(snapshot, {
|
|
2530
2744
|
runId: entry.runId,
|
|
2531
2745
|
jobId: entry.id,
|
|
@@ -2536,12 +2750,58 @@ function evidenceDetailsForRunOrJob(snapshot, ref) {
|
|
|
2536
2750
|
execution.trace.events.length > 0 ||
|
|
2537
2751
|
execution.trace.summaries.length > 0));
|
|
2538
2752
|
}
|
|
2539
|
-
function findShowFile(files, requestedPath) {
|
|
2753
|
+
function findShowFile(files, requestedPath, objectRef) {
|
|
2540
2754
|
const normalized = requestedPath.replace(/\\/gu, "/");
|
|
2541
|
-
|
|
2542
|
-
|
|
2543
|
-
|
|
2544
|
-
|
|
2755
|
+
const exact = files.filter((file) => file.path === normalized);
|
|
2756
|
+
if (exact.length === 1) {
|
|
2757
|
+
return exact[0];
|
|
2758
|
+
}
|
|
2759
|
+
const exactEquivalent = singleEquivalentShowFile(exact);
|
|
2760
|
+
if (exactEquivalent) {
|
|
2761
|
+
return exactEquivalent;
|
|
2762
|
+
}
|
|
2763
|
+
if (exact.length > 1) {
|
|
2764
|
+
throw ambiguousShowPath(objectRef, requestedPath, exact);
|
|
2765
|
+
}
|
|
2766
|
+
const suffixCandidates = files.filter((file) => file.path.endsWith(`/${normalized}`) || path.basename(file.path) === normalized);
|
|
2767
|
+
if (suffixCandidates.length === 0) {
|
|
2768
|
+
return null;
|
|
2769
|
+
}
|
|
2770
|
+
const candidates = normalized === "stderr.log"
|
|
2771
|
+
? suffixCandidates.filter((file) => file.content.length > 0)
|
|
2772
|
+
: suffixCandidates;
|
|
2773
|
+
if (candidates.length === 1) {
|
|
2774
|
+
return candidates[0];
|
|
2775
|
+
}
|
|
2776
|
+
const equivalentCandidate = singleEquivalentShowFile(candidates);
|
|
2777
|
+
if (equivalentCandidate) {
|
|
2778
|
+
return equivalentCandidate;
|
|
2779
|
+
}
|
|
2780
|
+
if (candidates.length === 0 && suffixCandidates.length === 1) {
|
|
2781
|
+
return suffixCandidates[0];
|
|
2782
|
+
}
|
|
2783
|
+
const equivalentSuffixCandidate = singleEquivalentShowFile(suffixCandidates);
|
|
2784
|
+
if (equivalentSuffixCandidate) {
|
|
2785
|
+
return equivalentSuffixCandidate;
|
|
2786
|
+
}
|
|
2787
|
+
throw ambiguousShowPath(objectRef, requestedPath, candidates.length > 0 ? candidates : suffixCandidates);
|
|
2788
|
+
}
|
|
2789
|
+
function singleEquivalentShowFile(files) {
|
|
2790
|
+
if (files.length <= 1) {
|
|
2791
|
+
return null;
|
|
2792
|
+
}
|
|
2793
|
+
const first = files[0];
|
|
2794
|
+
return files.every((file) => file.kind === first.kind && file.encoding === first.encoding && file.content === first.content)
|
|
2795
|
+
? first
|
|
2796
|
+
: null;
|
|
2797
|
+
}
|
|
2798
|
+
function ambiguousShowPath(objectRef, requestedPath, candidates) {
|
|
2799
|
+
const candidatePaths = candidates.map((file) => file.path);
|
|
2800
|
+
return new WorkbenchCodedError("ref_ambiguous", `File path is ambiguous in ${objectRef}: ${requestedPath}. Candidates: ${candidatePaths.join(", ")}.`, {
|
|
2801
|
+
remediation: `Run workbench show ${objectRef}.`,
|
|
2802
|
+
subject: { ref: objectRef, path: requestedPath, candidates: candidatePaths },
|
|
2803
|
+
exitCode: 2,
|
|
2804
|
+
});
|
|
2545
2805
|
}
|
|
2546
2806
|
function fileListing(kind, id, files) {
|
|
2547
2807
|
return {
|
|
@@ -2552,17 +2812,16 @@ function fileListing(kind, id, files) {
|
|
|
2552
2812
|
};
|
|
2553
2813
|
}
|
|
2554
2814
|
function formatFileListing(kind, id, files) {
|
|
2555
|
-
return [`${kind}\t${id}\tfiles=${files.length}`, ...files.map((file) => file.path)].join("\n");
|
|
2815
|
+
return [`${kind}\t${displayRef(id)}\tfiles=${files.length}`, ...files.map((file) => file.path)].join("\n");
|
|
2556
2816
|
}
|
|
2557
2817
|
async function traceIdForCaseSource(core, ref) {
|
|
2558
2818
|
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
2559
|
-
const trace = snapshot.traces
|
|
2819
|
+
const trace = snapshotObjectByRef(snapshot.traces, ref, "trace");
|
|
2560
2820
|
if (trace) {
|
|
2561
2821
|
return trace.id;
|
|
2562
2822
|
}
|
|
2563
|
-
const
|
|
2564
|
-
const
|
|
2565
|
-
const traceId = run?.traceIds[0] ?? job?.traceIds[0];
|
|
2823
|
+
const selection = runOrJobEvidenceSelection(snapshot, ref);
|
|
2824
|
+
const traceId = selection.run?.traceIds[0] ?? selection.jobs[0]?.traceIds[0];
|
|
2566
2825
|
if (traceId) {
|
|
2567
2826
|
return traceId;
|
|
2568
2827
|
}
|
|
@@ -2594,21 +2853,35 @@ async function evalDeltas(core, runs) {
|
|
|
2594
2853
|
});
|
|
2595
2854
|
}
|
|
2596
2855
|
function formatEvalDelta(delta) {
|
|
2597
|
-
|
|
2856
|
+
if (delta.score === undefined) {
|
|
2857
|
+
return "";
|
|
2858
|
+
}
|
|
2859
|
+
const score = delta.score.toFixed(3);
|
|
2598
2860
|
if (delta.previousScore === undefined || delta.delta === undefined) {
|
|
2599
|
-
return `${delta.skillName} ${delta.versionId} ${score}
|
|
2861
|
+
return `${delta.skillName} ${displayRef(delta.versionId)} ${score}`;
|
|
2600
2862
|
}
|
|
2601
2863
|
const sign = delta.delta >= 0 ? "+" : "";
|
|
2602
|
-
return `${delta.skillName} ${delta.versionId} ${score} (was ${delta.previousScore.toFixed(3)}, ${sign}${delta.delta.toFixed(3)})`;
|
|
2864
|
+
return `${delta.skillName} ${displayRef(delta.versionId)} ${score} (was ${delta.previousScore.toFixed(3)}, ${sign}${delta.delta.toFixed(3)})`;
|
|
2603
2865
|
}
|
|
2604
|
-
function
|
|
2605
|
-
|
|
2866
|
+
async function evalSuccessNextCommand(core, runs) {
|
|
2867
|
+
if (runs.length === 0) {
|
|
2868
|
+
return "workbench eval";
|
|
2869
|
+
}
|
|
2870
|
+
if (!runs.some((run) => typeof run.score === "number")) {
|
|
2871
|
+
return "edit .workbench/cases, then run workbench eval";
|
|
2872
|
+
}
|
|
2873
|
+
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
2874
|
+
const currentVersion = snapshotVersionByRef(snapshot, snapshot.status.currentVersionId ?? snapshot.refs.current ?? "");
|
|
2875
|
+
const caseFiles = currentVersion?.files.filter((file) => file.kind === "text" &&
|
|
2876
|
+
/^\.workbench\/cases\/[^/]+\/case\.ya?ml$/u.test(file.path)) ?? [];
|
|
2877
|
+
const hasWorkflowCase = caseFiles.some((file) => file.kind === "text" && !/\n\s*smoke:\s*true(?:\s|$)/u.test(`\n${file.content}`));
|
|
2878
|
+
return hasWorkflowCase ? "workbench publish" : "edit .workbench/cases, then run workbench eval";
|
|
2606
2879
|
}
|
|
2607
2880
|
function formatStatusSnapshot(status) {
|
|
2608
2881
|
const lines = [
|
|
2609
2882
|
`Root: ${status.project.root}`,
|
|
2610
2883
|
`Initialized: ${status.project.initialized ? "yes" : "no"}`,
|
|
2611
|
-
...(status.project.currentVersionId ? [`Current version: ${status.project.currentVersionId}`] : []),
|
|
2884
|
+
...(status.project.currentVersionId ? [`Current version: ${displayRef(status.project.currentVersionId)}`] : []),
|
|
2612
2885
|
...(status.project.defaultSkill ? [`Default skill: ${status.project.defaultSkill}`] : []),
|
|
2613
2886
|
...(status.project.defaultAgent ? [`Default agent: ${status.project.defaultAgent}`] : []),
|
|
2614
2887
|
`Runs: ${status.runs.total}${status.runs.lastStatus ? ` (last ${status.runs.lastStatus})` : ""}`,
|
|
@@ -2618,7 +2891,7 @@ function formatStatusSnapshot(status) {
|
|
|
2618
2891
|
? [
|
|
2619
2892
|
"publication=published",
|
|
2620
2893
|
remote.publication.visibility ? `visibility=${remote.publication.visibility}` : undefined,
|
|
2621
|
-
remote.publication.versionId ? `version=${remote.publication.versionId}` : undefined,
|
|
2894
|
+
remote.publication.versionId ? `version=${displayRef(remote.publication.versionId)}` : undefined,
|
|
2622
2895
|
remote.publication.installUrl ? `install=${remote.publication.installUrl}` : undefined,
|
|
2623
2896
|
remote.publication.pinnedInstallUrl ? `pinned=${remote.publication.pinnedInstallUrl}` : undefined,
|
|
2624
2897
|
].filter(Boolean).join("\t")
|
|
@@ -2629,17 +2902,16 @@ function formatStatusSnapshot(status) {
|
|
|
2629
2902
|
? [
|
|
2630
2903
|
` error[${remote.sync.lastError.code}]: ${remote.sync.lastError.message}`,
|
|
2631
2904
|
...(remote.sync.lastAttemptAt ? [` last attempt: ${remote.sync.lastAttemptAt}`] : []),
|
|
2632
|
-
...(remote.sync.nextCommand ? [` next: ${remote.sync.nextCommand}`] : []),
|
|
2633
2905
|
]
|
|
2634
2906
|
: []),
|
|
2635
2907
|
];
|
|
2636
2908
|
})] : ["Remotes: none"]),
|
|
2637
|
-
...(status.next
|
|
2909
|
+
...(status.next ? [`next: ${shortenCommandRefs(status.next)}`] : []),
|
|
2638
2910
|
];
|
|
2639
2911
|
return lines.join("\n");
|
|
2640
2912
|
}
|
|
2641
2913
|
function formatVersion(version) {
|
|
2642
|
-
return `${version.id}\t${version.hash.slice(0, 12)}\t${version.message}`;
|
|
2914
|
+
return `${displayRef(version.id)}\t${version.hash.slice(0, 12)}\t${version.message}`;
|
|
2643
2915
|
}
|
|
2644
2916
|
function versionSummary(version) {
|
|
2645
2917
|
return {
|
|
@@ -2657,11 +2929,11 @@ function formatAgent(agent) {
|
|
|
2657
2929
|
function formatRun(run) {
|
|
2658
2930
|
const score = run.score === undefined ? "n/a" : run.score.toFixed(3);
|
|
2659
2931
|
const latency = run.latencyMs === undefined ? "n/a" : `${run.latencyMs}ms`;
|
|
2660
|
-
return `${run.id}\t${run.kind}\t${run.status}\tversion=${run.versionId}\tskill=${run.skillName}\tagent=${run.agentName}\tscore=${score}\tlatency=${latency}`;
|
|
2932
|
+
return `${displayRef(run.id)}\t${run.kind}\t${run.status}\tversion=${displayRef(run.versionId)}\tskill=${run.skillName}\tagent=${run.agentName}\tscore=${score}\tlatency=${latency}`;
|
|
2661
2933
|
}
|
|
2662
2934
|
function formatImproveResult(result) {
|
|
2663
2935
|
return [
|
|
2664
|
-
`Improved ${result.version.parentIds[0]
|
|
2936
|
+
`Improved ${result.version.parentIds[0] ? displayRef(result.version.parentIds[0]) : "current"} -> ${displayRef(result.version.id)}. ${formatRun(result.run)}`,
|
|
2665
2937
|
result.switched
|
|
2666
2938
|
? "Switched to improved version."
|
|
2667
2939
|
: `Did not switch: ${result.promotionReason}`,
|
|
@@ -2670,26 +2942,26 @@ function formatImproveResult(result) {
|
|
|
2670
2942
|
function formatJob(job) {
|
|
2671
2943
|
const score = job.score === undefined ? "n/a" : job.score.toFixed(3);
|
|
2672
2944
|
const duration = job.durationMs === undefined ? "n/a" : `${job.durationMs}ms`;
|
|
2673
|
-
return `${job.id}\trun=${job.runId}\tcase=${job.caseId}\tsample=${job.sample}\t${job.status}\tscore=${score}\tduration=${duration}`;
|
|
2945
|
+
return `${displayRef(job.id)}\trun=${displayRef(job.runId)}\tcase=${job.caseId}\tsample=${job.sample}\t${job.status}\tscore=${score}\tduration=${duration}`;
|
|
2674
2946
|
}
|
|
2675
2947
|
function formatComparison(comparison) {
|
|
2676
2948
|
const lines = ["version\tskill\tagent\tstatus\tscore\tcost\tlatency\trun"];
|
|
2677
2949
|
for (const cell of comparison.cells) {
|
|
2678
2950
|
lines.push([
|
|
2679
|
-
cell.versionId,
|
|
2951
|
+
displayRef(cell.versionId),
|
|
2680
2952
|
cell.skillName,
|
|
2681
2953
|
`${cell.agentName}@${shortObjectId(cell.agentHash)}`,
|
|
2682
2954
|
cell.status ?? "not-run",
|
|
2683
2955
|
cell.score === undefined ? "n/a" : cell.score.toFixed(3),
|
|
2684
2956
|
cell.costUsd === undefined ? "n/a" : `$${cell.costUsd.toFixed(4)}`,
|
|
2685
2957
|
cell.latencyMs === undefined ? "n/a" : `${cell.latencyMs}ms`,
|
|
2686
|
-
cell.runId
|
|
2958
|
+
cell.runId ? displayRef(cell.runId) : "n/a",
|
|
2687
2959
|
].join("\t"));
|
|
2688
2960
|
}
|
|
2689
2961
|
return lines.join("\n");
|
|
2690
2962
|
}
|
|
2691
2963
|
function shortObjectId(id) {
|
|
2692
|
-
return id.length >
|
|
2964
|
+
return id.length > 8 ? id.slice(0, 8) : id;
|
|
2693
2965
|
}
|
|
2694
2966
|
function formatTrace(trace) {
|
|
2695
2967
|
const result = asRecord(trace.result);
|
|
@@ -2698,7 +2970,7 @@ function formatTrace(trace) {
|
|
|
2698
2970
|
const error = typeof result?.error === "string" ? result.error.split(/\r?\n/u)[0] : undefined;
|
|
2699
2971
|
const files = trace.files.slice(0, 5).map((file) => file.path).join(",");
|
|
2700
2972
|
return [
|
|
2701
|
-
`${trace.id}\trun=${trace.runId}\tjob=${trace.jobId
|
|
2973
|
+
`${displayRef(trace.id)}\trun=${displayRef(trace.runId)}\tjob=${trace.jobId ? displayRef(trace.jobId) : "n/a"}\tversion=${displayRef(trace.versionId)}\tskill=${trace.skillName}\tagent=${trace.agentName}`,
|
|
2702
2974
|
status ? `status=${status}` : undefined,
|
|
2703
2975
|
score ? `score=${score}` : undefined,
|
|
2704
2976
|
error ? `error=${error}` : undefined,
|
|
@@ -2726,7 +2998,7 @@ function formatTraceDetail(detail) {
|
|
|
2726
2998
|
return detail.executions.map((execution) => {
|
|
2727
2999
|
const sessionLabels = execution.sessions.map((session) => session.label).join(",");
|
|
2728
3000
|
return [
|
|
2729
|
-
`${execution.id}\trun=${detail.runId}\tjobs=${execution.jobIds.join(",")}\tstatus=${execution.status}`,
|
|
3001
|
+
`${execution.id}\trun=${displayRef(detail.runId)}\tjobs=${execution.jobIds.map(displayRef).join(",")}\tstatus=${execution.status}`,
|
|
2730
3002
|
`events=${execution.trace.events.length}`,
|
|
2731
3003
|
`spans=${execution.trace.spans.length}`,
|
|
2732
3004
|
`summaries=${execution.trace.summaries.length}`,
|
|
@@ -2735,7 +3007,7 @@ function formatTraceDetail(detail) {
|
|
|
2735
3007
|
}).join("\n");
|
|
2736
3008
|
}
|
|
2737
3009
|
function formatArtifact(artifact) {
|
|
2738
|
-
return `${artifact.id}\trun=${artifact.runId}\tjob=${artifact.jobId}\t${artifact.kind}\tfiles=${artifact.files.length}`;
|
|
3010
|
+
return `${displayRef(artifact.id)}\trun=${displayRef(artifact.runId)}\tjob=${displayRef(artifact.jobId)}\t${artifact.kind}\tfiles=${artifact.files.length}`;
|
|
2739
3011
|
}
|
|
2740
3012
|
function artifactSummary(artifact) {
|
|
2741
3013
|
return {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@workbench-ai/workbench",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.71",
|
|
4
4
|
"repository": {
|
|
5
5
|
"type": "git",
|
|
6
6
|
"url": "git+https://github.com/workbench-ai/workbench.git",
|
|
@@ -21,10 +21,10 @@
|
|
|
21
21
|
],
|
|
22
22
|
"dependencies": {
|
|
23
23
|
"yaml": "^2.8.2",
|
|
24
|
-
"@workbench-ai/workbench-
|
|
25
|
-
"@workbench-ai/workbench-
|
|
26
|
-
"@workbench-ai/workbench-
|
|
27
|
-
"@workbench-ai/workbench-
|
|
24
|
+
"@workbench-ai/workbench-built-in-adapters": "0.0.71",
|
|
25
|
+
"@workbench-ai/workbench-core": "0.0.71",
|
|
26
|
+
"@workbench-ai/workbench-contract": "0.0.71",
|
|
27
|
+
"@workbench-ai/workbench-protocol": "0.0.71"
|
|
28
28
|
},
|
|
29
29
|
"devDependencies": {
|
|
30
30
|
"@tailwindcss/postcss": "^4.2.2",
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
"react-dom": "^19.2.0",
|
|
36
36
|
"typescript": "^5.9.2",
|
|
37
37
|
"vitest": "^3.2.4",
|
|
38
|
-
"@workbench-ai/workbench-ui": "0.0.
|
|
38
|
+
"@workbench-ai/workbench-ui": "0.0.71"
|
|
39
39
|
},
|
|
40
40
|
"scripts": {
|
|
41
41
|
"build": "rm -rf dist && tsc -p tsconfig.json && chmod 755 dist/workbench.js && node ./scripts/build-dev-open-assets.mjs",
|