@workbench-ai/workbench 0.0.69 → 0.0.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1024 -568
- package/dist/install-targets.js +2 -2
- package/package.json +5 -4
package/dist/index.js
CHANGED
|
@@ -4,156 +4,125 @@ import { createRequire } from "node:module";
|
|
|
4
4
|
import os from "node:os";
|
|
5
5
|
import path from "node:path";
|
|
6
6
|
import { gzipSync } from "node:zlib";
|
|
7
|
-
import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent,
|
|
7
|
+
import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchCases, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, publishWorkbenchVersion, removeWorkbenchCase, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
|
|
8
|
+
import { normalizeWorkbenchSkillName } from "@workbench-ai/workbench-contract";
|
|
8
9
|
import { emitError, emitResult } from "./output.js";
|
|
9
10
|
import { installSnapshotToTargets, installTargetsToJson, normalizeInstallSnapshotPath, resolveInstallTargets, supportedInstallTargets, } from "./install-targets.js";
|
|
10
11
|
import { startWorkbenchOpenServer } from "./open-server.js";
|
|
11
12
|
const require = createRequire(import.meta.url);
|
|
12
13
|
const HELP = [
|
|
13
14
|
"Usage:",
|
|
15
|
+
" workbench [--json]",
|
|
14
16
|
" workbench <command> [options]",
|
|
15
17
|
"",
|
|
16
|
-
"
|
|
17
|
-
"
|
|
18
|
-
"
|
|
19
|
-
" workbench
|
|
18
|
+
"Bare workbench prints project status and the next useful command.",
|
|
19
|
+
"",
|
|
20
|
+
"Taught commands:",
|
|
21
|
+
" workbench new [DIR] [--json]",
|
|
22
|
+
" workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
23
|
+
" workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
24
|
+
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
25
|
+
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
|
|
26
|
+
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--list] [--dry-run] [--json]",
|
|
27
|
+
"",
|
|
28
|
+
"More:",
|
|
29
|
+
" workbench help --all",
|
|
30
|
+
].join("\n");
|
|
31
|
+
const HELP_ALL = [
|
|
32
|
+
"Usage:",
|
|
33
|
+
" workbench # = workbench status",
|
|
34
|
+
" workbench new [DIR] [--json]",
|
|
35
|
+
" workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
20
36
|
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
21
|
-
" workbench improve [VERSION] [--
|
|
37
|
+
" workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
38
|
+
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
|
|
39
|
+
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--list] [--dry-run] [--json]",
|
|
22
40
|
"",
|
|
23
41
|
"Inspect:",
|
|
24
42
|
" workbench status [--dir DIR] [--json]",
|
|
25
|
-
" workbench
|
|
26
|
-
" workbench switch VERSION [--dir DIR] [--json]",
|
|
27
|
-
" workbench diff [A..B] [--dir DIR] [--json]",
|
|
43
|
+
" workbench log [--runs|--versions] [--json]",
|
|
28
44
|
" workbench show REF[:PATH] [--json]",
|
|
29
|
-
" workbench
|
|
30
|
-
" workbench
|
|
31
|
-
" workbench trace RUN_ID|JOB_ID|TRACE_ID [--json]",
|
|
45
|
+
" workbench diff [A..B] [--json]",
|
|
46
|
+
" workbench switch VERSION [--json]",
|
|
32
47
|
" workbench open [--host HOST] [--port PORT] [--no-open] [--json]",
|
|
33
48
|
"",
|
|
34
49
|
"Configure:",
|
|
35
|
-
" workbench
|
|
36
|
-
" workbench
|
|
37
|
-
" workbench case list|add|show|remove ...",
|
|
50
|
+
" workbench case add [RUN_ID] | list | rm ID [--json]",
|
|
51
|
+
" workbench agent add NAME --adapter X [--model M] [--with k=v]... | list | rm NAME [--json]",
|
|
38
52
|
"",
|
|
39
53
|
"Share and auth:",
|
|
40
|
-
" workbench
|
|
41
|
-
" workbench
|
|
42
|
-
" workbench
|
|
43
|
-
" workbench sync [REMOTE] [--dry-run] [--dir DIR] [--json]",
|
|
44
|
-
" workbench publish [VERSION] [--remote REMOTE] [--visibility private|internal|public] [--dry-run] [--dir DIR] [--json]",
|
|
45
|
-
" workbench install --source SOURCE [--agent codex|claude]... [--local] [--yes] [--list] [--dry-run] [--json]",
|
|
46
|
-
" workbench auth status [ADAPTER[/SLOT]] [--profile PROFILE] [--json]",
|
|
47
|
-
" workbench auth connect ADAPTER[/SLOT] [--method METHOD] [--profile PROFILE] [--profile-root DIR] [--local-only] [--json]",
|
|
48
|
-
" workbench auth disconnect ADAPTER[/SLOT] [--profile PROFILE] [--local-only] [--json]",
|
|
49
|
-
" workbench login [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--json]",
|
|
50
|
-
" workbench logout [--json]",
|
|
54
|
+
" workbench login [PROVIDER] [--method METHOD] [--profile P] [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--local-only] [--json]",
|
|
55
|
+
" workbench logout [PROVIDER] [--json]",
|
|
56
|
+
" workbench sync [REMOTE] [--dry-run] [--json]",
|
|
51
57
|
"",
|
|
52
58
|
"Remote URLs:",
|
|
53
59
|
" https://HOST/skills/OWNER/SKILL Workbench Cloud skill remote",
|
|
54
|
-
" file:///absolute/path local file remote",
|
|
55
|
-
"",
|
|
56
|
-
"Examples:",
|
|
57
|
-
" workbench init ./earnings-prep",
|
|
58
|
-
" workbench check --dir ./earnings-prep",
|
|
59
|
-
" workbench eval --agents default --samples 1",
|
|
60
|
-
" workbench compare",
|
|
61
|
-
" workbench status --json",
|
|
62
|
-
" workbench remote add --name origin --url https://v2.workbench.ai/skills/acme/earnings-prep",
|
|
63
|
-
" workbench publish --remote origin --visibility public --json",
|
|
64
|
-
" workbench install --source https://v2.workbench.ai/skills/acme/earnings-prep --agent codex --yes",
|
|
65
|
-
"",
|
|
66
|
-
"Environment:",
|
|
67
|
-
" CODEX_HOME and CLAUDE_HOME override read-only session discovery roots.",
|
|
68
|
-
" WORKBENCH_API_URL selects a Workbench Cloud API base URL for login, auth, and HTTP remotes.",
|
|
69
|
-
" WORKBENCH_API_TOKEN supplies a Workbench Cloud token without a login (WORKBENCH_SMOKE_BEARER_TOKEN is a fallback).",
|
|
70
|
-
" WORKBENCH_CONFIG overrides the CLI config path (default ~/.workbench/config.json).",
|
|
71
|
-
" WORKBENCH_DEVICE_AUTH overrides the pending device login record path.",
|
|
72
|
-
" WORKBENCH_ADAPTER_AUTH_STORE overrides the local adapter auth store directory.",
|
|
60
|
+
" file:///absolute/path local file remote for plumbing sync",
|
|
73
61
|
].join("\n");
|
|
74
62
|
const COMMAND_HELP = {
|
|
75
|
-
|
|
63
|
+
new: [
|
|
76
64
|
"Usage:",
|
|
77
|
-
" workbench
|
|
78
|
-
" workbench auth connect ADAPTER[/SLOT] [--method api-key|oauth|bedrock] [--profile PROFILE] [--profile-root DIR] [--local-only] [--json]",
|
|
79
|
-
" workbench auth disconnect ADAPTER[/SLOT] [--profile PROFILE] [--local-only] [--json]",
|
|
80
|
-
"",
|
|
81
|
-
"Stores adapter credentials locally and uploads them to Workbench Cloud when logged in unless --local-only is passed. Codex supports oauth and api-key. Claude supports oauth, api-key, and bedrock.",
|
|
65
|
+
" workbench new [DIR] [--json]",
|
|
82
66
|
"",
|
|
83
|
-
"
|
|
84
|
-
" workbench auth status --json",
|
|
85
|
-
" workbench auth connect codex --method api-key",
|
|
86
|
-
" workbench auth disconnect codex --json",
|
|
67
|
+
"Creates a Workbench skill project.",
|
|
87
68
|
].join("\n"),
|
|
88
69
|
eval: [
|
|
89
70
|
"Usage:",
|
|
90
|
-
" workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [
|
|
71
|
+
" workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
91
72
|
"",
|
|
92
73
|
"Runs eval jobs for the selected version, measured skills, and agents. Omitted selectors use manifest defaults.",
|
|
93
74
|
].join("\n"),
|
|
94
75
|
improve: [
|
|
95
76
|
"Usage:",
|
|
96
|
-
" workbench improve [VERSION] [--
|
|
77
|
+
" workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
97
78
|
"",
|
|
98
|
-
"Creates one improved child version from evidence.
|
|
79
|
+
"Creates one improved child version from evidence. The selected skills and agents must resolve to exactly one entry each.",
|
|
99
80
|
].join("\n"),
|
|
100
|
-
|
|
81
|
+
compare: [
|
|
101
82
|
"Usage:",
|
|
102
|
-
" workbench
|
|
103
|
-
"",
|
|
104
|
-
"Installs published Workbench Cloud source into explicit local agent targets.",
|
|
83
|
+
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
105
84
|
"",
|
|
106
|
-
"
|
|
107
|
-
" workbench install --source https://v2.workbench.ai/skills/acme/earnings-prep --agent codex --yes",
|
|
85
|
+
"Compares recorded eval evidence across selected skills, agents, and versions.",
|
|
108
86
|
].join("\n"),
|
|
109
|
-
|
|
87
|
+
install: [
|
|
110
88
|
"Usage:",
|
|
111
|
-
" workbench
|
|
112
|
-
" workbench remote list [--dir DIR] [--json]",
|
|
113
|
-
" workbench remote remove NAME [--dir DIR] [--json]",
|
|
89
|
+
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--list] [--dry-run] [--json]",
|
|
114
90
|
"",
|
|
115
|
-
"
|
|
91
|
+
"Installs published Workbench Cloud source into local agent targets.",
|
|
116
92
|
"",
|
|
117
|
-
"
|
|
118
|
-
" workbench
|
|
119
|
-
" workbench remote add --name scratch --url file:///tmp/earnings-prep-remote --replace",
|
|
93
|
+
"Example:",
|
|
94
|
+
" workbench install acme/earnings-prep --to codex --yes",
|
|
120
95
|
].join("\n"),
|
|
121
96
|
status: [
|
|
122
97
|
"Usage:",
|
|
123
98
|
" workbench status [--dir DIR] [--json]",
|
|
124
99
|
"",
|
|
125
100
|
"Reports project, worktree, run, per-remote sync/publication, and auth state. --json emits the workbench.status.v1 dashboard.",
|
|
126
|
-
"",
|
|
127
|
-
"Example:",
|
|
128
|
-
" workbench status --json",
|
|
129
101
|
].join("\n"),
|
|
130
102
|
logout: [
|
|
131
103
|
"Usage:",
|
|
132
|
-
" workbench logout [--json]",
|
|
104
|
+
" workbench logout [PROVIDER] [--json]",
|
|
133
105
|
"",
|
|
134
|
-
"
|
|
135
|
-
"",
|
|
136
|
-
"Example:",
|
|
137
|
-
" workbench logout --json",
|
|
106
|
+
"With no provider, logs out of Workbench Cloud. With a provider such as codex or claude, removes local adapter auth.",
|
|
138
107
|
].join("\n"),
|
|
139
108
|
show: [
|
|
140
109
|
"Usage:",
|
|
141
110
|
" workbench show REF [--json]",
|
|
142
111
|
" workbench show REF:PATH [--json]",
|
|
143
112
|
"",
|
|
144
|
-
"Shows a Workbench object
|
|
113
|
+
"Shows a Workbench object, lists files for file-backed objects, or prints one file.",
|
|
145
114
|
].join("\n"),
|
|
146
|
-
|
|
115
|
+
log: [
|
|
147
116
|
"Usage:",
|
|
148
|
-
" workbench
|
|
117
|
+
" workbench log [--runs|--versions] [--json]",
|
|
149
118
|
"",
|
|
150
|
-
"
|
|
119
|
+
"Shows one reverse-chronological timeline of versions and runs.",
|
|
151
120
|
].join("\n"),
|
|
152
|
-
|
|
121
|
+
diff: [
|
|
153
122
|
"Usage:",
|
|
154
|
-
" workbench
|
|
123
|
+
" workbench diff [A..B] [--json]",
|
|
155
124
|
"",
|
|
156
|
-
"
|
|
125
|
+
"Shows changed files between two Workbench source versions.",
|
|
157
126
|
].join("\n"),
|
|
158
127
|
switch: [
|
|
159
128
|
"Usage:",
|
|
@@ -161,148 +130,129 @@ const COMMAND_HELP = {
|
|
|
161
130
|
"",
|
|
162
131
|
"Switches the working skill source to a recorded Workbench version.",
|
|
163
132
|
].join("\n"),
|
|
133
|
+
open: [
|
|
134
|
+
"Usage:",
|
|
135
|
+
" workbench open [--host HOST] [--port PORT] [--no-open] [--json]",
|
|
136
|
+
"",
|
|
137
|
+
"Serves or emits the read-only Workbench inspection snapshot.",
|
|
138
|
+
].join("\n"),
|
|
139
|
+
case: [
|
|
140
|
+
"Usage:",
|
|
141
|
+
" workbench case list [--json]",
|
|
142
|
+
" workbench case add [RUN_ID] [--json]",
|
|
143
|
+
" workbench case rm ID [--json]",
|
|
144
|
+
"",
|
|
145
|
+
"Lists cases, creates a draft case, or removes a case.",
|
|
146
|
+
].join("\n"),
|
|
147
|
+
agent: [
|
|
148
|
+
"Usage:",
|
|
149
|
+
" workbench agent list [--json]",
|
|
150
|
+
" workbench agent add NAME --adapter X [--model M] [--with k=v]... [--json]",
|
|
151
|
+
" workbench agent rm NAME [--json]",
|
|
152
|
+
"",
|
|
153
|
+
"Lists, adds, or removes eval agent configurations.",
|
|
154
|
+
].join("\n"),
|
|
164
155
|
sync: [
|
|
165
156
|
"Usage:",
|
|
166
157
|
" workbench sync [REMOTE] [--dry-run] [--dir DIR] [--json]",
|
|
167
158
|
"",
|
|
168
|
-
"
|
|
169
|
-
"",
|
|
170
|
-
"Examples:",
|
|
171
|
-
" workbench sync origin --json",
|
|
172
|
-
" workbench sync origin --dry-run --json",
|
|
159
|
+
"Plumbing command: synchronizes local evidence and version objects with a Workbench remote.",
|
|
173
160
|
].join("\n"),
|
|
174
161
|
publish: [
|
|
175
162
|
"Usage:",
|
|
176
|
-
" workbench publish [VERSION] [--
|
|
177
|
-
"",
|
|
178
|
-
"Publishes installable skill source from the selected version to a Workbench Cloud remote.",
|
|
163
|
+
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--dir DIR] [--json]",
|
|
179
164
|
"",
|
|
180
|
-
"
|
|
181
|
-
" workbench publish --remote origin --visibility private --json",
|
|
182
|
-
" workbench publish <version-id> --remote origin --dry-run --json",
|
|
165
|
+
"Publishes installable skill source to Workbench Cloud. --as sets the linked OWNER/SKILL handle.",
|
|
183
166
|
].join("\n"),
|
|
184
167
|
login: [
|
|
185
168
|
"Usage:",
|
|
186
|
-
" workbench login [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--json]",
|
|
187
|
-
" workbench logout [--json]",
|
|
169
|
+
" workbench login [PROVIDER] [--method METHOD] [--profile P] [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--local-only] [--json]",
|
|
170
|
+
" workbench logout [PROVIDER] [--json]",
|
|
188
171
|
"",
|
|
189
|
-
"Connects the CLI to Workbench Cloud
|
|
190
|
-
"",
|
|
191
|
-
"Examples:",
|
|
192
|
-
" workbench login --start-only --json",
|
|
193
|
-
" workbench login --wait --timeout 120 --json",
|
|
172
|
+
"Connects the CLI to Workbench Cloud or captures local adapter auth for a provider.",
|
|
194
173
|
].join("\n"),
|
|
195
174
|
};
|
|
196
|
-
const
|
|
197
|
-
"
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
"local-only",
|
|
202
|
-
"list",
|
|
203
|
-
"no-open",
|
|
204
|
-
"start-only",
|
|
205
|
-
"replace",
|
|
206
|
-
"rerun",
|
|
207
|
-
"wait",
|
|
208
|
-
"yes",
|
|
209
|
-
]);
|
|
210
|
-
const FLAG_DEFINITIONS = {
|
|
211
|
-
adapter: "string",
|
|
212
|
-
"base-url": "string",
|
|
213
|
-
budget: "positive-integer",
|
|
175
|
+
const COMMON_FLAGS = {
|
|
176
|
+
json: "boolean",
|
|
177
|
+
};
|
|
178
|
+
const PROJECT_FLAGS = {
|
|
179
|
+
...COMMON_FLAGS,
|
|
214
180
|
dir: "string",
|
|
215
|
-
|
|
216
|
-
|
|
181
|
+
};
|
|
182
|
+
const HELP_FLAG = {
|
|
217
183
|
help: "boolean",
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
local: "boolean",
|
|
221
|
-
"local-only": "boolean",
|
|
222
|
-
list: "boolean",
|
|
223
|
-
method: "string",
|
|
224
|
-
model: "string",
|
|
225
|
-
name: "string",
|
|
226
|
-
"no-open": "boolean",
|
|
227
|
-
port: "positive-integer",
|
|
228
|
-
profile: "string",
|
|
229
|
-
"profile-root": "string",
|
|
230
|
-
remote: "string",
|
|
231
|
-
replace: "boolean",
|
|
232
|
-
rerun: "boolean",
|
|
233
|
-
samples: "positive-integer",
|
|
234
|
-
source: "string",
|
|
235
|
-
"start-only": "boolean",
|
|
236
|
-
agent: "string",
|
|
237
|
-
agents: "string",
|
|
238
|
-
skill: "string",
|
|
239
|
-
skills: "string",
|
|
184
|
+
};
|
|
185
|
+
const VERSION_FLAG = {
|
|
240
186
|
version: "boolean",
|
|
241
|
-
versions: "string",
|
|
242
|
-
visibility: "string",
|
|
243
|
-
timeout: "positive-integer",
|
|
244
|
-
url: "string",
|
|
245
|
-
wait: "boolean",
|
|
246
|
-
with: "repeat-string",
|
|
247
|
-
yes: "boolean",
|
|
248
187
|
};
|
|
249
188
|
const COMMAND_FLAGS = {
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
logout: ["json"],
|
|
261
|
-
open: ["dir", "host", "json", "no-open", "port"],
|
|
262
|
-
publish: ["dir", "dry-run", "json", "remote", "visibility"],
|
|
263
|
-
show: ["dir", "json"],
|
|
264
|
-
status: ["dir", "json"],
|
|
265
|
-
switch: ["dir", "json"],
|
|
266
|
-
sync: ["dir", "dry-run", "json"],
|
|
267
|
-
trace: ["dir", "json"],
|
|
268
|
-
versions: ["dir", "json"],
|
|
269
|
-
};
|
|
270
|
-
const SUBCOMMAND_FLAGS = {
|
|
271
|
-
auth: {
|
|
272
|
-
defaultSubcommand: "status",
|
|
273
|
-
flags: {
|
|
274
|
-
status: ["json", "profile"],
|
|
275
|
-
connect: ["json", "local-only", "method", "profile", "profile-root"],
|
|
276
|
-
disconnect: ["json", "local-only", "profile"],
|
|
277
|
-
},
|
|
189
|
+
compare: { ...PROJECT_FLAGS, ...HELP_FLAG, agents: "string", skills: "string", versions: "string" },
|
|
190
|
+
diff: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
191
|
+
eval: {
|
|
192
|
+
...PROJECT_FLAGS,
|
|
193
|
+
...HELP_FLAG,
|
|
194
|
+
agents: "string",
|
|
195
|
+
cloud: "boolean",
|
|
196
|
+
rerun: "boolean",
|
|
197
|
+
samples: "positive-integer",
|
|
198
|
+
skills: "string",
|
|
278
199
|
},
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
200
|
+
help: { ...COMMON_FLAGS, ...HELP_FLAG, all: "boolean" },
|
|
201
|
+
improve: {
|
|
202
|
+
...PROJECT_FLAGS,
|
|
203
|
+
...HELP_FLAG,
|
|
204
|
+
agents: "string",
|
|
205
|
+
budget: "positive-integer",
|
|
206
|
+
cloud: "boolean",
|
|
207
|
+
samples: "positive-integer",
|
|
208
|
+
skills: "string",
|
|
286
209
|
},
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
210
|
+
install: { ...COMMON_FLAGS, ...HELP_FLAG, "dry-run": "boolean", list: "boolean", to: "repeat-string", yes: "boolean" },
|
|
211
|
+
log: { ...PROJECT_FLAGS, ...HELP_FLAG, runs: "boolean", versions: "boolean" },
|
|
212
|
+
login: {
|
|
213
|
+
...COMMON_FLAGS,
|
|
214
|
+
...HELP_FLAG,
|
|
215
|
+
"base-url": "string",
|
|
216
|
+
"local-only": "boolean",
|
|
217
|
+
method: "string",
|
|
218
|
+
"no-open": "boolean",
|
|
219
|
+
profile: "string",
|
|
220
|
+
"profile-root": "string",
|
|
221
|
+
"start-only": "boolean",
|
|
222
|
+
timeout: "positive-integer",
|
|
223
|
+
wait: "boolean",
|
|
293
224
|
},
|
|
294
|
-
|
|
225
|
+
logout: { ...COMMON_FLAGS, ...HELP_FLAG },
|
|
226
|
+
new: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
227
|
+
open: { ...PROJECT_FLAGS, ...HELP_FLAG, host: "string", "no-open": "boolean", port: "positive-integer" },
|
|
228
|
+
publish: {
|
|
229
|
+
...PROJECT_FLAGS,
|
|
230
|
+
...HELP_FLAG,
|
|
231
|
+
as: "string",
|
|
232
|
+
"dry-run": "boolean",
|
|
233
|
+
private: "boolean",
|
|
234
|
+
public: "boolean",
|
|
235
|
+
team: "boolean",
|
|
236
|
+
},
|
|
237
|
+
show: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
238
|
+
status: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
239
|
+
switch: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
240
|
+
sync: { ...PROJECT_FLAGS, ...HELP_FLAG, "dry-run": "boolean" },
|
|
241
|
+
version: { ...COMMON_FLAGS, ...VERSION_FLAG },
|
|
242
|
+
};
|
|
243
|
+
const SUBCOMMAND_FLAGS = {
|
|
244
|
+
case: {
|
|
295
245
|
flags: {
|
|
296
|
-
list:
|
|
246
|
+
list: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
247
|
+
add: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
248
|
+
rm: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
297
249
|
},
|
|
298
250
|
},
|
|
299
251
|
agent: {
|
|
300
252
|
flags: {
|
|
301
|
-
list:
|
|
302
|
-
add:
|
|
303
|
-
|
|
304
|
-
default: ["dir", "json"],
|
|
305
|
-
remove: ["dir", "json"],
|
|
253
|
+
list: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
254
|
+
add: { ...PROJECT_FLAGS, ...HELP_FLAG, adapter: "string", model: "string", with: "repeat-string" },
|
|
255
|
+
rm: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
306
256
|
},
|
|
307
257
|
},
|
|
308
258
|
};
|
|
@@ -313,20 +263,23 @@ export async function runCli(argv, io = {
|
|
|
313
263
|
const parsed = parseArgs(argv);
|
|
314
264
|
const command = parsed.positionals[0];
|
|
315
265
|
try {
|
|
316
|
-
|
|
266
|
+
validateCommandFlags(parsed, command);
|
|
267
|
+
if (command === "version" || parsed.flags.version === true) {
|
|
317
268
|
io.stdout.write(`workbench ${getCliVersion()}\n`);
|
|
318
269
|
return 0;
|
|
319
270
|
}
|
|
320
|
-
if (
|
|
271
|
+
if (command === "help") {
|
|
321
272
|
const helpCommand = command === "help" ? optionalPositional(parsed, 1) : undefined;
|
|
322
|
-
io.stdout.write(`${helpCommand ? commandHelp(helpCommand) : HELP}\n`);
|
|
273
|
+
io.stdout.write(`${parsed.flags.all === true ? HELP_ALL : helpCommand ? commandHelp(helpCommand) : HELP}\n`);
|
|
323
274
|
return 0;
|
|
324
275
|
}
|
|
325
276
|
if (parsed.flags.help === true) {
|
|
326
|
-
io.stdout.write(`${commandHelp(command)}\n`);
|
|
277
|
+
io.stdout.write(`${command ? commandHelp(command) : HELP}\n`);
|
|
327
278
|
return 0;
|
|
328
279
|
}
|
|
329
|
-
|
|
280
|
+
if (!command) {
|
|
281
|
+
return await handleStatus(parsed, io);
|
|
282
|
+
}
|
|
330
283
|
if (command === "login") {
|
|
331
284
|
return await handleLogin(parsed, io);
|
|
332
285
|
}
|
|
@@ -337,27 +290,17 @@ export async function runCli(argv, io = {
|
|
|
337
290
|
return await handleInstall(parsed, io);
|
|
338
291
|
}
|
|
339
292
|
const core = await coreOptions(parsed);
|
|
340
|
-
if (command === "
|
|
293
|
+
if (command === "new") {
|
|
341
294
|
const status = await initWorkbenchSkill({ dir: parsed.positionals[1] ?? dirFlag(parsed) });
|
|
342
|
-
return output(status, parsed, io, () => `
|
|
295
|
+
return output(status, parsed, io, () => `Created Workbench skill at ${status.root}.\nnext: edit SKILL.md, then run workbench eval`);
|
|
343
296
|
}
|
|
344
297
|
if (command === "status") {
|
|
345
|
-
|
|
346
|
-
const auth = await workbenchCliAuthStatus();
|
|
347
|
-
return emitResult("workbench.status.v1", {
|
|
348
|
-
project: status.project,
|
|
349
|
-
worktree: status.worktree,
|
|
350
|
-
runs: status.runs,
|
|
351
|
-
remotes: status.remotes,
|
|
352
|
-
auth: auth,
|
|
353
|
-
next: status.next,
|
|
354
|
-
}, parsed, io, () => formatStatusSnapshot({ ...status, auth }));
|
|
355
|
-
}
|
|
356
|
-
if (command === "check") {
|
|
357
|
-
const result = await checkWorkbenchSkill(core);
|
|
358
|
-
return output(result, parsed, io, () => formatCheck(result));
|
|
298
|
+
return await handleStatus(parsed, io);
|
|
359
299
|
}
|
|
360
300
|
if (command === "eval") {
|
|
301
|
+
if (parsed.flags.cloud === true) {
|
|
302
|
+
return await handleCloudEval(parsed, io);
|
|
303
|
+
}
|
|
361
304
|
const runs = await evalWorkbenchSkill({
|
|
362
305
|
...core,
|
|
363
306
|
version: optionalPositional(parsed, 1),
|
|
@@ -371,21 +314,34 @@ export async function runCli(argv, io = {
|
|
|
371
314
|
if (failedRuns.length > 0) {
|
|
372
315
|
return emitEvalFailure(runs, failedRuns, artifactIds, parsed, io);
|
|
373
316
|
}
|
|
374
|
-
|
|
317
|
+
const deltas = await evalDeltas(core, runs);
|
|
318
|
+
const nextCommands = evalSuccessNextCommands(runs);
|
|
319
|
+
return emitResult("workbench.cli.eval.v1", {
|
|
320
|
+
result: runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
|
|
321
|
+
deltas: deltas,
|
|
322
|
+
nextCommands: nextCommands,
|
|
323
|
+
}, parsed, io, () => [
|
|
324
|
+
runs.map(formatRun).join("\n"),
|
|
325
|
+
...deltas.map(formatEvalDelta),
|
|
326
|
+
...(nextCommands[0] ? [`next: ${nextCommands[0]}`] : []),
|
|
327
|
+
].filter(Boolean).join("\n"));
|
|
375
328
|
}
|
|
376
329
|
if (command === "improve") {
|
|
330
|
+
if (parsed.flags.cloud === true) {
|
|
331
|
+
return await handleCloudImprove(parsed, io);
|
|
332
|
+
}
|
|
377
333
|
const result = await improveWorkbenchSkill({
|
|
378
334
|
...core,
|
|
379
335
|
version: optionalPositional(parsed, 1),
|
|
380
|
-
skill: stringFlag(parsed, "
|
|
381
|
-
agent: stringFlag(parsed, "
|
|
336
|
+
skill: stringFlag(parsed, "skills"),
|
|
337
|
+
agent: stringFlag(parsed, "agents"),
|
|
382
338
|
budget: intFlag(parsed, "budget"),
|
|
383
339
|
samples: intFlag(parsed, "samples"),
|
|
384
340
|
});
|
|
385
341
|
return output({
|
|
386
342
|
...result,
|
|
387
343
|
version: versionSummary(result.version),
|
|
388
|
-
}, parsed, io, () => formatImproveResult(result));
|
|
344
|
+
}, parsed, io, () => `${formatImproveResult(result)}\nnext: workbench eval`);
|
|
389
345
|
}
|
|
390
346
|
if (command === "compare") {
|
|
391
347
|
const comparison = await compareWorkbench({
|
|
@@ -396,90 +352,28 @@ export async function runCli(argv, io = {
|
|
|
396
352
|
});
|
|
397
353
|
return output(comparison, parsed, io, () => formatComparison(comparison));
|
|
398
354
|
}
|
|
399
|
-
if (command === "versions") {
|
|
400
|
-
const versions = await listWorkbenchVersions(core);
|
|
401
|
-
return output(versions.map(versionSummary), parsed, io, () => versions.map(formatVersion).join("\n") || "No versions.");
|
|
402
|
-
}
|
|
403
355
|
if (command === "switch") {
|
|
404
356
|
const versionRef = requiredPositional(parsed, 1, "workbench switch requires VERSION.");
|
|
405
357
|
const version = await switchWorkbenchVersion(versionRef, core);
|
|
406
358
|
return output(versionSummary(version), parsed, io, () => `Switched to ${version.id}.`);
|
|
407
359
|
}
|
|
408
360
|
if (command === "diff") {
|
|
409
|
-
const range =
|
|
361
|
+
const range = optionalPositional(parsed, 1) ?? await defaultDiffRange(core);
|
|
410
362
|
const diffs = await diffWorkbenchVersions(range, core);
|
|
411
363
|
return output(diffs, parsed, io, () => diffs.map((entry) => `${entry.status}\t${entry.path}`).join("\n") || "No diff.");
|
|
412
364
|
}
|
|
413
365
|
if (command === "show") {
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
}
|
|
419
|
-
const value = await showWorkbenchRef(ref, core);
|
|
420
|
-
return output(value, parsed, io, () => formatShow(value));
|
|
421
|
-
}
|
|
422
|
-
if (command === "files") {
|
|
423
|
-
const ref = requiredPositional(parsed, 1, "workbench files requires REF.");
|
|
424
|
-
const files = await filesForWorkbenchRef(ref, core);
|
|
425
|
-
return output(files.map(fileSummary), parsed, io, () => files.map((file) => file.path).join("\n") || "No files.");
|
|
426
|
-
}
|
|
427
|
-
if (command === "list") {
|
|
428
|
-
return await handleList(parsed, io);
|
|
429
|
-
}
|
|
430
|
-
if (command === "trace") {
|
|
431
|
-
const ref = optionalPositional(parsed, 1);
|
|
432
|
-
if (!ref) {
|
|
433
|
-
throw new WorkbenchCodedError("usage", "workbench trace requires RUN_ID, JOB_ID, or TRACE_ID.", {
|
|
434
|
-
remediation: "Run workbench list runs --json or workbench list traces --json.",
|
|
435
|
-
exitCode: 2,
|
|
436
|
-
});
|
|
437
|
-
}
|
|
438
|
-
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
439
|
-
const run = snapshot.runs.find((entry) => entry.id === ref);
|
|
440
|
-
const job = snapshot.jobs.find((entry) => entry.id === ref);
|
|
441
|
-
const traces = run
|
|
442
|
-
? snapshot.traces.filter((trace) => run.traceIds.includes(trace.id))
|
|
443
|
-
: job
|
|
444
|
-
? snapshot.traces.filter((trace) => job.traceIds.includes(trace.id))
|
|
445
|
-
: snapshot.traces.filter((trace) => trace.id === ref);
|
|
446
|
-
if (traces.length === 0) {
|
|
447
|
-
const jobs = run
|
|
448
|
-
? snapshot.jobs.filter((entry) => entry.runId === run.id)
|
|
449
|
-
: job ? [job] : [];
|
|
450
|
-
const details = jobs.flatMap((entry) => {
|
|
451
|
-
const detail = workbenchJobEvidenceForSnapshot(snapshot, {
|
|
452
|
-
runId: entry.runId,
|
|
453
|
-
jobId: entry.id,
|
|
454
|
-
});
|
|
455
|
-
return detail ? [detail] : [];
|
|
456
|
-
}).filter((detail) => detail.executions.some((execution) => execution.sessions.length > 0 ||
|
|
457
|
-
execution.trace.spans.length > 0 ||
|
|
458
|
-
execution.trace.events.length > 0 ||
|
|
459
|
-
execution.trace.summaries.length > 0));
|
|
460
|
-
if (details.length > 0) {
|
|
461
|
-
return output(details, parsed, io, () => details.map(formatTraceDetail).join("\n"));
|
|
462
|
-
}
|
|
463
|
-
throw new WorkbenchCodedError("ref_not_found", `Trace not found: ${ref}`, {
|
|
464
|
-
remediation: "Run workbench list runs --json, workbench list jobs --json, or workbench list traces --json.",
|
|
465
|
-
subject: { ref },
|
|
466
|
-
exitCode: 1,
|
|
467
|
-
});
|
|
468
|
-
}
|
|
469
|
-
return output(traces, parsed, io, () => traces.map(formatTrace).join("\n"));
|
|
366
|
+
return await handleShow(parsed, io);
|
|
367
|
+
}
|
|
368
|
+
if (command === "log") {
|
|
369
|
+
return await handleLog(parsed, io);
|
|
470
370
|
}
|
|
471
371
|
if (command === "agent") {
|
|
472
372
|
return await handleAgent(parsed, io);
|
|
473
373
|
}
|
|
474
|
-
if (command === "skills") {
|
|
475
|
-
return await handleSkills(parsed, io);
|
|
476
|
-
}
|
|
477
374
|
if (command === "case") {
|
|
478
375
|
return await handleCase(parsed, io);
|
|
479
376
|
}
|
|
480
|
-
if (command === "remote") {
|
|
481
|
-
return await handleRemote(parsed, io);
|
|
482
|
-
}
|
|
483
377
|
if (command === "sync") {
|
|
484
378
|
const result = await syncWorkbenchRemote({
|
|
485
379
|
...core,
|
|
@@ -496,17 +390,39 @@ export async function runCli(argv, io = {
|
|
|
496
390
|
}, parsed, io, () => `${result.dryRun ? "Would sync" : "Synced"} ${result.remote.name}: pushed ${result.pushed}, pulled ${result.pulled}${result.upToDate ? " (up to date)" : ""}.`);
|
|
497
391
|
}
|
|
498
392
|
if (command === "publish") {
|
|
393
|
+
const preview = parsed.flags["dry-run"] === true && !stringFlag(parsed, "as")
|
|
394
|
+
? await previewPublishWithDerivedRemote(parsed)
|
|
395
|
+
: undefined;
|
|
396
|
+
if (preview) {
|
|
397
|
+
return emitResult("workbench.cli.publish.v1", {
|
|
398
|
+
remote: preview.remote,
|
|
399
|
+
version: versionSummary(preview.version),
|
|
400
|
+
visibility: preview.visibility,
|
|
401
|
+
installHandle: preview.installHandle,
|
|
402
|
+
installUrl: preview.installUrl,
|
|
403
|
+
pinnedInstallUrl: preview.pinnedInstallUrl,
|
|
404
|
+
dryRun: true,
|
|
405
|
+
}, parsed, io, () => [
|
|
406
|
+
`Would publish ${preview.version.id} to remote ${preview.remote.name}.`,
|
|
407
|
+
`Visibility: ${preview.visibility}`,
|
|
408
|
+
`Install: ${preview.installUrl}`,
|
|
409
|
+
`Pinned: ${preview.pinnedInstallUrl}`,
|
|
410
|
+
`next: workbench install ${preview.installHandle}`,
|
|
411
|
+
].join("\n"));
|
|
412
|
+
}
|
|
413
|
+
const remote = await ensurePublishRemote(parsed);
|
|
499
414
|
const result = await publishWorkbenchVersion({
|
|
500
415
|
...core,
|
|
501
416
|
version: optionalPositional(parsed, 1),
|
|
502
|
-
remote
|
|
417
|
+
remote,
|
|
503
418
|
dryRun: parsed.flags["dry-run"] === true,
|
|
504
|
-
visibility:
|
|
419
|
+
visibility: parsePublishVisibilityFlags(parsed),
|
|
505
420
|
});
|
|
506
421
|
return emitResult("workbench.cli.publish.v1", {
|
|
507
422
|
remote: result.remote,
|
|
508
423
|
version: versionSummary(result.version),
|
|
509
424
|
visibility: result.visibility,
|
|
425
|
+
installHandle: result.installHandle,
|
|
510
426
|
installUrl: result.installUrl,
|
|
511
427
|
pinnedInstallUrl: result.pinnedInstallUrl,
|
|
512
428
|
...(result.dryRun ? { dryRun: true } : {}),
|
|
@@ -515,11 +431,9 @@ export async function runCli(argv, io = {
|
|
|
515
431
|
`Visibility: ${result.visibility}`,
|
|
516
432
|
`Install: ${result.installUrl}`,
|
|
517
433
|
`Pinned: ${result.pinnedInstallUrl}`,
|
|
434
|
+
`next: workbench install ${result.installHandle}`,
|
|
518
435
|
].join("\n"));
|
|
519
436
|
}
|
|
520
|
-
if (command === "auth") {
|
|
521
|
-
return await handleAuth(parsed, io);
|
|
522
|
-
}
|
|
523
437
|
if (command === "open") {
|
|
524
438
|
if (parsed.flags.json === true) {
|
|
525
439
|
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
@@ -545,29 +459,102 @@ export async function runCli(argv, io = {
|
|
|
545
459
|
return emitError(error, parsed, io);
|
|
546
460
|
}
|
|
547
461
|
}
|
|
548
|
-
async function
|
|
549
|
-
const
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
462
|
+
/**
 * Handles the status command: combines the core status snapshot with the CLI
 * auth status and emits both as a `workbench.status.v1` result.
 *
 * @param parsed Parsed CLI arguments; forwarded to `coreOptions` and `emitResult`.
 * @param io Output sink forwarded to `emitResult`.
 * @returns Whatever `emitResult` returns.
 */
async function handleStatus(parsed, io) {
    const options = await coreOptions(parsed);
    const status = await workbenchStatusSnapshot(options);
    const auth = await workbenchCliAuthStatus();
    const { project, worktree, runs, remotes, next } = status;
    // Key order is kept (auth sits between remotes and next) so serialized output is unchanged.
    const payload = { project, worktree, runs, remotes, auth, next };
    // The text formatter receives the full snapshot, not just the emitted fields.
    const renderText = () => formatStatusSnapshot({ ...status, auth });
    return emitResult("workbench.status.v1", payload, parsed, io, renderText);
}
|
|
474
|
+
/**
 * Handles `workbench log`: lists versions and/or runs from the read-only
 * inspection snapshot, sorted by `createdAt` descending.
 *
 * `--runs` limits the listing to runs, `--versions` to versions; passing both
 * is a usage error, as is any extra positional argument.
 *
 * @param parsed Parsed CLI arguments (`flags`, `positionals`).
 * @param io Output sink forwarded to `emitResult`.
 * @returns Whatever `emitResult` returns.
 */
async function handleLog(parsed, io) {
    const runsOnly = parsed.flags.runs === true;
    const versionsOnly = parsed.flags.versions === true;
    if (runsOnly && versionsOnly) {
        throw new WorkbenchCodedError("usage", "workbench log accepts only one of --runs or --versions.", {
            remediation: "Run workbench log --runs or workbench log --versions.",
            exitCode: 2,
        });
    }
    if (parsed.positionals.length > 1) {
        // A boolean flag combined with an extra positional gets a flag-specific message.
        if (runsOnly) {
            throw new WorkbenchUserError("--runs does not accept a value.");
        }
        if (versionsOnly) {
            throw new WorkbenchUserError("--versions does not accept a value.");
        }
        rejectExtraInput(parsed, {
            maxPositionals: 1,
            message: "workbench log does not accept refs or paths.",
            remediation: "Run workbench log, workbench log --runs, or workbench log --versions.",
        });
    }
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(await coreOptions(parsed));
    const toVersionEntry = (version) => ({
        kind: "version",
        id: version.id,
        createdAt: version.createdAt,
        message: version.message,
        fileCount: version.files.length,
    });
    const toRunEntry = (run) => {
        const entry = {
            kind: "run",
            id: run.id,
            createdAt: run.createdAt,
            status: run.status,
            versionId: run.versionId,
            skillName: run.skillName,
            agentName: run.agentName,
        };
        // score is only emitted when the run actually carries one.
        if (run.score !== undefined) {
            entry.score = run.score;
        }
        return entry;
    };
    // Versions come first so that the stable sort keeps them ahead of runs on equal timestamps.
    const versionEntries = runsOnly ? [] : snapshot.versions.map(toVersionEntry);
    const runEntries = versionsOnly ? [] : snapshot.runs.map(toRunEntry);
    const entries = versionEntries.concat(runEntries);
    entries.sort((left, right) => right.createdAt.localeCompare(left.createdAt));
    const renderText = () => entries.map(formatLogEntry).join("\n") || "No history.";
    return emitResult("workbench.cli.log.v1", { entries }, parsed, io, renderText);
}
|
|
520
|
+
/**
 * Handles `workbench show REF`.
 *
 * Resolution order, first match wins:
 *   1. a local agent session for REF;
 *   2. when REF carries a path part: a file of a run/job, else the generic ref lookup;
 *   3. a version, trace, or artifact whose id equals the object part (file listing);
 *   4. evidence details for a run/job;
 *   5. the generic `showWorkbenchRef` lookup.
 *
 * @param parsed Parsed CLI arguments; positional 1 is the required REF.
 * @param io Output sink forwarded to `output`.
 * @returns Whatever `output` returns.
 */
async function handleShow(parsed, io) {
    const ref = requiredPositional(parsed, 1, "workbench show requires REF.");
    const session = await showLocalAgentSession(ref);
    if (session) {
        return output(session, parsed, io, () => formatSessionDetail(session));
    }
    const core = await coreOptions(parsed);
    const [objectRef, requestedPath] = splitShowRef(ref);
    // Shared fallback: resolve the full, unsplit ref through the core.
    const showGenericRef = async () => {
        const value = await showWorkbenchRef(ref, core);
        return output(value, parsed, io, () => formatShow(value));
    };
    if (requestedPath) {
        const runOrJobFile = await fileForRunOrJobRef(core, objectRef, requestedPath);
        return runOrJobFile
            ? output(runOrJobFile, parsed, io, () => formatShow(runOrJobFile))
            : showGenericRef();
    }
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    // Each snapshot collection is read only when the earlier ones produced no match.
    const listings = [
        ["version", "versions"],
        ["trace", "traces"],
        ["artifact", "artifacts"],
    ];
    for (const [kind, collection] of listings) {
        const match = snapshot[collection].find((entry) => entry.id === objectRef);
        if (match) {
            return output(fileListing(kind, match.id, match.files), parsed, io, () => formatFileListing(kind, match.id, match.files));
        }
    }
    const details = evidenceDetailsForRunOrJob(snapshot, objectRef);
    if (details.length > 0) {
        return output(details, parsed, io, () => details.map(formatTraceDetail).join("\n"));
    }
    return showGenericRef();
}
|
|
569
556
|
async function handleAgent(parsed, io) {
|
|
570
|
-
const subcommand = requiredPositional(parsed, 1, "workbench agent requires list|add|
|
|
557
|
+
const subcommand = requiredPositional(parsed, 1, "workbench agent requires list|add|rm.");
|
|
571
558
|
if (subcommand === "list") {
|
|
572
559
|
const agents = await listWorkbenchAgents(await coreOptions(parsed));
|
|
573
560
|
return output(agents, parsed, io, () => agents.map(formatAgent).join("\n") || "No agents.");
|
|
@@ -587,173 +574,68 @@ async function handleAgent(parsed, io) {
|
|
|
587
574
|
});
|
|
588
575
|
return output(agent, parsed, io, () => `Added agent ${formatAgent(agent)}.`);
|
|
589
576
|
}
|
|
590
|
-
if (subcommand === "
|
|
591
|
-
const
|
|
592
|
-
const agent = (await listWorkbenchAgents(await coreOptions(parsed))).find((entry) => entry.name === name);
|
|
593
|
-
if (!agent) {
|
|
594
|
-
throw new WorkbenchCodedError("ref_not_found", `Agent not found: ${name}`, {
|
|
595
|
-
remediation: "Run workbench agent list.",
|
|
596
|
-
subject: { agent: name },
|
|
597
|
-
exitCode: 1,
|
|
598
|
-
});
|
|
599
|
-
}
|
|
600
|
-
return output(agent, parsed, io, () => formatAgent(agent));
|
|
601
|
-
}
|
|
602
|
-
if (subcommand === "default") {
|
|
603
|
-
const result = await setDefaultWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent default requires NAME."), await coreOptions(parsed));
|
|
604
|
-
return output(result, parsed, io, () => `Default agent: ${result.defaultAgent}`);
|
|
605
|
-
}
|
|
606
|
-
if (subcommand === "remove") {
|
|
607
|
-
const result = await removeWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent remove requires NAME."), await coreOptions(parsed));
|
|
577
|
+
if (subcommand === "rm") {
|
|
578
|
+
const result = await removeWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent rm requires NAME."), await coreOptions(parsed));
|
|
608
579
|
return output(result, parsed, io, () => `Removed agent ${result.removed}.`);
|
|
609
580
|
}
|
|
610
581
|
throw new WorkbenchUserError(`Unsupported agent command: ${subcommand}`);
|
|
611
582
|
}
|
|
612
|
-
async function handleSkills(parsed, io) {
|
|
613
|
-
const subcommand = requiredPositional(parsed, 1, "workbench skills requires list.");
|
|
614
|
-
if (subcommand !== "list") {
|
|
615
|
-
throw new WorkbenchUserError(`Unsupported skills command: ${subcommand}`);
|
|
616
|
-
}
|
|
617
|
-
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(await coreOptions(parsed));
|
|
618
|
-
return output(snapshot.skillSources, parsed, io, () => snapshot.skillSources.map((source) => {
|
|
619
|
-
const where = source.kind === "remote"
|
|
620
|
-
? `${source.from}${source.ref ? `#${source.ref}` : ""}`
|
|
621
|
-
: source.kind === "none"
|
|
622
|
-
? "baseline:none"
|
|
623
|
-
: source.path;
|
|
624
|
-
return `${source.name}\t${source.kind}\t${where}\tincludes=${source.includes?.length ?? 0}`;
|
|
625
|
-
}).join("\n") || "No skills.");
|
|
626
|
-
}
|
|
627
583
|
/**
 * Handles `workbench case list|add|rm`.
 *
 * - `list` prints every case as `id<TAB>path`.
 * - `add` creates a case; an optional positional is resolved to a trace id
 *   through `traceIdForCaseSource` and passed as `fromTraceId`.
 * - `rm` removes the case named by the required CASE_ID positional.
 *
 * @param parsed Parsed CLI arguments; positional 1 selects the subcommand.
 * @param io Output sink forwarded to `output`.
 * @throws WorkbenchUserError for any other subcommand.
 */
async function handleCase(parsed, io) {
    const subcommand = requiredPositional(parsed, 1, "workbench case requires list|add|rm.");
    switch (subcommand) {
        case "list": {
            const cases = await listWorkbenchCases(await coreOptions(parsed));
            const renderText = () => cases.map((entry) => `${entry.id}\t${entry.path}`).join("\n") || "No cases.";
            return output(cases, parsed, io, renderText);
        }
        case "add": {
            const core = await coreOptions(parsed);
            const sourceRef = optionalPositional(parsed, 2);
            const fromTraceId = sourceRef ? await traceIdForCaseSource(core, sourceRef) : undefined;
            const record = await addWorkbenchCase({ ...core, fromTraceId });
            return output(record, parsed, io, () => `Added case ${record.id}.`);
        }
        case "rm": {
            // The positional is validated before core options are resolved.
            const caseId = requiredPositional(parsed, 2, "workbench case rm requires CASE_ID.");
            const result = await removeWorkbenchCase(caseId, await coreOptions(parsed));
            return output(result, parsed, io, () => `Removed case ${result.removed}.`);
        }
        default:
            throw new WorkbenchUserError(`Unsupported case command: ${subcommand}`);
    }
}
|
|
647
|
-
async function
|
|
648
|
-
const
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
remote: result.remote,
|
|
686
|
-
removed: result.removed,
|
|
687
|
-
}, parsed, io, () => result.removed ? `Removed remote ${result.remote}.` : `Remote ${result.remote} was not configured.`);
|
|
688
|
-
}
|
|
689
|
-
throw new WorkbenchUserError(`Unsupported remote command: ${subcommand}`);
|
|
690
|
-
}
|
|
691
|
-
async function handleAuth(parsed, io) {
|
|
692
|
-
const subcommand = optionalPositional(parsed, 1) ?? "status";
|
|
693
|
-
if (subcommand === "status") {
|
|
694
|
-
const targetRaw = optionalPositional(parsed, 2);
|
|
695
|
-
const profile = authProfileFlag(parsed);
|
|
696
|
-
const store = localWorkbenchAdapterAuthStore(adapterAuthStoreRoot());
|
|
697
|
-
const cliAuth = await workbenchCliAuthStatus();
|
|
698
|
-
if (targetRaw) {
|
|
699
|
-
const status = await store.status(parseAuthTarget(targetRaw, profile));
|
|
700
|
-
return emitResult("workbench.cli.auth-status.v1", {
|
|
701
|
-
workbenchCloud: cliAuth.workbenchCloud,
|
|
702
|
-
adapters: [authStatusRecordToJson(status)],
|
|
703
|
-
}, parsed, io, () => [
|
|
704
|
-
formatWorkbenchCloudAuthStatus(cliAuth.workbenchCloud),
|
|
705
|
-
"Adapter auth:",
|
|
706
|
-
formatAuthStatusRecord(status),
|
|
707
|
-
].join("\n"));
|
|
708
|
-
}
|
|
709
|
-
const statuses = await store.listStatus();
|
|
710
|
-
const required = await requiredAgentAuthStatuses(parsed, statuses);
|
|
711
|
-
return emitResult("workbench.cli.auth-status.v1", {
|
|
712
|
-
workbenchCloud: cliAuth.workbenchCloud,
|
|
713
|
-
adapters: cliAuth.adapters,
|
|
714
|
-
required: required,
|
|
715
|
-
}, parsed, io, () => formatAuthStatusList(cliAuth.workbenchCloud, statuses, required));
|
|
716
|
-
}
|
|
717
|
-
if (subcommand === "connect") {
|
|
718
|
-
const targetRaw = requiredPositional(parsed, 2, "workbench auth connect requires ADAPTER[/SLOT].");
|
|
719
|
-
const target = parseAuthTarget(targetRaw, authProfileFlag(parsed));
|
|
720
|
-
const method = authMethod(parsed, target.adapterId);
|
|
721
|
-
const bundle = await collectAdapterAuthBundle({
|
|
722
|
-
target,
|
|
723
|
-
method,
|
|
724
|
-
profileRoot: path.resolve(stringFlag(parsed, "profile-root") ?? os.homedir()),
|
|
725
|
-
});
|
|
726
|
-
const saved = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).put(bundle);
|
|
727
|
-
const remote = await uploadAdapterConnection(saved, parsed);
|
|
728
|
-
return emitResult("workbench.cli.auth-connect.v1", {
|
|
729
|
-
localAdapter: {
|
|
730
|
-
adapter: saved.adapterId,
|
|
731
|
-
...(saved.slot ? { slot: saved.slot } : {}),
|
|
732
|
-
profile: saved.profile,
|
|
733
|
-
method: saved.method,
|
|
734
|
-
status: saved.status,
|
|
735
|
-
version: saved.version,
|
|
736
|
-
updatedAt: saved.updatedAt,
|
|
737
|
-
},
|
|
738
|
-
workbenchCloud: remote,
|
|
739
|
-
}, parsed, io, () => `Connected ${formatAuthTarget(saved)} ${saved.method} auth v${saved.version}; Workbench Cloud: ${remote.sync}${remote.reason ? ` (${remote.reason})` : ""}.`);
|
|
740
|
-
}
|
|
741
|
-
if (subcommand === "disconnect") {
|
|
742
|
-
const targetRaw = requiredPositional(parsed, 2, "workbench auth disconnect requires ADAPTER[/SLOT].");
|
|
743
|
-
const target = parseAuthTarget(targetRaw, authProfileFlag(parsed));
|
|
744
|
-
await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).disconnect(target);
|
|
745
|
-
const remote = await deleteAdapterConnectionRemote(target, parsed);
|
|
746
|
-
return emitResult("workbench.cli.auth-disconnect.v1", {
|
|
747
|
-
localAdapter: {
|
|
748
|
-
adapter: target.adapterId,
|
|
749
|
-
...(target.slot ? { slot: target.slot } : {}),
|
|
750
|
-
profile: target.profile,
|
|
751
|
-
status: "disconnected",
|
|
752
|
-
},
|
|
753
|
-
workbenchCloud: remote,
|
|
754
|
-
}, parsed, io, () => `Disconnected ${formatAuthTarget(target)}; Workbench Cloud: ${remote.sync}${remote.reason ? ` (${remote.reason})` : ""}.`);
|
|
755
|
-
}
|
|
756
|
-
throw new WorkbenchUserError(`Unsupported auth command: ${subcommand}`);
|
|
601
|
+
/**
 * Logs in to an adapter provider: collects an auth bundle for the target,
 * stores it in the local adapter auth store, uploads the connection, and
 * emits a `workbench.cli.login.v1` result.
 *
 * @param provider Provider argument, parsed into an auth target via `parseAuthTarget`.
 * @param parsed Parsed CLI arguments (reads the `profile-root` string flag).
 * @param io Output sink forwarded to `emitResult`.
 * @returns Whatever `emitResult` returns.
 */
async function handleAdapterLogin(provider, parsed, io) {
    const target = parseAuthTarget(provider, authProfileFlag(parsed));
    const method = authMethod(parsed, target.adapterId);
    // Falls back to the user's home directory when --profile-root is not given.
    const profileRoot = path.resolve(stringFlag(parsed, "profile-root") ?? os.homedir());
    const bundle = await collectAdapterAuthBundle({ target, method, profileRoot });
    const store = localWorkbenchAdapterAuthStore(adapterAuthStoreRoot());
    const saved = await store.put(bundle);
    const remote = await uploadAdapterConnection(saved, parsed);
    // Built step by step so `slot`, when present, keeps its position right after `adapter`.
    const localAdapter = { adapter: saved.adapterId };
    if (saved.slot) {
        localAdapter.slot = saved.slot;
    }
    localAdapter.profile = saved.profile;
    localAdapter.method = saved.method;
    localAdapter.status = saved.status;
    localAdapter.version = saved.version;
    localAdapter.updatedAt = saved.updatedAt;
    const payload = {
        provider: saved.adapterId,
        localAdapter,
        workbenchCloud: remote,
    };
    const renderText = () => {
        const reason = remote.reason ? ` (${remote.reason})` : "";
        return `Connected ${formatAuthTarget(saved)} ${saved.method} auth v${saved.version}; Workbench Cloud: ${remote.sync}${reason}.`;
    };
    return emitResult("workbench.cli.login.v1", payload, parsed, io, renderText);
}
|
|
625
|
+
/**
 * Logs out of an adapter provider: disconnects the target in the local adapter
 * auth store, deletes the remote connection, and emits a
 * `workbench.cli.logout.v1` result.
 *
 * @param provider Provider argument, parsed into an auth target via `parseAuthTarget`.
 * @param parsed Parsed CLI arguments.
 * @param io Output sink forwarded to `emitResult`.
 * @returns Whatever `emitResult` returns.
 */
async function handleAdapterLogout(provider, parsed, io) {
    const target = parseAuthTarget(provider, authProfileFlag(parsed));
    const store = localWorkbenchAdapterAuthStore(adapterAuthStoreRoot());
    // The local record is disconnected first; the remote delete follows.
    await store.disconnect(target);
    const remote = await deleteAdapterConnectionRemote(target, parsed);
    // Built step by step so `slot`, when present, keeps its position right after `adapter`.
    const localAdapter = { adapter: target.adapterId };
    if (target.slot) {
        localAdapter.slot = target.slot;
    }
    localAdapter.profile = target.profile;
    localAdapter.status = "disconnected";
    const payload = {
        provider: target.adapterId,
        localAdapter,
        workbenchCloud: remote,
    };
    const renderText = () => {
        const reason = remote.reason ? ` (${remote.reason})` : "";
        return `Disconnected ${formatAuthTarget(target)}; Workbench Cloud: ${remote.sync}${reason}.`;
    };
    return emitResult("workbench.cli.logout.v1", payload, parsed, io, renderText);
}
|
|
758
640
|
function getCliVersion() {
|
|
759
641
|
const manifest = require("../package.json");
|
|
@@ -763,19 +645,17 @@ function commandHelp(command) {
|
|
|
763
645
|
return COMMAND_HELP[command] ?? HELP;
|
|
764
646
|
}
|
|
765
647
|
/**
 * Rejects flags the resolved command does not accept and validates the value
 * of each accepted flag.
 *
 * When `command` is null/undefined the effective command is "version" if
 * `--version` was passed, otherwise "status". If `allowedFlagsForCommand`
 * returns nothing, no validation is performed.
 *
 * @param parsed Parsed CLI arguments (`flags` is a name -> value map).
 * @param command Command name, or null/undefined for a bare invocation.
 * @throws WorkbenchUserError when a flag is not allowed for the command.
 */
function validateCommandFlags(parsed, command) {
    const bareInvocationCommand = () => (parsed.flags.version === true ? "version" : "status");
    const effectiveCommand = command == null ? bareInvocationCommand() : command;
    const allowed = allowedFlagsForCommand(parsed, effectiveCommand);
    if (!allowed) {
        return;
    }
    const allowedNames = Object.keys(allowed);
    Object.entries(parsed.flags).forEach(([name, value]) => {
        if (!allowedNames.includes(name)) {
            throw new WorkbenchUserError(`Unsupported flag --${name} for workbench ${effectiveCommand}.`);
        }
        // The map value for the flag is handed to the value validator as its kind.
        validateFlagValue(name, value, allowed[name]);
    });
}
|
|
781
661
|
function allowedFlagsForCommand(parsed, command) {
|
|
@@ -784,25 +664,12 @@ function allowedFlagsForCommand(parsed, command) {
|
|
|
784
664
|
return COMMAND_FLAGS[command];
|
|
785
665
|
}
|
|
786
666
|
const subcommand = parsed.positionals[1] ?? subcommands.defaultSubcommand;
|
|
787
|
-
return subcommand ? subcommands.flags[subcommand] ??
|
|
667
|
+
return subcommand ? subcommands.flags[subcommand] ?? { ...COMMON_FLAGS, ...HELP_FLAG } : { ...COMMON_FLAGS, ...HELP_FLAG };
|
|
788
668
|
}
|
|
789
|
-
function validateFlagValue(name, value,
|
|
790
|
-
const kind = FLAG_DEFINITIONS[name];
|
|
669
|
+
function validateFlagValue(name, value, kind) {
|
|
791
670
|
if (!kind) {
|
|
792
671
|
return;
|
|
793
672
|
}
|
|
794
|
-
if (repeatString) {
|
|
795
|
-
if (Array.isArray(value)) {
|
|
796
|
-
if (value.some((entry) => !entry.trim())) {
|
|
797
|
-
throw new WorkbenchUserError(`--${name} requires a non-empty value.`);
|
|
798
|
-
}
|
|
799
|
-
return;
|
|
800
|
-
}
|
|
801
|
-
if (typeof value === "string" && value.trim()) {
|
|
802
|
-
return;
|
|
803
|
-
}
|
|
804
|
-
throw new WorkbenchUserError(`--${name} requires a non-empty value.`);
|
|
805
|
-
}
|
|
806
673
|
if (kind === "boolean") {
|
|
807
674
|
if (value !== true) {
|
|
808
675
|
throw new WorkbenchUserError(`--${name} does not accept a value.`);
|
|
@@ -826,11 +693,24 @@ function validateFlagValue(name, value, repeatString = false) {
|
|
|
826
693
|
}
|
|
827
694
|
}
|
|
828
695
|
const CONFIG_SCHEMA = "workbench.cli.config.v1";
|
|
696
|
+
const DEFAULT_WORKBENCH_CLOUD_BASE_URL = "https://v2.workbench.ai";
|
|
829
697
|
const API_REQUEST_MAX_ATTEMPTS = 3;
|
|
830
698
|
const API_REQUEST_GZIP_THRESHOLD_BYTES = 1024 * 1024;
|
|
699
|
+
const CLOUD_RUN_TIMEOUT_MS = 30 * 60 * 1000;
|
|
700
|
+
const CLOUD_RUN_POLL_INTERVAL_MS = 3000;
|
|
831
701
|
async function handleLogin(parsed, io) {
|
|
832
|
-
|
|
833
|
-
|
|
702
|
+
const provider = optionalPositional(parsed, 1);
|
|
703
|
+
if (provider) {
|
|
704
|
+
if (parsed.positionals.length > 2) {
|
|
705
|
+
throw new WorkbenchUserError("workbench login PROVIDER accepts only one provider argument.");
|
|
706
|
+
}
|
|
707
|
+
if (parsed.flags["start-only"] === true || parsed.flags.wait === true || parsed.flags.timeout !== undefined || parsed.flags["no-open"] === true) {
|
|
708
|
+
throw new WorkbenchCodedError("usage", "Workbench Cloud login flags do not apply to provider login.", {
|
|
709
|
+
remediation: `Run workbench login ${provider} --method ${authMethod(parsed, provider)}.`,
|
|
710
|
+
exitCode: 2,
|
|
711
|
+
});
|
|
712
|
+
}
|
|
713
|
+
return await handleAdapterLogin(provider, parsed, io);
|
|
834
714
|
}
|
|
835
715
|
if (parsed.flags["start-only"] === true && parsed.flags.wait === true) {
|
|
836
716
|
throw new WorkbenchCodedError("usage", "workbench login accepts only one of --start-only or --wait.", {
|
|
@@ -911,8 +791,12 @@ async function handleLogin(parsed, io) {
|
|
|
911
791
|
}, parsed, io, () => `Workbench Cloud: authenticated${username ? ` as ${username}` : ""}\nWorkbench API: ${baseUrl}`);
|
|
912
792
|
}
|
|
913
793
|
async function handleLogout(parsed, io) {
|
|
914
|
-
|
|
915
|
-
|
|
794
|
+
const provider = optionalPositional(parsed, 1);
|
|
795
|
+
if (provider) {
|
|
796
|
+
if (parsed.positionals.length > 2) {
|
|
797
|
+
throw new WorkbenchUserError("workbench logout PROVIDER accepts only one provider argument.");
|
|
798
|
+
}
|
|
799
|
+
return await handleAdapterLogout(provider, parsed, io);
|
|
916
800
|
}
|
|
917
801
|
const config = await loadConfig();
|
|
918
802
|
const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
|
|
@@ -950,36 +834,28 @@ async function handleLogout(parsed, io) {
|
|
|
950
834
|
`Logged out of Workbench${baseUrl ? ` (${baseUrl})` : ""}.`,
|
|
951
835
|
`Token: ${tokenPresent ? "present" : "absent"}; revoke ${revoke}; config ${configRemoved ? "removed" : "unchanged"}.`,
|
|
952
836
|
adapterAuthRetained
|
|
953
|
-
? "Local adapter auth records were retained; run workbench
|
|
837
|
+
? "Local adapter auth records were retained; run workbench logout PROVIDER to remove them."
|
|
954
838
|
: "No local adapter auth records remain.",
|
|
955
839
|
].join("\n"));
|
|
956
840
|
}
|
|
957
841
|
async function handleInstall(parsed, io) {
|
|
958
|
-
const
|
|
959
|
-
flag: "source",
|
|
960
|
-
usage: "workbench install requires --source SOURCE.",
|
|
961
|
-
remediation: "Run workbench install --source https://HOST/skills/OWNER/SKILL --agent codex.",
|
|
962
|
-
});
|
|
842
|
+
const sourceInput = requiredPositional(parsed, 1, "workbench install requires HANDLE_OR_URL.");
|
|
963
843
|
rejectExtraInput(parsed, {
|
|
964
|
-
maxPositionals:
|
|
965
|
-
message: "workbench install accepts
|
|
966
|
-
remediation: "Run workbench install
|
|
844
|
+
maxPositionals: 2,
|
|
845
|
+
message: "workbench install accepts one HANDLE_OR_URL argument.",
|
|
846
|
+
remediation: "Run workbench install OWNER/SKILL --to codex.",
|
|
967
847
|
});
|
|
968
|
-
|
|
969
|
-
throw new WorkbenchCodedError("install_target_required", "workbench install requires an explicit target.", {
|
|
970
|
-
remediation: "Run workbench install --source SOURCE --agent codex, workbench install --source SOURCE --agent claude, or workbench install --source SOURCE --local.",
|
|
971
|
-
exitCode: 2,
|
|
972
|
-
});
|
|
973
|
-
}
|
|
848
|
+
const source = await resolveWorkbenchInstallSourceInput(sourceInput);
|
|
974
849
|
const workbenchSource = parseWorkbenchInstallSource(source);
|
|
975
850
|
if (!workbenchSource) {
|
|
976
851
|
throw new WorkbenchCodedError("usage", "workbench install requires a Workbench Cloud source URL.", {
|
|
977
|
-
remediation: "Run workbench install
|
|
852
|
+
remediation: "Run workbench install OWNER/SKILL --to codex.",
|
|
978
853
|
exitCode: 2,
|
|
979
854
|
});
|
|
980
855
|
}
|
|
981
856
|
const snapshot = await fetchWorkbenchInstallSourceSnapshot(workbenchSource, source);
|
|
982
857
|
const sourceSummary = workbenchInstallSourceSummary(workbenchSource, snapshot);
|
|
858
|
+
const config = await loadConfig();
|
|
983
859
|
if (parsed.flags.list === true) {
|
|
984
860
|
return emitResult("workbench.cli.install.v1", {
|
|
985
861
|
source: sourceSummary,
|
|
@@ -992,9 +868,11 @@ async function handleInstall(parsed, io) {
|
|
|
992
868
|
...supportedInstallTargets().map((target) => ` ${target.agent}\t${target.destination}`),
|
|
993
869
|
].join("\n"));
|
|
994
870
|
}
|
|
871
|
+
const toTargets = stringsFlag(parsed, "to");
|
|
872
|
+
const selectedTargets = toTargets.length > 0 ? normalizeInstallTargetNames(toTargets) : await defaultInstallTargetNames(config);
|
|
995
873
|
const targets = resolveInstallTargets({
|
|
996
|
-
agents:
|
|
997
|
-
local:
|
|
874
|
+
agents: selectedTargets.filter((target) => target !== "local"),
|
|
875
|
+
local: selectedTargets.some((target) => target === "local"),
|
|
998
876
|
skillName: snapshot.name,
|
|
999
877
|
});
|
|
1000
878
|
const result = await installSnapshotToTargets({
|
|
@@ -1003,6 +881,9 @@ async function handleInstall(parsed, io) {
|
|
|
1003
881
|
overwrite: parsed.flags.yes === true,
|
|
1004
882
|
dryRun: parsed.flags["dry-run"] === true,
|
|
1005
883
|
});
|
|
884
|
+
if (toTargets.length > 0 && parsed.flags["dry-run"] !== true) {
|
|
885
|
+
await writeConfig({ ...config, installTargets: selectedTargets });
|
|
886
|
+
}
|
|
1006
887
|
return emitResult("workbench.cli.install.v1", {
|
|
1007
888
|
source: sourceSummary,
|
|
1008
889
|
result: result.result,
|
|
@@ -1016,6 +897,330 @@ async function handleInstall(parsed, io) {
|
|
|
1016
897
|
...result.targets.map((target) => ` ${target.agent}\t${target.previous}\t${target.destination}`),
|
|
1017
898
|
].join("\n"));
|
|
1018
899
|
}
|
|
900
|
+
/**
 * Runs a hosted eval through `startCloudExecution` and reports the outcome.
 *
 * If any returned run has status "failed" or "canceled", the result is
 * delegated to `emitEvalFailure`; otherwise a `workbench.cli.eval.v1` result
 * with run summaries, deltas, next commands and a cloud summary is emitted.
 *
 * @param parsed Parsed CLI arguments.
 * @param io Output sink forwarded to the emitters.
 */
async function handleCloudEval(parsed, io) {
    const started = await startCloudExecution("eval", parsed);
    const { core, runs } = started;
    const artifactIds = await artifactIdsByRunId(core, runs);
    const unsuccessfulStatuses = ["failed", "canceled"];
    const failedRuns = runs.filter((run) => unsuccessfulStatuses.includes(run.status));
    if (failedRuns.length > 0) {
        return emitEvalFailure(runs, failedRuns, artifactIds, parsed, io);
    }
    const deltas = await evalDeltas(core, runs);
    const nextCommands = cloudEvalNextCommands(runs);
    const payload = {
        result: runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
        deltas,
        nextCommands,
        cloud: cloudExecutionSummary(started),
    };
    const renderText = () => {
        const lines = [
            `Completed hosted eval on ${started.remote.url}.`,
            runs.map(formatRun).join("\n"),
            ...deltas.map(formatEvalDelta),
        ];
        if (nextCommands[0]) {
            lines.push(`next: ${nextCommands[0]}`);
        }
        // Empty lines (e.g. from an empty run list) are dropped from the text output.
        return lines.filter(Boolean).join("\n");
    };
    return emitResult("workbench.cli.eval.v1", payload, parsed, io, renderText);
}
|
|
921
|
+
/**
 * Runs a hosted improve through `startCloudExecution` and reports the outcome.
 *
 * Throws an `improve_failed` coded error when any returned run has status
 * "failed" or "canceled". Otherwise asks `switchHostedImproveVersionIfPromoted`
 * for a switched version id and emits a `workbench.cli.improve.v1` result.
 *
 * @param parsed Parsed CLI arguments.
 * @param io Output sink forwarded to `emitResult`.
 * @throws WorkbenchCodedError ("improve_failed", exit code 1) on failed/canceled runs.
 */
async function handleCloudImprove(parsed, io) {
    const started = await startCloudExecution("improve", parsed);
    const { core, runs } = started;
    const artifactIds = await artifactIdsByRunId(core, runs);
    const unsuccessfulStatuses = ["failed", "canceled"];
    const failedRuns = runs.filter((run) => unsuccessfulStatuses.includes(run.status));
    if (failedRuns.length > 0) {
        // The remediation points at the first failed run; the subject lists all of them.
        const [first] = failedRuns;
        const subject = {
            runIds: failedRuns.map((run) => run.id),
            statuses: Object.fromEntries(failedRuns.map((run) => [run.id, run.status])),
        };
        throw new WorkbenchCodedError("improve_failed", "Hosted improve failed; evidence was saved.", {
            remediation: `Run workbench show ${first.id}.`,
            subject,
            exitCode: 1,
        });
    }
    const switchedVersionId = await switchHostedImproveVersionIfPromoted(started);
    const nextCommands = cloudImproveNextCommands(runs);
    const payload = {
        result: runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
        nextCommands,
        cloud: cloudExecutionSummary(started),
    };
    // switchedVersionId is only part of the payload when a switch happened.
    if (switchedVersionId) {
        payload.switchedVersionId = switchedVersionId;
    }
    const renderText = () => {
        const lines = [
            `Completed hosted improve on ${started.remote.url}.`,
            runs.map(formatRun).join("\n"),
        ];
        if (switchedVersionId) {
            lines.push(`Switched local source to ${switchedVersionId}.`);
        }
        if (nextCommands[0]) {
            lines.push(`next: ${nextCommands[0]}`);
        }
        return lines.filter(Boolean).join("\n");
    };
    return emitResult("workbench.cli.improve.v1", payload, parsed, io, renderText);
}
|
|
950
|
+
/**
 * Chooses install targets when none were given explicitly.
 *
 * Returns `config.installTargets` when it is non-empty. Otherwise probes every
 * supported non-"local" target and keeps those whose directory two levels
 * above the target destination exists; falls back to ["local"] when none do.
 *
 * @param config Loaded CLI config (reads `installTargets`).
 * @returns Promise resolving to an array of target names.
 */
async function defaultInstallTargetNames(config) {
    const remembered = config.installTargets;
    if (remembered && remembered.length > 0) {
        return remembered;
    }
    const candidates = supportedInstallTargets().filter((target) => target.agent !== "local");
    const detected = [];
    for (const { agent, destination } of candidates) {
        // NOTE(review): presumably the grandparent of the destination is the agent's home directory — confirm against install-targets.js.
        const agentHome = path.dirname(path.dirname(destination));
        if (await pathExists(agentHome)) {
            detected.push(agent);
        }
    }
    if (detected.length === 0) {
        return ["local"];
    }
    return detected;
}
|
|
966
|
+
/**
 * Trims and lower-cases install target names, rejects anything other than
 * "codex", "claude" or "local", and removes duplicates while keeping the
 * order of first occurrence.
 *
 * @param values Iterable of raw target name strings.
 * @returns Array of unique, normalized target names.
 * @throws WorkbenchCodedError ("usage", exit code 2) on the first unsupported name.
 */
function normalizeInstallTargetNames(values) {
    const supported = ["codex", "claude", "local"];
    // A Set preserves insertion order, so it deduplicates without reordering.
    const unique = new Set();
    for (const raw of values) {
        const name = raw.trim().toLowerCase();
        if (!supported.includes(name)) {
            // The message echoes the raw input, not the normalized form.
            throw new WorkbenchCodedError("usage", `Unsupported install target: ${raw}`, {
                remediation: "Use --to codex, --to claude, or --to local.",
                exitCode: 2,
            });
        }
        unique.add(name);
    }
    return [...unique];
}
|
|
980
|
+
/**
 * Reports whether `fs.access` succeeds for the given path.
 *
 * @param filePath Path to probe.
 * @returns Promise resolving to true when access succeeds, false on any error.
 */
async function pathExists(filePath) {
    let accessible = true;
    try {
        await fs.access(filePath);
    }
    catch {
        // Every failure is treated as "does not exist"; the error is intentionally not inspected.
        accessible = false;
    }
    return accessible;
}
|
|
989
|
+
/**
 * Starts a hosted eval or improve for the project and waits for its runs.
 *
 * Steps, in order: resolve the cloud remote, parse it into a skill source,
 * obtain a cloud token, sync the remote, capture the starting snapshot,
 * resolve the skill id, POST the execution request, sync again, then wait for
 * the returned runs via `waitForCloudRuns`.
 *
 * @param command "improve" posts to the skill's `/improve` endpoint; any other value posts to `/runs`.
 * @param parsed Parsed CLI arguments.
 * @returns Object with `core`, `remote`, `skillId`, `runs`, `startVersionId`,
 *   `source`, and `sync` ({ before, after } push/pull counters).
 * @throws WorkbenchCodedError `remote_invalid_url`, `auth_required`, or `cloud_run_missing`.
 */
async function startCloudExecution(command, parsed) {
    const root = dirFlag(parsed) ?? process.cwd();
    const remote = await ensureCloudRemoteForExecution(root, parsed);
    const source = parseWorkbenchInstallSource(remote.url);
    if (!source) {
        throw new WorkbenchCodedError("remote_invalid_url", `Workbench remote is not a Cloud skill URL: ${remote.url}`, {
            remediation: "Run workbench publish to recreate the Workbench Cloud link.",
            subject: { remote: remote.name, url: remote.url },
            exitCode: 2,
        });
    }
    const token = await workbenchCloudToken({ baseUrl: source.baseUrl });
    if (!token) {
        throw new WorkbenchCodedError("auth_required", `workbench ${command} --cloud requires Workbench Cloud auth.`, {
            remediation: `Run workbench login --base-url ${source.baseUrl}.`,
            exitCode: 1,
        });
    }
    const core = { dir: root, authToken: token };
    const syncRemote = () => syncWorkbenchRemote({ ...core, remote: remote.name });
    const syncBefore = await syncRemote();
    // Captured before the request so the starting version is known afterwards.
    const startSnapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const skillId = await resolveCloudSkillId(source);
    const action = command === "improve" ? "/improve" : "/runs";
    const endpoint = `/api/workbench/skills/${encodeURIComponent(skillId)}${action}`;
    const request = { method: "POST", body: cloudExecutionRequestBody(command, parsed) };
    const response = await apiRequest(endpoint, request, source.baseUrl);
    const runs = response.runs ?? [];
    if (runs.length === 0) {
        throw new WorkbenchCodedError("cloud_run_missing", `Workbench Cloud did not return a run for ${command}.`, {
            retryable: true,
            remediation: "Run workbench log --runs.",
            subject: { remote: remote.name, skillId },
            exitCode: 1,
        });
    }
    const initialSync = await syncRemote();
    const completed = await waitForCloudRuns({ core, remote, runs, initialSync });
    // Only the three counters of each sync result are reported.
    const syncCounters = ({ pushed, pulled, upToDate }) => ({ pushed, pulled, upToDate });
    return {
        core,
        remote,
        skillId,
        runs: completed.runs,
        startVersionId: startSnapshot.status.currentVersionId ?? startSnapshot.refs.current,
        source,
        sync: {
            before: syncCounters(syncBefore),
            after: syncCounters(completed.sync),
        },
    };
}
|
|
1041
|
+
/**
 * Polls local inspection snapshots until every requested run is present and
 * terminal, re-syncing the remote between polls.
 *
 * The timeout and poll interval come from `WORKBENCH_CLOUD_RUN_TIMEOUT_MS` and
 * `WORKBENCH_CLOUD_RUN_POLL_INTERVAL_MS` when those hold positive integers,
 * otherwise from the module-level defaults.
 *
 * @param {{ core: object, remote: { name: string }, runs: Array<{ id?: string }>, initialSync: object }} input
 * @returns {Promise<{ runs: object[], sync: object }>} The terminal runs from
 *   the latest snapshot and the most recent sync result.
 * @throws {WorkbenchCodedError} `cloud_run_missing` when any run lacks a
 *   non-empty string id; `cloud_run_pending` once the deadline has passed.
 */
async function waitForCloudRuns(input) {
    const runIds = [];
    for (const run of input.runs) {
        const id = run.id;
        if (typeof id === "string" && id.length > 0) {
            runIds.push(id);
        }
    }
    if (runIds.length === 0 || runIds.length !== input.runs.length) {
        throw new WorkbenchCodedError("cloud_run_missing", "Workbench Cloud did not return a run id.", {
            retryable: true,
            remediation: "Run workbench log --runs.",
            exitCode: 1,
        });
    }
    let sync = input.initialSync;
    const timeoutMs = positiveIntEnv("WORKBENCH_CLOUD_RUN_TIMEOUT_MS") ?? CLOUD_RUN_TIMEOUT_MS;
    const pollIntervalMs = positiveIntEnv("WORKBENCH_CLOUD_RUN_POLL_INTERVAL_MS") ?? CLOUD_RUN_POLL_INTERVAL_MS;
    const deadline = Date.now() + timeoutMs;
    for (;;) {
        const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(input.core);
        // Keep only the requested runs that already appear in the snapshot.
        const runs = runIds.flatMap((id) => {
            const match = snapshot.runs.find((entry) => entry.id === id);
            return match ? [match] : [];
        });
        const allTerminal = runs.length === runIds.length && runs.every(isTerminalRun);
        if (allTerminal) {
            return { runs, sync };
        }
        if (Date.now() >= deadline) {
            throw new WorkbenchCodedError("cloud_run_pending", "Hosted Workbench run is still running.", {
                retryable: true,
                remediation: runIds[0] ? `Run workbench show ${runIds[0]}.` : "Run workbench log --runs.",
                subject: {
                    runIds,
                    statuses: Object.fromEntries(runs.map((run) => [run.id, run.status])),
                },
                exitCode: 1,
            });
        }
        await sleep(pollIntervalMs);
        sync = await syncWorkbenchRemote({ ...input.core, remote: input.remote.name });
    }
}
|
|
1079
|
+
/**
 * Tells whether a run has finished.
 *
 * @param {{ status: string }} run
 * @returns {boolean} `true` for status "succeeded", "failed", or "canceled".
 */
function isTerminalRun(run) {
    const terminalStatuses = ["succeeded", "failed", "canceled"];
    return terminalStatuses.includes(run.status);
}
|
|
1082
|
+
/**
 * Switches the local checkout to the version produced by a hosted improve run,
 * but only when the cloud's `current` ref points at that version.
 *
 * @param {object} started - Execution context as returned by `startCloudExecution`.
 * @returns {Promise<string|undefined>} The id of the switched-to version, or
 *   `undefined` when no succeeded run has an output version or the cloud
 *   `current` ref is a different version.
 * @throws {WorkbenchCodedError} `worktree_changed` when the local current
 *   version differs from the one recorded when the run started.
 */
async function switchHostedImproveVersionIfPromoted(started) {
    const promotedRun = started.runs.find((run) => run.status === "succeeded" && run.outputVersionId);
    const outputVersionId = promotedRun?.outputVersionId;
    if (!outputVersionId) {
        return undefined;
    }
    const refs = await fetchCloudObjectRefs(started);
    if (refs.current !== outputVersionId) {
        return undefined;
    }
    await listWorkbenchVersions(started.core);
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(started.core);
    const currentVersionId = snapshot.status.currentVersionId ?? snapshot.refs.current;
    const movedSinceStart = Boolean(started.startVersionId && currentVersionId && currentVersionId !== started.startVersionId);
    if (movedSinceStart) {
        throw new WorkbenchCodedError("worktree_changed", "Local source changed while hosted improve was running; refusing to overwrite it.", {
            remediation: `Review workbench diff, then run workbench switch ${outputVersionId} when ready.`,
            subject: {
                startedFrom: started.startVersionId,
                current: currentVersionId,
                hostedVersion: outputVersionId,
            },
            exitCode: 1,
        });
    }
    const { id } = await switchWorkbenchVersion(outputVersionId, started.core);
    return id;
}
|
|
1108
|
+
/**
 * Fetches the skill's `/objects` endpoint and returns the refs from its object pack.
 *
 * @param {{ skillId: string, source: { baseUrl: string } }} started
 * @returns {Promise<object>} `response.objectPack.refs`, or `{}` when absent.
 */
async function fetchCloudObjectRefs(started) {
    const endpoint = `/api/workbench/skills/${encodeURIComponent(started.skillId)}/objects`;
    const response = await apiRequest(endpoint, {}, started.source.baseUrl);
    const refs = response.objectPack?.refs;
    return refs ?? {};
}
|
|
1112
|
+
/**
 * Returns the project's linked cloud remote, creating the link first when the
 * project has none.
 *
 * @param {string} root - Project directory.
 * @param {object} parsed - Parsed CLI arguments (consulted when deriving the remote).
 * @returns {Promise<object>} The existing remote, or the remote returned by `addWorkbenchRemote`.
 * @throws {WorkbenchCodedError} `remote_invalid_url` when the derived URL does
 *   not parse as a cloud skill URL; `auth_required` when no token is available.
 */
async function ensureCloudRemoteForExecution(root, parsed) {
    const alreadyLinked = await linkedCloudRemote(root);
    if (alreadyLinked) {
        return alreadyLinked;
    }
    const link = await cloudRemoteLinkTarget(root);
    const remote = await derivePublishCloudRemote(parsed, "workbench --cloud", link.name);
    const source = parseWorkbenchInstallSource(remote.url);
    if (!source) {
        throw new WorkbenchCodedError("remote_invalid_url", `Workbench remote is not a Cloud skill URL: ${remote.url}`, {
            remediation: "Run workbench publish to recreate the Workbench Cloud link.",
            subject: { remote: remote.name, url: remote.url },
            exitCode: 2,
        });
    }
    const token = await workbenchCloudToken({ baseUrl: source.baseUrl });
    if (!token) {
        throw new WorkbenchCodedError("auth_required", "workbench --cloud requires Workbench Cloud auth.", {
            remediation: `Run workbench login --base-url ${source.baseUrl}.`,
            exitCode: 1,
        });
    }
    const addOptions = {
        dir: root,
        authToken: token,
        replace: link.replace,
    };
    const { remote: added } = await addWorkbenchRemote(remote.name, remote.url, addOptions);
    return added;
}
|
|
1141
|
+
/**
 * Looks up the preferred cloud remote configured for the project.
 *
 * @param {string} root - Project directory.
 * @returns {Promise<object|null>} The preferred cloud remote, or `null` when none is configured.
 */
async function linkedCloudRemote(root) {
    const remotes = await inspectionRemotes(root);
    const preferred = preferredCloudRemote(remotes);
    return preferred ?? null;
}
|
|
1144
|
+
/**
 * Lists the remotes recorded in the project's inspection snapshot.
 *
 * A `WorkbenchCodedError` or `WorkbenchUserError` rejection from the snapshot
 * call yields an empty list; any other rejection is rethrown.
 *
 * @param {string} root - Project directory.
 * @returns {Promise<object[]>} The snapshot's remotes, or `[]`.
 */
async function inspectionRemotes(root) {
    const ignoreWorkbenchErrors = (error) => {
        const isWorkbenchError = error instanceof WorkbenchCodedError || error instanceof WorkbenchUserError;
        if (!isWorkbenchError) {
            throw error;
        }
        return null;
    };
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot({ dir: root }).catch(ignoreWorkbenchErrors);
    return snapshot?.remotes ?? [];
}
|
|
1153
|
+
/**
 * Computes the cloud remote link target for the project at `root`.
 *
 * @param {string} root - Project directory.
 * @returns {Promise<object>} The result of `cloudRemoteLinkTargetFromRemotes`
 *   applied to the project's remotes.
 */
async function cloudRemoteLinkTarget(root) {
    const remotes = await inspectionRemotes(root);
    return cloudRemoteLinkTargetFromRemotes(remotes);
}
|
|
1156
|
+
/**
 * Decides which remote name a cloud link should use.
 *
 * @param {object[]} remotes - Remotes already configured.
 * @returns {{ name: string, replace: boolean, existing?: object }} When a cloud
 *   remote exists: its name, `replace: true`, and the remote as `existing`.
 *   Otherwise an unused name and `replace: false`.
 */
function cloudRemoteLinkTargetFromRemotes(remotes) {
    const existing = preferredCloudRemote(remotes);
    return existing
        ? { name: existing.name, replace: true, existing }
        : { name: availableCloudRemoteName(remotes), replace: false };
}
|
|
1163
|
+
/**
 * Picks the cloud remote to use: the first `workbench-cloud` remote named
 * "cloud", otherwise the first `workbench-cloud` remote of any name.
 *
 * @param {Array<{ name: string, kind: string }>} remotes
 * @returns {object|undefined} The chosen remote, or `undefined` when there is none.
 */
function preferredCloudRemote(remotes) {
    let firstCloudRemote;
    for (const remote of remotes) {
        if (remote.kind !== "workbench-cloud") {
            continue;
        }
        if (remote.name === "cloud") {
            return remote;
        }
        if (firstCloudRemote === undefined) {
            firstCloudRemote = remote;
        }
    }
    return firstCloudRemote;
}
|
|
1167
|
+
/**
 * Finds a remote name that is not yet in use: "cloud" if free, otherwise the
 * first free name among "cloud-1", "cloud-2", ...
 *
 * @param {Array<{ name: string }>} remotes - Remotes already configured.
 * @returns {string} An unused remote name.
 */
function availableCloudRemoteName(remotes) {
    const taken = new Set(remotes.map((remote) => remote.name));
    let candidate = "cloud";
    let suffix = 0;
    while (taken.has(candidate)) {
        suffix += 1;
        candidate = `cloud-${suffix}`;
    }
    return candidate;
}
|
|
1179
|
+
/**
 * Resolves the cloud skill id for an owner/skill pair by listing skills from
 * the API and matching on `ownerSlug` and `name`.
 *
 * @param {{ baseUrl: string, owner: string, skill: string }} source
 * @returns {Promise<string>} The matching skill's id.
 * @throws {WorkbenchCodedError} `remote_not_found` when no listed skill matches
 *   or the match has no id.
 */
async function resolveCloudSkillId(source) {
    const listed = await apiRequest("/api/workbench/skills", {}, source.baseUrl);
    const isRequestedSkill = (entry) => entry.ownerSlug === source.owner && entry.name === source.skill;
    const match = listed.skills?.find(isRequestedSkill);
    if (match?.id) {
        return match.id;
    }
    throw new WorkbenchCodedError("remote_not_found", `Workbench Cloud skill not found: ${source.owner}/${source.skill}`, {
        remediation: "Run workbench publish.",
        subject: { owner: source.owner, skill: source.skill },
        exitCode: 1,
    });
}
|
|
1191
|
+
/**
 * Builds the JSON body for a hosted eval/improve request from CLI arguments.
 *
 * @param {string} command - CLI command; `budget` is included only for "improve".
 * @param {object} parsed - Parsed CLI arguments.
 * @returns {object} `{ version, skill, agent, samples }` plus `budget` for improve.
 */
function cloudExecutionRequestBody(command, parsed) {
    const body = {
        version: optionalPositional(parsed, 1),
        skill: stringFlag(parsed, "skills"),
        agent: stringFlag(parsed, "agents"),
        samples: intFlag(parsed, "samples"),
    };
    if (command === "improve") {
        body.budget = intFlag(parsed, "budget");
    }
    return body;
}
|
|
1200
|
+
/**
 * Suggested follow-up commands after a hosted eval.
 * Delegates to `cloudExecutionNextCommands` with "workbench publish" as the success command.
 *
 * @param {object[]} runs - Runs produced by the hosted eval.
 * @returns {string[]} Suggested commands.
 */
function cloudEvalNextCommands(runs) {
    const successCommand = "workbench publish";
    return cloudExecutionNextCommands(runs, successCommand);
}
|
|
1203
|
+
/**
 * Suggested follow-up commands after a hosted improve.
 * Delegates to `cloudExecutionNextCommands` with "workbench eval" as the success command.
 *
 * @param {object[]} runs - Runs produced by the hosted improve.
 * @returns {string[]} Suggested commands.
 */
function cloudImproveNextCommands(runs) {
    const successCommand = "workbench eval";
    return cloudExecutionNextCommands(runs, successCommand);
}
|
|
1206
|
+
/**
 * Chooses the follow-up command to suggest after a hosted execution.
 *
 * Every run is inspected, not just the first: if any run is still running,
 * failed, or was canceled, the suggestion points at the first such run so a
 * problem in a later run is not hidden behind the success command.
 *
 * @param {Array<{ id: string, status: string }>} runs - Runs from the hosted execution.
 * @param {string} successCommand - Command to suggest when no run needs attention.
 * @returns {string[]} A single-element list holding the suggested command.
 */
function cloudExecutionNextCommands(runs, successCommand) {
    if (!runs[0]) {
        return ["workbench log --runs"];
    }
    const needsAttention = runs.find((run) => run &&
        (run.status === "running" || run.status === "failed" || run.status === "canceled"));
    if (needsAttention) {
        return [`workbench show ${needsAttention.id}`];
    }
    return [successCommand];
}
|
|
1216
|
+
/**
 * Projects a hosted execution context down to the fields reported to the user.
 *
 * @param {{ remote: { name: string, url: string }, skillId: string, sync: object }} started
 * @returns {{ remote: string, url: string, skillId: string, sync: object }}
 */
function cloudExecutionSummary(started) {
    const { remote, skillId, sync } = started;
    return {
        remote: remote.name,
        url: remote.url,
        skillId,
        sync,
    };
}
|
|
1019
1224
|
function workbenchInstallSourceSummary(source, snapshot) {
|
|
1020
1225
|
const installUrl = `${source.baseUrl}/skills/${encodeURIComponent(source.owner)}/${encodeURIComponent(source.skill)}`;
|
|
1021
1226
|
return {
|
|
@@ -1175,6 +1380,7 @@ async function loadConfig() {
|
|
|
1175
1380
|
...(typeof parsed.baseUrl === "string" ? { baseUrl: normalizeBaseUrl(parsed.baseUrl) } : {}),
|
|
1176
1381
|
...(typeof parsed.accessToken === "string" ? { accessToken: parsed.accessToken } : {}),
|
|
1177
1382
|
...(typeof parsed.username === "string" ? { username: parsed.username } : {}),
|
|
1383
|
+
...(Array.isArray(parsed.installTargets) ? { installTargets: normalizeInstallTargetNames(parsed.installTargets.flatMap((entry) => typeof entry === "string" ? [entry] : [])) } : {}),
|
|
1178
1384
|
};
|
|
1179
1385
|
}
|
|
1180
1386
|
// Single resolver for the Workbench Cloud token used by every authenticated
|
|
@@ -1428,8 +1634,11 @@ function encodeJsonRequestBody(body) {
|
|
|
1428
1634
|
if (Buffer.byteLength(text) < API_REQUEST_GZIP_THRESHOLD_BYTES) {
|
|
1429
1635
|
return { body: text, headers: { "content-type": "application/json" } };
|
|
1430
1636
|
}
|
|
1637
|
+
const compressed = gzipSync(text);
|
|
1638
|
+
const compressedBody = new ArrayBuffer(compressed.byteLength);
|
|
1639
|
+
new Uint8Array(compressedBody).set(compressed);
|
|
1431
1640
|
return {
|
|
1432
|
-
body:
|
|
1641
|
+
body: compressedBody,
|
|
1433
1642
|
headers: {
|
|
1434
1643
|
"content-encoding": "gzip",
|
|
1435
1644
|
"content-type": "application/json",
|
|
@@ -1535,6 +1744,14 @@ function errorMessage(error) {
|
|
|
1535
1744
|
/**
 * Returns a promise that resolves after `ms` milliseconds via `setTimeout`.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
|
|
1747
|
+
/**
 * Reads an environment variable as a positive safe integer.
 *
 * @param {string} name - Environment variable name.
 * @returns {number|undefined} The parsed value, or `undefined` when the
 *   variable is unset, blank, or not a safe integer greater than zero.
 */
function positiveIntEnv(name) {
    const text = process.env[name]?.trim();
    if (!text) {
        return undefined;
    }
    const parsed = Number(text);
    if (!Number.isSafeInteger(parsed) || parsed <= 0) {
        return undefined;
    }
    return parsed;
}
|
|
1538
1755
|
async function openBrowser(url) {
|
|
1539
1756
|
const command = process.platform === "darwin"
|
|
1540
1757
|
? "open"
|
|
@@ -1870,6 +2087,17 @@ function parseArgs(argv) {
|
|
|
1870
2087
|
addFlag(flags, "version", true);
|
|
1871
2088
|
continue;
|
|
1872
2089
|
}
|
|
2090
|
+
if (arg === "-n") {
|
|
2091
|
+
const value = argv[index + 1];
|
|
2092
|
+
if (value && !value.startsWith("-")) {
|
|
2093
|
+
index += 1;
|
|
2094
|
+
addFlag(flags, "samples", value);
|
|
2095
|
+
}
|
|
2096
|
+
else {
|
|
2097
|
+
addFlag(flags, "samples", true);
|
|
2098
|
+
}
|
|
2099
|
+
continue;
|
|
2100
|
+
}
|
|
1873
2101
|
if (!arg.startsWith("--") || arg === "--") {
|
|
1874
2102
|
positionals.push(arg);
|
|
1875
2103
|
continue;
|
|
@@ -1877,7 +2105,9 @@ function parseArgs(argv) {
|
|
|
1877
2105
|
const eq = arg.indexOf("=");
|
|
1878
2106
|
const name = eq === -1 ? arg.slice(2) : arg.slice(2, eq);
|
|
1879
2107
|
const value = eq === -1 ? argv[index + 1] : arg.slice(eq + 1);
|
|
1880
|
-
|
|
2108
|
+
const flagSpec = flagSpecForParsedPrefix(positionals, flags);
|
|
2109
|
+
const kind = flagSpec?.[name];
|
|
2110
|
+
if (eq === -1 && kind === "boolean") {
|
|
1881
2111
|
addFlag(flags, name, true);
|
|
1882
2112
|
}
|
|
1883
2113
|
else if (eq === -1 && value && !value.startsWith("-")) {
|
|
@@ -1890,8 +2120,12 @@ function parseArgs(argv) {
|
|
|
1890
2120
|
}
|
|
1891
2121
|
return { positionals, flags };
|
|
1892
2122
|
}
|
|
2123
|
+
/**
 * Looks up the allowed-flag spec for the command implied by the arguments
 * parsed so far.
 *
 * The command is the first positional; when there is none it is "version" if
 * the `version` flag is exactly `true`, otherwise "status".
 *
 * @param {string[]} positionals - Positionals parsed so far.
 * @param {object} flags - Flags parsed so far.
 * @returns {*} Whatever `allowedFlagsForCommand` returns for that command.
 */
function flagSpecForParsedPrefix(positionals, flags) {
    let command = positionals[0];
    if (command === undefined || command === null) {
        command = flags.version === true ? "version" : "status";
    }
    // The lookup gets a copy of the positionals and an empty flag map.
    const prefix = { positionals: [...positionals], flags: {} };
    return allowedFlagsForCommand(prefix, command);
}
|
|
1893
2127
|
function addFlag(flags, name, value) {
|
|
1894
|
-
if (name === "with") {
|
|
2128
|
+
if (name === "with" || name === "to") {
|
|
1895
2129
|
const existing = flags[name];
|
|
1896
2130
|
flags[name] = Array.isArray(existing)
|
|
1897
2131
|
? [...existing, String(value)]
|
|
@@ -1900,15 +2134,6 @@ function addFlag(flags, name, value) {
|
|
|
1900
2134
|
: [String(existing), String(value)];
|
|
1901
2135
|
return;
|
|
1902
2136
|
}
|
|
1903
|
-
if (name === "agent" || name === "skill") {
|
|
1904
|
-
const existing = flags[name];
|
|
1905
|
-
flags[name] = Array.isArray(existing)
|
|
1906
|
-
? [...existing, String(value)]
|
|
1907
|
-
: existing === undefined
|
|
1908
|
-
? String(value)
|
|
1909
|
-
: [String(existing), String(value)];
|
|
1910
|
-
return;
|
|
1911
|
-
}
|
|
1912
2137
|
flags[name] = value;
|
|
1913
2138
|
}
|
|
1914
2139
|
function dirFlag(parsed) {
|
|
@@ -1972,14 +2197,158 @@ function rejectExtraInput(parsed, input) {
|
|
|
1972
2197
|
exitCode: 2,
|
|
1973
2198
|
});
|
|
1974
2199
|
}
|
|
1975
|
-
function
|
|
1976
|
-
|
|
2200
|
+
/**
 * Builds the default `A..B` range for `workbench diff`: the current version
 * against its first parent, or against itself when it has no parent.
 *
 * @param {object} core - Core options for the project.
 * @returns {Promise<string>} A range of the form `BASE..CURRENT`.
 * @throws {WorkbenchCodedError} `version_not_found` when the current version
 *   id is not among the snapshot's versions.
 */
async function defaultDiffRange(core) {
    await listWorkbenchVersions(core);
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const currentId = snapshot.status.currentVersionId ?? snapshot.refs.current;
    const current = snapshot.versions.find((version) => version.id === currentId);
    if (!current) {
        throw new WorkbenchCodedError("version_not_found", "Current Workbench version was not found.", {
            remediation: "Run workbench log --versions.",
            exitCode: 1,
        });
    }
    const base = current.parentIds[0] || current.id;
    return `${base}..${current.id}`;
}
|
|
2214
|
+
/**
 * Maps the `--private` / `--team` / `--public` flags to a visibility value.
 *
 * A flag counts only when its value is exactly `true`. `--team` maps to "internal".
 *
 * @param {{ flags: object }} parsed - Parsed CLI arguments.
 * @returns {string|undefined} "private", "internal", "public", or `undefined`
 *   when no visibility flag is set.
 * @throws {WorkbenchCodedError} `usage` when more than one flag is set.
 */
function parsePublishVisibilityFlags(parsed) {
    const flagToVisibility = [
        ["private", "private"],
        ["team", "internal"],
        ["public", "public"],
    ];
    const selected = [];
    for (const [flag, visibility] of flagToVisibility) {
        if (parsed.flags[flag] === true) {
            selected.push(visibility);
        }
    }
    if (selected.length > 1) {
        throw new WorkbenchCodedError("usage", "workbench publish accepts only one visibility flag.", {
            remediation: "Run workbench publish --private, workbench publish --team, or workbench publish --public.",
            exitCode: 2,
        });
    }
    return selected[0];
}
|
|
2228
|
+
/**
 * Previews what `workbench publish` would link and publish when the project
 * has no cloud remote yet.
 *
 * @param {object} parsed - Parsed CLI arguments; positional 1 optionally names
 *   the version ("current" or omitted selects the current version).
 * @returns {Promise<object|undefined>} `undefined` when a cloud remote already
 *   exists; otherwise `{ remote, version, visibility, installHandle,
 *   installUrl, pinnedInstallUrl }`, with visibility defaulting to "private".
 * @throws {WorkbenchCodedError} `version_not_found` when the selected version
 *   is not in the snapshot; helper errors propagate unchanged.
 */
async function previewPublishWithDerivedRemote(parsed) {
    const root = path.resolve(dirFlag(parsed) ?? process.cwd());
    const core = await coreOptions(parsed);
    await listWorkbenchVersions(core);
    const reconciledSnapshot = await createWorkbenchReadOnlyInspectionSnapshot({ dir: root });
    const link = cloudRemoteLinkTargetFromRemotes(reconciledSnapshot.remotes);
    if (link.existing) {
        return undefined;
    }
    const remote = await derivePublishCloudRemote(parsed, "workbench publish", link.name);
    const requestedVersion = optionalPositional(parsed, 1);
    const wantsExplicitVersion = requestedVersion && requestedVersion !== "current";
    const versionId = wantsExplicitVersion
        ? requestedVersion
        : reconciledSnapshot.status.currentVersionId ?? reconciledSnapshot.refs.current;
    const version = reconciledSnapshot.versions.find((entry) => entry.id === versionId);
    if (!version) {
        const label = requestedVersion ?? "current";
        throw new WorkbenchCodedError("version_not_found", `Version not found: ${label}`, {
            remediation: "Run workbench log --versions.",
            subject: { version: label },
            exitCode: 1,
        });
    }
    return {
        remote,
        version,
        visibility: parsePublishVisibilityFlags(parsed) ?? "private",
        installHandle: installHandleFromCloudRemote(remote),
        installUrl: remote.url,
        pinnedInstallUrl: `${remote.url}/releases/${encodeURIComponent(version.id)}`,
    };
}
|
|
2259
|
+
/**
 * Makes sure a cloud remote exists for `workbench publish` and returns its name.
 *
 * With `--as`, the remote is (re)derived and added with `replace` taken from
 * the link target. Without `--as`, an existing cloud remote is reused;
 * otherwise a newly derived remote is added.
 *
 * @param {object} parsed - Parsed CLI arguments.
 * @returns {Promise<string>} Name of the remote to publish to.
 */
async function ensurePublishRemote(parsed) {
    const core = await coreOptions(parsed);
    const root = path.resolve(dirFlag(parsed) ?? process.cwd());
    const link = await cloudRemoteLinkTarget(root);
    const hasOverride = Boolean(stringFlag(parsed, "as"));
    if (!hasOverride && link.existing) {
        return link.existing.name;
    }
    const remote = await derivePublishCloudRemote(parsed, "workbench publish", link.name);
    // Only the --as path passes `replace`; the fresh-link path uses the core options as-is.
    const addOptions = hasOverride ? { ...core, replace: link.replace } : core;
    const result = await addWorkbenchRemote(remote.name, remote.url, addOptions);
    return result.remote.name;
}
|
|
2276
|
+
/**
 * Derives the cloud remote descriptor for a project.
 *
 * The owner/skill handle comes from `--as` when given, otherwise from the
 * configured username and project directory via `derivedOwnerSkillHandle`.
 *
 * @param {object} parsed - Parsed CLI arguments.
 * @param {string} [action="workbench publish"] - Command label passed to `derivedOwnerSkillHandle`.
 * @param {string} [name="cloud"] - Name to give the remote.
 * @returns {Promise<{ name: string, kind: string, url: string }>}
 */
async function derivePublishCloudRemote(parsed, action = "workbench publish", name = "cloud") {
    const config = await loadConfig();
    const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl }) ?? DEFAULT_WORKBENCH_CLOUD_BASE_URL;
    const override = stringFlag(parsed, "as");
    let handle;
    if (override) {
        handle = parseOwnerSkillHandle(override);
    }
    else {
        handle = derivedOwnerSkillHandle(parsed, config, action);
    }
    const ownerSegment = encodeURIComponent(handle.owner);
    const skillSegment = encodeURIComponent(handle.skill);
    return { name, kind: "workbench-cloud", url: `${baseUrl}/skills/${ownerSegment}/${skillSegment}` };
}
|
|
2284
|
+
/**
 * Derives the `OWNER/SKILL` install handle from a cloud remote's URL.
 *
 * @param {{ name: string, url: string }} remote
 * @returns {string} `${owner}/${skill}` as parsed from the URL.
 * @throws {WorkbenchCodedError} `remote_invalid_url` when the URL does not
 *   parse as a cloud skill URL.
 */
function installHandleFromCloudRemote(remote) {
    const source = parseWorkbenchInstallSource(remote.url);
    if (source) {
        return `${source.owner}/${source.skill}`;
    }
    throw new WorkbenchCodedError("remote_invalid_url", `Workbench remote is not a Cloud skill URL: ${remote.url}`, {
        remediation: "Run workbench publish to recreate the Workbench Cloud link.",
        subject: { remote: remote.name, url: remote.url },
        exitCode: 2,
    });
}
|
|
2295
|
+
/**
 * Parses the value of `workbench publish --as` into an owner/skill handle.
 *
 * @param {string} input - Raw `OWNER/SKILL` text.
 * @returns {{ owner: string, skill: string }} The normalized handle.
 * @throws {WorkbenchCodedError} `usage` when the input is not a valid handle.
 */
function parseOwnerSkillHandle(input) {
    const handle = normalizedOwnerSkillHandle(input);
    if (handle) {
        return handle;
    }
    throw new WorkbenchCodedError("usage", "workbench publish --as expects OWNER/SKILL.", {
        remediation: "Run workbench publish --as OWNER/SKILL.",
        exitCode: 2,
    });
}
|
|
2305
|
+
/**
 * Derives an owner/skill handle from the configured username and the project
 * directory's base name.
 *
 * @param {object} parsed - Parsed CLI arguments (supplies the project directory).
 * @param {{ username?: string }} config - Loaded CLI config.
 * @param {string} action - Command label used in error messages.
 * @returns {{ owner: string, skill: string }} The normalized handle.
 * @throws {WorkbenchCodedError} `auth_required` when no username is configured;
 *   `usage` when either part normalizes to an empty value.
 */
function derivedOwnerSkillHandle(parsed, config, action) {
    const owner = config.username?.trim();
    if (!owner) {
        throw new WorkbenchCodedError("auth_required", `${action} needs a logged-in Workbench Cloud username before it can derive OWNER/SKILL.`, {
            remediation: "Run workbench login.",
            exitCode: 1,
        });
    }
    const root = path.resolve(dirFlag(parsed) ?? process.cwd());
    const directoryName = path.basename(root);
    const handle = normalizeOwnerSkillHandle(owner, directoryName);
    const isComplete = Boolean(handle.owner && handle.skill);
    if (!isComplete) {
        throw new WorkbenchCodedError("usage", `${action} could not derive a valid OWNER/SKILL handle.`, {
            remediation: `Run ${action} --as OWNER/SKILL.`,
            subject: { owner, skill: directoryName },
            exitCode: 2,
        });
    }
    return handle;
}
|
|
2324
|
+
/**
 * Turns the argument of `workbench install` into a skill URL.
 *
 * Inputs that start with an `http://` or `https://` scheme are returned
 * unchanged. The scheme is matched case-insensitively (URI schemes are
 * case-insensitive), so `HTTPS://...` is treated as a URL rather than rejected
 * as a malformed OWNER/SKILL handle. Any other input must be `OWNER/SKILL` and
 * is expanded against the configured (or default) Workbench Cloud base URL.
 *
 * @param {string} input - URL or `OWNER/SKILL` handle.
 * @returns {Promise<string>} The skill URL.
 * @throws {WorkbenchCodedError} `usage` when the input is neither a URL nor a
 *   valid handle.
 */
async function resolveWorkbenchInstallSourceInput(input) {
    if (/^https?:\/\//iu.test(input)) {
        return input;
    }
    const handle = normalizedOwnerSkillHandle(input);
    if (!handle) {
        throw new WorkbenchCodedError("usage", "workbench install expects OWNER/SKILL or a Workbench Cloud skill URL.", {
            remediation: "Run workbench install OWNER/SKILL --to codex.",
            exitCode: 2,
        });
    }
    const config = await loadConfig();
    const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl }) ?? DEFAULT_WORKBENCH_CLOUD_BASE_URL;
    return `${baseUrl}/skills/${encodeURIComponent(handle.owner)}/${encodeURIComponent(handle.skill)}`;
}
|
|
2339
|
+
/**
 * Parses `OWNER/SKILL` text into a normalized handle.
 *
 * @param {string} value - Raw text; surrounding whitespace is trimmed.
 * @returns {{ owner: string, skill: string }|null} The normalized handle, or
 *   `null` when the text does not have exactly two `/`-separated parts or
 *   either part normalizes to a falsy value.
 */
function normalizedOwnerSkillHandle(value) {
    const segments = value.trim().split("/");
    if (segments.length !== 2) {
        return null;
    }
    const [owner, skill] = segments;
    const handle = normalizeOwnerSkillHandle(owner ?? "", skill ?? "");
    if (!handle.owner || !handle.skill) {
        return null;
    }
    return handle;
}
|
|
2347
|
+
/**
 * Normalizes both parts of a handle with `normalizeWorkbenchSkillName`.
 *
 * @param {string} owner - Raw owner text.
 * @param {string} skill - Raw skill text.
 * @returns {{ owner: *, skill: * }} The two normalized parts.
 */
function normalizeOwnerSkillHandle(owner, skill) {
    const normalizedOwner = normalizeWorkbenchSkillName(owner);
    const normalizedSkill = normalizeWorkbenchSkillName(skill);
    return { owner: normalizedOwner, skill: normalizedSkill };
}
|
|
1984
2353
|
function parseWithFlags(parsed) {
|
|
1985
2354
|
const raw = parsed.flags.with;
|
|
@@ -2039,7 +2408,7 @@ function emitEvalFailure(runs, failedRuns, artifactIds, parsed, io) {
|
|
|
2039
2408
|
io.stdout.write([
|
|
2040
2409
|
"Eval failed; evidence was saved.",
|
|
2041
2410
|
...failedRuns.map(formatRun),
|
|
2042
|
-
...(nextCommands
|
|
2411
|
+
...(nextCommands[0] ? [`next: ${nextCommands[0]}`] : []),
|
|
2043
2412
|
].join("\n") + "\n");
|
|
2044
2413
|
return 1;
|
|
2045
2414
|
}
|
|
@@ -2075,14 +2444,13 @@ function runFailureSummary(run, artifactIds) {
|
|
|
2075
2444
|
/**
 * Suggested follow-up commands after a failed eval, based on the first failed run.
 *
 * @param {Array<{ id: string, agentName: string }>} failedRuns
 * @returns {string[]} `["workbench log --runs"]` when there is no failed run;
 *   otherwise show, stderr, case-add, and improve commands for the first one.
 */
function evalFailureNextCommands(failedRuns) {
    const [first] = failedRuns;
    if (!first) {
        return ["workbench log --runs"];
    }
    const { id, agentName } = first;
    return [
        `workbench show ${id}`,
        `workbench show ${id}:stderr.log`,
        `workbench case add ${id}`,
        `workbench improve --agents ${agentName} --budget 1 -n 1`,
    ];
}
|
|
2088
2456
|
function output(value, parsed, io, text) {
|
|
@@ -2091,7 +2459,7 @@ function output(value, parsed, io, text) {
|
|
|
2091
2459
|
function commandSchema(parsed) {
|
|
2092
2460
|
const command = parsed.positionals[0] ?? "result";
|
|
2093
2461
|
const subcommand = parsed.positionals[1];
|
|
2094
|
-
const suffix = ["
|
|
2462
|
+
const suffix = ["agent", "case"].includes(command) && subcommand
|
|
2095
2463
|
? `${command}-${subcommand}`
|
|
2096
2464
|
: command;
|
|
2097
2465
|
return `workbench.cli.${suffix}.v1`;
|
|
@@ -2116,6 +2484,126 @@ async function workbenchCliAuthStatus() {
|
|
|
2116
2484
|
})),
|
|
2117
2485
|
};
|
|
2118
2486
|
}
|
|
2487
|
+
/**
 * Formats one log entry as a tab-separated line.
 *
 * Version entries print created time, id, file count, and message. Every other
 * entry is printed as a run, with the score to three decimals or "n/a" when
 * the score is undefined.
 *
 * @param {object} entry - Log entry with a `kind` discriminator.
 * @returns {string} The formatted line.
 */
function formatLogEntry(entry) {
    const tab = "\t";
    if (entry.kind === "version") {
        return [
            `${entry.createdAt}`,
            "version",
            `${entry.id}`,
            `files=${entry.fileCount}`,
            `${entry.message}`,
        ].join(tab);
    }
    let score = "n/a";
    if (entry.score !== undefined) {
        score = entry.score.toFixed(3);
    }
    return [
        `${entry.createdAt}`,
        "run",
        `${entry.id}`,
        `${entry.status}`,
        `version=${entry.versionId}`,
        `skill=${entry.skillName}`,
        `agent=${entry.agentName}`,
        `score=${score}`,
    ].join(tab);
}
|
|
2494
|
+
/**
 * Splits a `REF:PATH` argument at the first colon.
 *
 * @param {string} ref - Reference text, optionally followed by `:path`.
 * @returns {[string, string|null]} `[ref, null]` when there is no colon,
 *   otherwise the text before and after the first colon.
 */
function splitShowRef(ref) {
    const colon = ref.indexOf(":");
    return colon === -1
        ? [ref, null]
        : [ref.slice(0, colon), ref.slice(colon + 1)];
}
|
|
2501
|
+
/**
 * Finds a file in the traces attached to a run or job.
 *
 * @param {object} core - Core options for the project.
 * @param {string} objectRef - Run id or job id.
 * @param {string} requestedPath - Path (or file name) to look up via `findShowFile`.
 * @returns {Promise<object|null>} The matching file, or `null` when `objectRef`
 *   names neither a run nor a job.
 * @throws {WorkbenchCodedError} `ref_not_found` when the run/job exists but
 *   none of its traces contains the file.
 */
async function fileForRunOrJobRef(core, objectRef, requestedPath) {
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const hasRequestedId = (entry) => entry.id === objectRef;
    const run = snapshot.runs.find(hasRequestedId);
    const job = snapshot.jobs.find(hasRequestedId);
    if (!run && !job) {
        return null;
    }
    const traceIds = run?.traceIds ?? job?.traceIds ?? [];
    // Traces are visited in snapshot order; the first trace with a match wins.
    for (const trace of snapshot.traces) {
        if (!traceIds.includes(trace.id)) {
            continue;
        }
        const file = findShowFile(trace.files, requestedPath);
        if (file) {
            return file;
        }
    }
    throw new WorkbenchCodedError("ref_not_found", `File not found in ${objectRef}: ${requestedPath}`, {
        remediation: `Run workbench show ${objectRef}.`,
        subject: { ref: objectRef, path: requestedPath },
        exitCode: 1,
    });
}
|
|
2522
|
+
/**
 * Collects evidence details for a run (all of its jobs) or for a single job.
 *
 * A detail is kept only when at least one of its executions has sessions,
 * trace spans, trace events, or trace summaries.
 *
 * @param {object} snapshot - Inspection snapshot.
 * @param {string} ref - Run id or job id.
 * @returns {object[]} Non-empty evidence details; `[]` when `ref` matches nothing.
 */
function evidenceDetailsForRunOrJob(snapshot, ref) {
    const run = snapshot.runs.find((entry) => entry.id === ref);
    const job = snapshot.jobs.find((entry) => entry.id === ref);
    let jobs;
    if (run) {
        jobs = snapshot.jobs.filter((entry) => entry.runId === run.id);
    }
    else {
        jobs = job ? [job] : [];
    }
    const hasEvidence = (execution) => execution.sessions.length > 0 ||
        execution.trace.spans.length > 0 ||
        execution.trace.events.length > 0 ||
        execution.trace.summaries.length > 0;
    const details = [];
    for (const entry of jobs) {
        const detail = workbenchJobEvidenceForSnapshot(snapshot, {
            runId: entry.runId,
            jobId: entry.id,
        });
        if (detail) {
            details.push(detail);
        }
    }
    return details.filter((detail) => detail.executions.some(hasEvidence));
}
|
|
2539
|
+
/**
 * Finds a file by path, trying progressively looser matches.
 *
 * Backslashes in `requestedPath` are converted to forward slashes, then the
 * files are searched for: an exact path match, a path ending in
 * `/<requested>`, and finally a base-name match. The first strategy that
 * finds a file wins.
 *
 * @param {Array<{ path: string }>} files
 * @param {string} requestedPath
 * @returns {object|null} The matching file, or `null`.
 */
function findShowFile(files, requestedPath) {
    const wanted = requestedPath.replace(/\\/gu, "/");
    const strategies = [
        (file) => file.path === wanted,
        (file) => file.path.endsWith(`/${wanted}`),
        (file) => path.basename(file.path) === wanted,
    ];
    for (const matches of strategies) {
        const hit = files.find(matches);
        if (hit !== undefined && hit !== null) {
            return hit;
        }
    }
    return null;
}
|
|
2546
|
+
/**
 * Builds the structured file listing for an object.
 *
 * @param {string} kind - Object kind label.
 * @param {string} id - Object id.
 * @param {object[]} files - Files to summarize with `fileSummary`.
 * @returns {{ kind: string, id: string, fileCount: number, files: object[] }}
 */
function fileListing(kind, id, files) {
    const summaries = files.map(fileSummary);
    return { kind, id, fileCount: files.length, files: summaries };
}
|
|
2554
|
+
/**
 * Formats a file listing as text: a tab-separated header line followed by one
 * file path per line.
 *
 * @param {string} kind - Object kind label.
 * @param {string} id - Object id.
 * @param {Array<{ path: string }>} files
 * @returns {string} The newline-joined listing.
 */
function formatFileListing(kind, id, files) {
    const lines = [`${kind}\t${id}\tfiles=${files.length}`];
    for (const file of files) {
        lines.push(file.path);
    }
    return lines.join("\n");
}
|
|
2557
|
+
/**
 * Resolves the trace id to use as a case source from a trace, run, or job id.
 *
 * A matching trace id is returned directly. Otherwise the first trace id of
 * the matching run is used, falling back to the first trace id of the
 * matching job.
 *
 * @param {object} core - Core options for the project.
 * @param {string} ref - Trace id, run id, or job id.
 * @returns {Promise<string>} The resolved trace id.
 * @throws {WorkbenchCodedError} `ref_not_found` when nothing resolves.
 */
async function traceIdForCaseSource(core, ref) {
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const hasRef = (entry) => entry.id === ref;
    const trace = snapshot.traces.find(hasRef);
    if (trace) {
        return trace.id;
    }
    const run = snapshot.runs.find(hasRef);
    const job = snapshot.jobs.find(hasRef);
    const traceId = run?.traceIds[0] ?? job?.traceIds[0];
    if (!traceId) {
        throw new WorkbenchCodedError("ref_not_found", `Run, job, or trace not found: ${ref}`, {
            remediation: "Run workbench log, then workbench case add RUN_ID.",
            subject: { ref },
            exitCode: 1,
        });
    }
    return traceId;
}
|
|
2575
|
+
/**
 * Computes, for each run, its score delta against the most recent earlier
 * scored run with the same skill and agent.
 *
 * @param {object} core - Core options for the project.
 * @param {object[]} runs - Runs to describe.
 * @returns {Promise<object[]>} One entry per run with `runId`, `versionId`,
 *   `skillName`, `agentName`, and — when available — `score`,
 *   `previousScore`, and `delta`.
 */
async function evalDeltas(core, runs) {
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    return runs.map((run) => {
        const earlierScored = snapshot.runs.filter((candidate) => candidate.id !== run.id &&
            candidate.skillName === run.skillName &&
            candidate.agentName === run.agentName &&
            typeof candidate.score === "number" &&
            candidate.createdAt < run.createdAt);
        // Latest by createdAt; on ties the earliest-listed candidate is kept.
        let previous;
        for (const candidate of earlierScored) {
            if (previous === undefined || candidate.createdAt.localeCompare(previous.createdAt) > 0) {
                previous = candidate;
            }
        }
        const entry = {
            runId: run.id,
            versionId: run.versionId,
            skillName: run.skillName,
            agentName: run.agentName,
        };
        if (run.score !== undefined) {
            entry.score = run.score;
        }
        if (previous?.score !== undefined) {
            entry.previousScore = previous.score;
        }
        if (run.score !== undefined && previous?.score !== undefined) {
            entry.delta = run.score - previous.score;
        }
        return entry;
    });
}
|
|
2596
|
+
/**
 * Formats one eval delta as `SKILL VERSION SCORE (was PREVIOUS, ±DELTA)`.
 *
 * Scores print with three decimals, or "n/a" when undefined. When there is no
 * previous score or no delta, the suffix is `(was n/a)`. Non-negative deltas
 * get an explicit "+" prefix.
 *
 * @param {object} delta - Entry as produced by `evalDeltas`.
 * @returns {string} The formatted line.
 */
function formatEvalDelta(delta) {
    const { skillName, versionId, previousScore } = delta;
    const change = delta.delta;
    const score = delta.score === undefined ? "n/a" : delta.score.toFixed(3);
    const prefix = `${skillName} ${versionId} ${score}`;
    if (previousScore === undefined || change === undefined) {
        return `${prefix} (was n/a)`;
    }
    const sign = change >= 0 ? "+" : "";
    return `${prefix} (was ${previousScore.toFixed(3)}, ${sign}${change.toFixed(3)})`;
}
|
|
2604
|
+
/**
 * Suggested follow-up commands after a successful eval.
 *
 * @param {object[]} runs - Runs produced by the eval.
 * @returns {string[]} `["workbench publish"]` when there is at least one run,
 *   otherwise `["workbench eval"]`.
 */
function evalSuccessNextCommands(runs) {
    if (runs.length > 0) {
        return ["workbench publish"];
    }
    return ["workbench eval"];
}
|
|
2119
2607
|
function formatStatusSnapshot(status) {
|
|
2120
2608
|
const lines = [
|
|
2121
2609
|
`Root: ${status.project.root}`,
|
|
@@ -2146,42 +2634,10 @@ function formatStatusSnapshot(status) {
|
|
|
2146
2634
|
: []),
|
|
2147
2635
|
];
|
|
2148
2636
|
})] : ["Remotes: none"]),
|
|
2149
|
-
...(status.next
|
|
2637
|
+
...(status.next[0] ? [`next: ${status.next[0]}`] : []),
|
|
2150
2638
|
];
|
|
2151
2639
|
return lines.join("\n");
|
|
2152
2640
|
}
|
|
2153
|
-
function formatCheck(result) {
|
|
2154
|
-
return [
|
|
2155
|
-
"Workbench skill is valid.",
|
|
2156
|
-
`Cases: ${result.cases} (${result.plan.source.smokeCaseCount} smoke)`,
|
|
2157
|
-
`Skills: ${result.skills}`,
|
|
2158
|
-
`Agents: ${result.agents}`,
|
|
2159
|
-
`Skill files: ${result.plan.source.skillFiles}`,
|
|
2160
|
-
`Eval files: ${result.plan.source.evalFiles}`,
|
|
2161
|
-
"",
|
|
2162
|
-
"Skill plan:",
|
|
2163
|
-
...result.plan.skills.map((skill) => [
|
|
2164
|
-
skill.name,
|
|
2165
|
-
`bundle=${skill.bundleHash.slice(0, 12)}`,
|
|
2166
|
-
`files=${skill.fileCount}`,
|
|
2167
|
-
`includes=${skill.includedSkillCount}`,
|
|
2168
|
-
].join("\t")),
|
|
2169
|
-
"",
|
|
2170
|
-
"Agent plan:",
|
|
2171
|
-
...result.plan.agents.map((agent) => [
|
|
2172
|
-
agent.name,
|
|
2173
|
-
agent.adapter,
|
|
2174
|
-
agent.model,
|
|
2175
|
-
agent.providerBacked ? "provider-eval" : "local-eval",
|
|
2176
|
-
`network=${agent.network.egress}`,
|
|
2177
|
-
`cpu=${agent.resources.cpu}`,
|
|
2178
|
-
`memoryGb=${agent.resources.memoryGb}`,
|
|
2179
|
-
`timeout=${agent.resources.timeoutMinutes}m`,
|
|
2180
|
-
`image=${agent.image}`,
|
|
2181
|
-
agent.auth ? `auth=${agent.auth}` : undefined,
|
|
2182
|
-
].filter(Boolean).join("\t")),
|
|
2183
|
-
].join("\n");
|
|
2184
|
-
}
|
|
2185
2641
|
/**
 * Formats a version as a tab-separated line: id, first 12 characters of the
 * hash, and message.
 *
 * @param {{ id: string, hash: string, message: string }} version
 * @returns {string} The formatted line.
 */
function formatVersion(version) {
    const { id, hash, message } = version;
    const shortHash = hash.slice(0, 12);
    return `${id}\t${shortHash}\t${message}`;
}
|