@workbench-ai/workbench 0.0.69 → 0.0.71
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1361 -633
- package/dist/install-targets.js +2 -2
- package/package.json +6 -5
package/dist/index.js
CHANGED
|
@@ -4,119 +4,106 @@ import { createRequire } from "node:module";
|
|
|
4
4
|
import os from "node:os";
|
|
5
5
|
import path from "node:path";
|
|
6
6
|
import { gzipSync } from "node:zlib";
|
|
7
|
-
import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent,
|
|
7
|
+
import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchInspectionSnapshot, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, publishWorkbenchVersion, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchSkillImproveCanUseQueuedAdapter, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
|
|
8
|
+
import { normalizeWorkbenchSkillName } from "@workbench-ai/workbench-contract";
|
|
8
9
|
import { emitError, emitResult } from "./output.js";
|
|
9
|
-
import { installSnapshotToTargets,
|
|
10
|
+
import { installSnapshotToTargets, normalizeInstallSnapshotPath, resolveInstallTargets, supportedInstallTargets, } from "./install-targets.js";
|
|
10
11
|
import { startWorkbenchOpenServer } from "./open-server.js";
|
|
11
12
|
const require = createRequire(import.meta.url);
|
|
12
13
|
const HELP = [
|
|
13
14
|
"Usage:",
|
|
15
|
+
" workbench [--json]",
|
|
14
16
|
" workbench <command> [options]",
|
|
15
17
|
"",
|
|
16
|
-
"
|
|
17
|
-
"
|
|
18
|
-
"
|
|
19
|
-
" workbench
|
|
18
|
+
"Bare workbench prints project status and the next useful command.",
|
|
19
|
+
"",
|
|
20
|
+
"Taught commands:",
|
|
21
|
+
" workbench new [DIR] [--json]",
|
|
22
|
+
" workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
23
|
+
" workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
20
24
|
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
21
|
-
" workbench
|
|
25
|
+
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
|
|
26
|
+
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
|
|
27
|
+
"",
|
|
28
|
+
"More:",
|
|
29
|
+
" workbench help --all",
|
|
30
|
+
].join("\n");
|
|
31
|
+
const HELP_ALL = [
|
|
32
|
+
"Usage:",
|
|
33
|
+
" workbench # = workbench status",
|
|
34
|
+
" workbench new [DIR] [--json]",
|
|
35
|
+
" workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
36
|
+
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
37
|
+
" workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
38
|
+
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
|
|
39
|
+
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
|
|
22
40
|
"",
|
|
23
41
|
"Inspect:",
|
|
24
42
|
" workbench status [--dir DIR] [--json]",
|
|
25
|
-
" workbench
|
|
26
|
-
" workbench switch VERSION [--dir DIR] [--json]",
|
|
27
|
-
" workbench diff [A..B] [--dir DIR] [--json]",
|
|
43
|
+
" workbench log [--runs|--versions] [--json]",
|
|
28
44
|
" workbench show REF[:PATH] [--json]",
|
|
29
|
-
" workbench
|
|
30
|
-
" workbench
|
|
31
|
-
" workbench trace RUN_ID|JOB_ID|TRACE_ID [--json]",
|
|
45
|
+
" workbench diff [A..B] [--json]",
|
|
46
|
+
" workbench switch VERSION [--json]",
|
|
32
47
|
" workbench open [--host HOST] [--port PORT] [--no-open] [--json]",
|
|
33
48
|
"",
|
|
34
49
|
"Configure:",
|
|
35
|
-
" workbench
|
|
36
|
-
" workbench
|
|
37
|
-
" workbench case list|add|show|remove ...",
|
|
50
|
+
" workbench case add RUN_ID [--json]",
|
|
51
|
+
" workbench agent add NAME --adapter X [--model M] [--with k=v]... | list | rm NAME [--json]",
|
|
38
52
|
"",
|
|
39
53
|
"Share and auth:",
|
|
40
|
-
" workbench
|
|
41
|
-
" workbench
|
|
42
|
-
" workbench
|
|
43
|
-
" workbench sync [REMOTE] [--dry-run] [--dir DIR] [--json]",
|
|
44
|
-
" workbench publish [VERSION] [--remote REMOTE] [--visibility private|internal|public] [--dry-run] [--dir DIR] [--json]",
|
|
45
|
-
" workbench install --source SOURCE [--agent codex|claude]... [--local] [--yes] [--list] [--dry-run] [--json]",
|
|
46
|
-
" workbench auth status [ADAPTER[/SLOT]] [--profile PROFILE] [--json]",
|
|
47
|
-
" workbench auth connect ADAPTER[/SLOT] [--method METHOD] [--profile PROFILE] [--profile-root DIR] [--local-only] [--json]",
|
|
48
|
-
" workbench auth disconnect ADAPTER[/SLOT] [--profile PROFILE] [--local-only] [--json]",
|
|
49
|
-
" workbench login [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--json]",
|
|
50
|
-
" workbench logout [--json]",
|
|
54
|
+
" workbench login [PROVIDER] [--method METHOD] [--profile P] [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--local-only] [--json]",
|
|
55
|
+
" workbench logout [PROVIDER] [--json]",
|
|
56
|
+
" workbench sync [REMOTE] [--dry-run] [--json]",
|
|
51
57
|
"",
|
|
52
58
|
"Remote URLs:",
|
|
53
59
|
" https://HOST/skills/OWNER/SKILL Workbench Cloud skill remote",
|
|
54
|
-
" file:///absolute/path local file remote",
|
|
55
|
-
"",
|
|
56
|
-
"Examples:",
|
|
57
|
-
" workbench init ./earnings-prep",
|
|
58
|
-
" workbench check --dir ./earnings-prep",
|
|
59
|
-
" workbench eval --agents default --samples 1",
|
|
60
|
-
" workbench compare",
|
|
61
|
-
" workbench status --json",
|
|
62
|
-
" workbench remote add --name origin --url https://v2.workbench.ai/skills/acme/earnings-prep",
|
|
63
|
-
" workbench publish --remote origin --visibility public --json",
|
|
64
|
-
" workbench install --source https://v2.workbench.ai/skills/acme/earnings-prep --agent codex --yes",
|
|
65
|
-
"",
|
|
66
|
-
"Environment:",
|
|
67
|
-
" CODEX_HOME and CLAUDE_HOME override read-only session discovery roots.",
|
|
68
|
-
" WORKBENCH_API_URL selects a Workbench Cloud API base URL for login, auth, and HTTP remotes.",
|
|
69
|
-
" WORKBENCH_API_TOKEN supplies a Workbench Cloud token without a login (WORKBENCH_SMOKE_BEARER_TOKEN is a fallback).",
|
|
70
|
-
" WORKBENCH_CONFIG overrides the CLI config path (default ~/.workbench/config.json).",
|
|
71
|
-
" WORKBENCH_DEVICE_AUTH overrides the pending device login record path.",
|
|
72
|
-
" WORKBENCH_ADAPTER_AUTH_STORE overrides the local adapter auth store directory.",
|
|
60
|
+
" file:///absolute/path local file remote for plumbing sync",
|
|
73
61
|
].join("\n");
|
|
74
62
|
const COMMAND_HELP = {
|
|
75
|
-
|
|
63
|
+
new: [
|
|
76
64
|
"Usage:",
|
|
77
|
-
" workbench
|
|
78
|
-
" workbench auth connect ADAPTER[/SLOT] [--method api-key|oauth|bedrock] [--profile PROFILE] [--profile-root DIR] [--local-only] [--json]",
|
|
79
|
-
" workbench auth disconnect ADAPTER[/SLOT] [--profile PROFILE] [--local-only] [--json]",
|
|
65
|
+
" workbench new [DIR] [--json]",
|
|
80
66
|
"",
|
|
81
|
-
"
|
|
67
|
+
"Creates a Workbench skill project.",
|
|
82
68
|
"",
|
|
83
|
-
"
|
|
84
|
-
" workbench
|
|
85
|
-
" workbench auth connect codex --method api-key",
|
|
86
|
-
" workbench auth disconnect codex --json",
|
|
69
|
+
"Example:",
|
|
70
|
+
" workbench new earnings-prep",
|
|
87
71
|
].join("\n"),
|
|
88
72
|
eval: [
|
|
89
73
|
"Usage:",
|
|
90
|
-
" workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [
|
|
74
|
+
" workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
91
75
|
"",
|
|
92
76
|
"Runs eval jobs for the selected version, measured skills, and agents. Omitted selectors use manifest defaults.",
|
|
77
|
+
"",
|
|
78
|
+
"Example:",
|
|
79
|
+
" workbench eval -n 5",
|
|
93
80
|
].join("\n"),
|
|
94
81
|
improve: [
|
|
95
82
|
"Usage:",
|
|
96
|
-
" workbench improve [VERSION] [--
|
|
83
|
+
" workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
84
|
+
"",
|
|
85
|
+
"Creates one improved child version from evidence. The selected skills and agents must resolve to exactly one entry each.",
|
|
97
86
|
"",
|
|
98
|
-
"
|
|
87
|
+
"Example:",
|
|
88
|
+
" workbench improve --budget 1 -n 1",
|
|
99
89
|
].join("\n"),
|
|
100
|
-
|
|
90
|
+
compare: [
|
|
101
91
|
"Usage:",
|
|
102
|
-
" workbench
|
|
92
|
+
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
103
93
|
"",
|
|
104
|
-
"
|
|
94
|
+
"Compares recorded eval evidence across selected skills, agents, and versions.",
|
|
105
95
|
"",
|
|
106
96
|
"Example:",
|
|
107
|
-
" workbench
|
|
97
|
+
" workbench compare --agents all",
|
|
108
98
|
].join("\n"),
|
|
109
|
-
|
|
99
|
+
install: [
|
|
110
100
|
"Usage:",
|
|
111
|
-
" workbench
|
|
112
|
-
" workbench remote list [--dir DIR] [--json]",
|
|
113
|
-
" workbench remote remove NAME [--dir DIR] [--json]",
|
|
101
|
+
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--dry-run] [--json]",
|
|
114
102
|
"",
|
|
115
|
-
"
|
|
103
|
+
"Installs published Workbench Cloud source into local agent targets.",
|
|
116
104
|
"",
|
|
117
|
-
"
|
|
118
|
-
" workbench
|
|
119
|
-
" workbench remote add --name scratch --url file:///tmp/earnings-prep-remote --replace",
|
|
105
|
+
"Example:",
|
|
106
|
+
" workbench install acme/earnings-prep --to codex --yes",
|
|
120
107
|
].join("\n"),
|
|
121
108
|
status: [
|
|
122
109
|
"Usage:",
|
|
@@ -129,180 +116,187 @@ const COMMAND_HELP = {
|
|
|
129
116
|
].join("\n"),
|
|
130
117
|
logout: [
|
|
131
118
|
"Usage:",
|
|
132
|
-
" workbench logout [--json]",
|
|
119
|
+
" workbench logout [PROVIDER] [--json]",
|
|
133
120
|
"",
|
|
134
|
-
"
|
|
121
|
+
"With no provider, logs out of Workbench Cloud. With a provider such as codex or claude, removes local adapter auth.",
|
|
135
122
|
"",
|
|
136
123
|
"Example:",
|
|
137
|
-
" workbench logout
|
|
124
|
+
" workbench logout claude",
|
|
138
125
|
].join("\n"),
|
|
139
126
|
show: [
|
|
140
127
|
"Usage:",
|
|
141
128
|
" workbench show REF [--json]",
|
|
142
129
|
" workbench show REF:PATH [--json]",
|
|
143
130
|
"",
|
|
144
|
-
"Shows a Workbench object
|
|
131
|
+
"Shows a Workbench object, lists files for file-backed objects, or prints one file.",
|
|
132
|
+
"",
|
|
133
|
+
"Example:",
|
|
134
|
+
" workbench show run_abc12345:result.json",
|
|
145
135
|
].join("\n"),
|
|
146
|
-
|
|
136
|
+
log: [
|
|
147
137
|
"Usage:",
|
|
148
|
-
" workbench
|
|
138
|
+
" workbench log [--runs|--versions] [--json]",
|
|
139
|
+
"",
|
|
140
|
+
"Shows one reverse-chronological timeline of versions and runs.",
|
|
149
141
|
"",
|
|
150
|
-
"
|
|
142
|
+
"Example:",
|
|
143
|
+
" workbench log --runs",
|
|
151
144
|
].join("\n"),
|
|
152
|
-
|
|
145
|
+
diff: [
|
|
153
146
|
"Usage:",
|
|
154
|
-
" workbench
|
|
147
|
+
" workbench diff [A..B] [--json]",
|
|
148
|
+
"",
|
|
149
|
+
"Shows changed files between two Workbench source versions.",
|
|
155
150
|
"",
|
|
156
|
-
"
|
|
151
|
+
"Example:",
|
|
152
|
+
" workbench diff 26059f9a..eac5699c",
|
|
157
153
|
].join("\n"),
|
|
158
154
|
switch: [
|
|
159
155
|
"Usage:",
|
|
160
156
|
" workbench switch VERSION [--json]",
|
|
161
157
|
"",
|
|
162
158
|
"Switches the working skill source to a recorded Workbench version.",
|
|
159
|
+
"",
|
|
160
|
+
"Example:",
|
|
161
|
+
" workbench switch 26059f9a",
|
|
162
|
+
].join("\n"),
|
|
163
|
+
open: [
|
|
164
|
+
"Usage:",
|
|
165
|
+
" workbench open [--host HOST] [--port PORT] [--no-open] [--json]",
|
|
166
|
+
"",
|
|
167
|
+
"Serves or emits the read-only Workbench inspection snapshot.",
|
|
168
|
+
"",
|
|
169
|
+
"Example:",
|
|
170
|
+
" workbench open --no-open",
|
|
171
|
+
].join("\n"),
|
|
172
|
+
case: [
|
|
173
|
+
"Usage:",
|
|
174
|
+
" workbench case add RUN_ID [--json]",
|
|
175
|
+
"",
|
|
176
|
+
"Captures a regression case from a recorded run.",
|
|
177
|
+
"",
|
|
178
|
+
"Example:",
|
|
179
|
+
" workbench case add run_abc12345",
|
|
180
|
+
].join("\n"),
|
|
181
|
+
agent: [
|
|
182
|
+
"Usage:",
|
|
183
|
+
" workbench agent list [--json]",
|
|
184
|
+
" workbench agent add NAME --adapter X [--model M] [--with k=v]... [--json]",
|
|
185
|
+
" workbench agent rm NAME [--json]",
|
|
186
|
+
"",
|
|
187
|
+
"Lists, adds, or removes eval agent configurations.",
|
|
188
|
+
"",
|
|
189
|
+
"Example:",
|
|
190
|
+
" workbench agent add claude --adapter claude --model sonnet",
|
|
163
191
|
].join("\n"),
|
|
164
192
|
sync: [
|
|
165
193
|
"Usage:",
|
|
166
194
|
" workbench sync [REMOTE] [--dry-run] [--dir DIR] [--json]",
|
|
167
195
|
"",
|
|
168
|
-
"
|
|
196
|
+
"Plumbing command: synchronizes local evidence and version objects with a Workbench remote.",
|
|
169
197
|
"",
|
|
170
|
-
"
|
|
171
|
-
" workbench sync
|
|
172
|
-
" workbench sync origin --dry-run --json",
|
|
198
|
+
"Example:",
|
|
199
|
+
" workbench sync cloud --dry-run",
|
|
173
200
|
].join("\n"),
|
|
174
201
|
publish: [
|
|
175
202
|
"Usage:",
|
|
176
|
-
" workbench publish [VERSION] [--
|
|
203
|
+
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--dir DIR] [--json]",
|
|
177
204
|
"",
|
|
178
|
-
"Publishes installable skill source
|
|
205
|
+
"Publishes installable skill source to Workbench Cloud. --as sets the linked OWNER/SKILL handle.",
|
|
179
206
|
"",
|
|
180
|
-
"
|
|
181
|
-
" workbench publish --
|
|
182
|
-
" workbench publish <version-id> --remote origin --dry-run --json",
|
|
207
|
+
"Example:",
|
|
208
|
+
" workbench publish --as acme/earnings-prep --dry-run",
|
|
183
209
|
].join("\n"),
|
|
184
210
|
login: [
|
|
185
211
|
"Usage:",
|
|
186
|
-
" workbench login [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--json]",
|
|
187
|
-
" workbench logout [--json]",
|
|
212
|
+
" workbench login [PROVIDER] [--method METHOD] [--profile P] [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--local-only] [--json]",
|
|
213
|
+
" workbench logout [PROVIDER] [--json]",
|
|
188
214
|
"",
|
|
189
|
-
"Connects the CLI to Workbench Cloud
|
|
215
|
+
"Connects the CLI to Workbench Cloud or captures local adapter auth for a provider.",
|
|
190
216
|
"",
|
|
191
|
-
"
|
|
192
|
-
" workbench login --start-only --
|
|
193
|
-
" workbench login --wait --timeout 120 --json",
|
|
217
|
+
"Example:",
|
|
218
|
+
" workbench login --start-only --no-open",
|
|
194
219
|
].join("\n"),
|
|
195
220
|
};
|
|
196
|
-
const
|
|
197
|
-
"
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
"local-only",
|
|
202
|
-
"list",
|
|
203
|
-
"no-open",
|
|
204
|
-
"start-only",
|
|
205
|
-
"replace",
|
|
206
|
-
"rerun",
|
|
207
|
-
"wait",
|
|
208
|
-
"yes",
|
|
209
|
-
]);
|
|
210
|
-
const FLAG_DEFINITIONS = {
|
|
211
|
-
adapter: "string",
|
|
212
|
-
"base-url": "string",
|
|
213
|
-
budget: "positive-integer",
|
|
221
|
+
const COMMON_FLAGS = {
|
|
222
|
+
json: "boolean",
|
|
223
|
+
};
|
|
224
|
+
const PROJECT_FLAGS = {
|
|
225
|
+
...COMMON_FLAGS,
|
|
214
226
|
dir: "string",
|
|
215
|
-
|
|
216
|
-
|
|
227
|
+
};
|
|
228
|
+
const HELP_FLAG = {
|
|
217
229
|
help: "boolean",
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
local: "boolean",
|
|
221
|
-
"local-only": "boolean",
|
|
222
|
-
list: "boolean",
|
|
223
|
-
method: "string",
|
|
224
|
-
model: "string",
|
|
225
|
-
name: "string",
|
|
226
|
-
"no-open": "boolean",
|
|
227
|
-
port: "positive-integer",
|
|
228
|
-
profile: "string",
|
|
229
|
-
"profile-root": "string",
|
|
230
|
-
remote: "string",
|
|
231
|
-
replace: "boolean",
|
|
232
|
-
rerun: "boolean",
|
|
233
|
-
samples: "positive-integer",
|
|
234
|
-
source: "string",
|
|
235
|
-
"start-only": "boolean",
|
|
236
|
-
agent: "string",
|
|
237
|
-
agents: "string",
|
|
238
|
-
skill: "string",
|
|
239
|
-
skills: "string",
|
|
230
|
+
};
|
|
231
|
+
const VERSION_FLAG = {
|
|
240
232
|
version: "boolean",
|
|
241
|
-
versions: "string",
|
|
242
|
-
visibility: "string",
|
|
243
|
-
timeout: "positive-integer",
|
|
244
|
-
url: "string",
|
|
245
|
-
wait: "boolean",
|
|
246
|
-
with: "repeat-string",
|
|
247
|
-
yes: "boolean",
|
|
248
233
|
};
|
|
249
234
|
const COMMAND_FLAGS = {
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
logout: ["json"],
|
|
261
|
-
open: ["dir", "host", "json", "no-open", "port"],
|
|
262
|
-
publish: ["dir", "dry-run", "json", "remote", "visibility"],
|
|
263
|
-
show: ["dir", "json"],
|
|
264
|
-
status: ["dir", "json"],
|
|
265
|
-
switch: ["dir", "json"],
|
|
266
|
-
sync: ["dir", "dry-run", "json"],
|
|
267
|
-
trace: ["dir", "json"],
|
|
268
|
-
versions: ["dir", "json"],
|
|
269
|
-
};
|
|
270
|
-
const SUBCOMMAND_FLAGS = {
|
|
271
|
-
auth: {
|
|
272
|
-
defaultSubcommand: "status",
|
|
273
|
-
flags: {
|
|
274
|
-
status: ["json", "profile"],
|
|
275
|
-
connect: ["json", "local-only", "method", "profile", "profile-root"],
|
|
276
|
-
disconnect: ["json", "local-only", "profile"],
|
|
277
|
-
},
|
|
235
|
+
compare: { ...PROJECT_FLAGS, ...HELP_FLAG, agents: "string", skills: "string", versions: "string" },
|
|
236
|
+
diff: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
237
|
+
eval: {
|
|
238
|
+
...PROJECT_FLAGS,
|
|
239
|
+
...HELP_FLAG,
|
|
240
|
+
agents: "string",
|
|
241
|
+
cloud: "boolean",
|
|
242
|
+
rerun: "boolean",
|
|
243
|
+
samples: "positive-integer",
|
|
244
|
+
skills: "string",
|
|
278
245
|
},
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
246
|
+
help: { ...COMMON_FLAGS, ...HELP_FLAG, all: "boolean" },
|
|
247
|
+
improve: {
|
|
248
|
+
...PROJECT_FLAGS,
|
|
249
|
+
...HELP_FLAG,
|
|
250
|
+
agents: "string",
|
|
251
|
+
budget: "positive-integer",
|
|
252
|
+
cloud: "boolean",
|
|
253
|
+
samples: "positive-integer",
|
|
254
|
+
skills: "string",
|
|
286
255
|
},
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
256
|
+
install: { ...COMMON_FLAGS, ...HELP_FLAG, "dry-run": "boolean", to: "repeat-string", yes: "boolean" },
|
|
257
|
+
log: { ...PROJECT_FLAGS, ...HELP_FLAG, runs: "boolean", versions: "boolean" },
|
|
258
|
+
login: {
|
|
259
|
+
...COMMON_FLAGS,
|
|
260
|
+
...HELP_FLAG,
|
|
261
|
+
"base-url": "string",
|
|
262
|
+
"local-only": "boolean",
|
|
263
|
+
method: "string",
|
|
264
|
+
"no-open": "boolean",
|
|
265
|
+
profile: "string",
|
|
266
|
+
"profile-root": "string",
|
|
267
|
+
"start-only": "boolean",
|
|
268
|
+
timeout: "positive-integer",
|
|
269
|
+
wait: "boolean",
|
|
270
|
+
},
|
|
271
|
+
logout: { ...COMMON_FLAGS, ...HELP_FLAG },
|
|
272
|
+
new: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
273
|
+
open: { ...PROJECT_FLAGS, ...HELP_FLAG, host: "string", "no-open": "boolean", port: "positive-integer" },
|
|
274
|
+
publish: {
|
|
275
|
+
...PROJECT_FLAGS,
|
|
276
|
+
...HELP_FLAG,
|
|
277
|
+
as: "string",
|
|
278
|
+
"dry-run": "boolean",
|
|
279
|
+
private: "boolean",
|
|
280
|
+
public: "boolean",
|
|
281
|
+
team: "boolean",
|
|
293
282
|
},
|
|
294
|
-
|
|
283
|
+
show: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
284
|
+
status: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
285
|
+
switch: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
286
|
+
sync: { ...PROJECT_FLAGS, ...HELP_FLAG, "dry-run": "boolean" },
|
|
287
|
+
version: { ...COMMON_FLAGS, ...VERSION_FLAG },
|
|
288
|
+
};
|
|
289
|
+
const SUBCOMMAND_FLAGS = {
|
|
290
|
+
case: {
|
|
295
291
|
flags: {
|
|
296
|
-
|
|
292
|
+
add: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
297
293
|
},
|
|
298
294
|
},
|
|
299
295
|
agent: {
|
|
300
296
|
flags: {
|
|
301
|
-
list:
|
|
302
|
-
add:
|
|
303
|
-
|
|
304
|
-
default: ["dir", "json"],
|
|
305
|
-
remove: ["dir", "json"],
|
|
297
|
+
list: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
298
|
+
add: { ...PROJECT_FLAGS, ...HELP_FLAG, adapter: "string", model: "string", with: "repeat-string" },
|
|
299
|
+
rm: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
306
300
|
},
|
|
307
301
|
},
|
|
308
302
|
};
|
|
@@ -313,20 +307,23 @@ export async function runCli(argv, io = {
|
|
|
313
307
|
const parsed = parseArgs(argv);
|
|
314
308
|
const command = parsed.positionals[0];
|
|
315
309
|
try {
|
|
316
|
-
|
|
310
|
+
validateCommandFlags(parsed, command);
|
|
311
|
+
if (command === "version" || parsed.flags.version === true) {
|
|
317
312
|
io.stdout.write(`workbench ${getCliVersion()}\n`);
|
|
318
313
|
return 0;
|
|
319
314
|
}
|
|
320
|
-
if (
|
|
315
|
+
if (command === "help") {
|
|
321
316
|
const helpCommand = command === "help" ? optionalPositional(parsed, 1) : undefined;
|
|
322
|
-
io.stdout.write(`${helpCommand ? commandHelp(helpCommand) : HELP}\n`);
|
|
317
|
+
io.stdout.write(`${parsed.flags.all === true ? HELP_ALL : helpCommand ? commandHelp(helpCommand) : HELP}\n`);
|
|
323
318
|
return 0;
|
|
324
319
|
}
|
|
325
320
|
if (parsed.flags.help === true) {
|
|
326
|
-
io.stdout.write(`${commandHelp(command)}\n`);
|
|
321
|
+
io.stdout.write(`${command ? commandHelp(command) : HELP}\n`);
|
|
327
322
|
return 0;
|
|
328
323
|
}
|
|
329
|
-
|
|
324
|
+
if (!command) {
|
|
325
|
+
return await handleStatus(parsed, io);
|
|
326
|
+
}
|
|
330
327
|
if (command === "login") {
|
|
331
328
|
return await handleLogin(parsed, io);
|
|
332
329
|
}
|
|
@@ -337,27 +334,17 @@ export async function runCli(argv, io = {
|
|
|
337
334
|
return await handleInstall(parsed, io);
|
|
338
335
|
}
|
|
339
336
|
const core = await coreOptions(parsed);
|
|
340
|
-
if (command === "
|
|
337
|
+
if (command === "new") {
|
|
341
338
|
const status = await initWorkbenchSkill({ dir: parsed.positionals[1] ?? dirFlag(parsed) });
|
|
342
|
-
return output(status, parsed, io, () => `
|
|
339
|
+
return output(status, parsed, io, () => `Created Workbench skill at ${status.root}.\nnext: edit SKILL.md, then run workbench eval`);
|
|
343
340
|
}
|
|
344
341
|
if (command === "status") {
|
|
345
|
-
|
|
346
|
-
const auth = await workbenchCliAuthStatus();
|
|
347
|
-
return emitResult("workbench.status.v1", {
|
|
348
|
-
project: status.project,
|
|
349
|
-
worktree: status.worktree,
|
|
350
|
-
runs: status.runs,
|
|
351
|
-
remotes: status.remotes,
|
|
352
|
-
auth: auth,
|
|
353
|
-
next: status.next,
|
|
354
|
-
}, parsed, io, () => formatStatusSnapshot({ ...status, auth }));
|
|
355
|
-
}
|
|
356
|
-
if (command === "check") {
|
|
357
|
-
const result = await checkWorkbenchSkill(core);
|
|
358
|
-
return output(result, parsed, io, () => formatCheck(result));
|
|
342
|
+
return await handleStatus(parsed, io);
|
|
359
343
|
}
|
|
360
344
|
if (command === "eval") {
|
|
345
|
+
if (parsed.flags.cloud === true) {
|
|
346
|
+
return await handleCloudEval(parsed, io);
|
|
347
|
+
}
|
|
361
348
|
const runs = await evalWorkbenchSkill({
|
|
362
349
|
...core,
|
|
363
350
|
version: optionalPositional(parsed, 1),
|
|
@@ -371,21 +358,36 @@ export async function runCli(argv, io = {
|
|
|
371
358
|
if (failedRuns.length > 0) {
|
|
372
359
|
return emitEvalFailure(runs, failedRuns, artifactIds, parsed, io);
|
|
373
360
|
}
|
|
374
|
-
|
|
361
|
+
const deltas = await evalDeltas(core, runs);
|
|
362
|
+
const next = await evalSuccessNextCommand(core, runs);
|
|
363
|
+
return emitResult("workbench.cli.eval.v1", {
|
|
364
|
+
result: runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
|
|
365
|
+
deltas: deltas,
|
|
366
|
+
next: next,
|
|
367
|
+
}, parsed, io, () => [
|
|
368
|
+
runs.map(formatRun).join("\n"),
|
|
369
|
+
...deltas.map(formatEvalDelta),
|
|
370
|
+
...(next ? [`next: ${next}`] : []),
|
|
371
|
+
].filter(Boolean).join("\n"));
|
|
375
372
|
}
|
|
376
373
|
if (command === "improve") {
|
|
374
|
+
if (parsed.flags.cloud === true) {
|
|
375
|
+
return await handleCloudImprove(parsed, io);
|
|
376
|
+
}
|
|
377
|
+
const improverAgent = await resolveLocalImproverAgent(parsed, core);
|
|
377
378
|
const result = await improveWorkbenchSkill({
|
|
378
379
|
...core,
|
|
379
380
|
version: optionalPositional(parsed, 1),
|
|
380
|
-
skill: stringFlag(parsed, "
|
|
381
|
-
agent: stringFlag(parsed, "
|
|
381
|
+
skill: stringFlag(parsed, "skills"),
|
|
382
|
+
agent: stringFlag(parsed, "agents"),
|
|
383
|
+
...(improverAgent ? { improverAgent } : {}),
|
|
382
384
|
budget: intFlag(parsed, "budget"),
|
|
383
385
|
samples: intFlag(parsed, "samples"),
|
|
384
386
|
});
|
|
385
387
|
return output({
|
|
386
388
|
...result,
|
|
387
389
|
version: versionSummary(result.version),
|
|
388
|
-
}, parsed, io, () => formatImproveResult(result));
|
|
390
|
+
}, parsed, io, () => `${formatImproveResult(result)}\nnext: workbench eval`);
|
|
389
391
|
}
|
|
390
392
|
if (command === "compare") {
|
|
391
393
|
const comparison = await compareWorkbench({
|
|
@@ -394,92 +396,30 @@ export async function runCli(argv, io = {
|
|
|
394
396
|
skills: stringFlag(parsed, "skills"),
|
|
395
397
|
agents: stringFlag(parsed, "agents"),
|
|
396
398
|
});
|
|
397
|
-
return output(comparison, parsed, io, () => formatComparison(comparison));
|
|
398
|
-
}
|
|
399
|
-
if (command === "versions") {
|
|
400
|
-
const versions = await listWorkbenchVersions(core);
|
|
401
|
-
return output(versions.map(versionSummary), parsed, io, () => versions.map(formatVersion).join("\n") || "No versions.");
|
|
399
|
+
return output(manifestOnly(comparison), parsed, io, () => formatComparison(comparison));
|
|
402
400
|
}
|
|
403
401
|
if (command === "switch") {
|
|
404
402
|
const versionRef = requiredPositional(parsed, 1, "workbench switch requires VERSION.");
|
|
405
403
|
const version = await switchWorkbenchVersion(versionRef, core);
|
|
406
|
-
return output(versionSummary(version), parsed, io, () => `Switched to ${version.id}.`);
|
|
404
|
+
return output(versionSummary(version), parsed, io, () => `Switched to ${displayRef(version.id)}.`);
|
|
407
405
|
}
|
|
408
406
|
if (command === "diff") {
|
|
409
|
-
const range =
|
|
407
|
+
const range = optionalPositional(parsed, 1) ?? await defaultDiffRange(core);
|
|
410
408
|
const diffs = await diffWorkbenchVersions(range, core);
|
|
411
409
|
return output(diffs, parsed, io, () => diffs.map((entry) => `${entry.status}\t${entry.path}`).join("\n") || "No diff.");
|
|
412
410
|
}
|
|
413
411
|
if (command === "show") {
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
}
|
|
419
|
-
const value = await showWorkbenchRef(ref, core);
|
|
420
|
-
return output(value, parsed, io, () => formatShow(value));
|
|
421
|
-
}
|
|
422
|
-
if (command === "files") {
|
|
423
|
-
const ref = requiredPositional(parsed, 1, "workbench files requires REF.");
|
|
424
|
-
const files = await filesForWorkbenchRef(ref, core);
|
|
425
|
-
return output(files.map(fileSummary), parsed, io, () => files.map((file) => file.path).join("\n") || "No files.");
|
|
426
|
-
}
|
|
427
|
-
if (command === "list") {
|
|
428
|
-
return await handleList(parsed, io);
|
|
429
|
-
}
|
|
430
|
-
if (command === "trace") {
|
|
431
|
-
const ref = optionalPositional(parsed, 1);
|
|
432
|
-
if (!ref) {
|
|
433
|
-
throw new WorkbenchCodedError("usage", "workbench trace requires RUN_ID, JOB_ID, or TRACE_ID.", {
|
|
434
|
-
remediation: "Run workbench list runs --json or workbench list traces --json.",
|
|
435
|
-
exitCode: 2,
|
|
436
|
-
});
|
|
437
|
-
}
|
|
438
|
-
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
439
|
-
const run = snapshot.runs.find((entry) => entry.id === ref);
|
|
440
|
-
const job = snapshot.jobs.find((entry) => entry.id === ref);
|
|
441
|
-
const traces = run
|
|
442
|
-
? snapshot.traces.filter((trace) => run.traceIds.includes(trace.id))
|
|
443
|
-
: job
|
|
444
|
-
? snapshot.traces.filter((trace) => job.traceIds.includes(trace.id))
|
|
445
|
-
: snapshot.traces.filter((trace) => trace.id === ref);
|
|
446
|
-
if (traces.length === 0) {
|
|
447
|
-
const jobs = run
|
|
448
|
-
? snapshot.jobs.filter((entry) => entry.runId === run.id)
|
|
449
|
-
: job ? [job] : [];
|
|
450
|
-
const details = jobs.flatMap((entry) => {
|
|
451
|
-
const detail = workbenchJobEvidenceForSnapshot(snapshot, {
|
|
452
|
-
runId: entry.runId,
|
|
453
|
-
jobId: entry.id,
|
|
454
|
-
});
|
|
455
|
-
return detail ? [detail] : [];
|
|
456
|
-
}).filter((detail) => detail.executions.some((execution) => execution.sessions.length > 0 ||
|
|
457
|
-
execution.trace.spans.length > 0 ||
|
|
458
|
-
execution.trace.events.length > 0 ||
|
|
459
|
-
execution.trace.summaries.length > 0));
|
|
460
|
-
if (details.length > 0) {
|
|
461
|
-
return output(details, parsed, io, () => details.map(formatTraceDetail).join("\n"));
|
|
462
|
-
}
|
|
463
|
-
throw new WorkbenchCodedError("ref_not_found", `Trace not found: ${ref}`, {
|
|
464
|
-
remediation: "Run workbench list runs --json, workbench list jobs --json, or workbench list traces --json.",
|
|
465
|
-
subject: { ref },
|
|
466
|
-
exitCode: 1,
|
|
467
|
-
});
|
|
468
|
-
}
|
|
469
|
-
return output(traces, parsed, io, () => traces.map(formatTrace).join("\n"));
|
|
412
|
+
return await handleShow(parsed, io);
|
|
413
|
+
}
|
|
414
|
+
if (command === "log") {
|
|
415
|
+
return await handleLog(parsed, io);
|
|
470
416
|
}
|
|
471
417
|
if (command === "agent") {
|
|
472
418
|
return await handleAgent(parsed, io);
|
|
473
419
|
}
|
|
474
|
-
if (command === "skills") {
|
|
475
|
-
return await handleSkills(parsed, io);
|
|
476
|
-
}
|
|
477
420
|
if (command === "case") {
|
|
478
421
|
return await handleCase(parsed, io);
|
|
479
422
|
}
|
|
480
|
-
if (command === "remote") {
|
|
481
|
-
return await handleRemote(parsed, io);
|
|
482
|
-
}
|
|
483
423
|
if (command === "sync") {
|
|
484
424
|
const result = await syncWorkbenchRemote({
|
|
485
425
|
...core,
|
|
@@ -496,34 +436,54 @@ export async function runCli(argv, io = {
|
|
|
496
436
|
}, parsed, io, () => `${result.dryRun ? "Would sync" : "Synced"} ${result.remote.name}: pushed ${result.pushed}, pulled ${result.pulled}${result.upToDate ? " (up to date)" : ""}.`);
|
|
497
437
|
}
|
|
498
438
|
if (command === "publish") {
|
|
439
|
+
const preview = parsed.flags["dry-run"] === true
|
|
440
|
+
? await previewPublishWithDerivedRemote(parsed)
|
|
441
|
+
: undefined;
|
|
442
|
+
if (preview) {
|
|
443
|
+
return emitResult("workbench.cli.publish.v1", {
|
|
444
|
+
remote: preview.remote,
|
|
445
|
+
version: versionSummary(preview.version),
|
|
446
|
+
visibility: preview.visibility,
|
|
447
|
+
installHandle: preview.installHandle,
|
|
448
|
+
installUrl: preview.installUrl,
|
|
449
|
+
pinnedInstallUrl: preview.pinnedInstallUrl,
|
|
450
|
+
dryRun: true,
|
|
451
|
+
}, parsed, io, () => [
|
|
452
|
+
`Would publish ${displayRef(preview.version.id)} to remote ${preview.remote.name}.`,
|
|
453
|
+
`Visibility: ${preview.visibility}`,
|
|
454
|
+
`Install: ${preview.installUrl}`,
|
|
455
|
+
`Pinned: ${preview.pinnedInstallUrl}`,
|
|
456
|
+
`next: workbench install ${preview.installHandle}`,
|
|
457
|
+
].join("\n"));
|
|
458
|
+
}
|
|
459
|
+
const remote = await ensurePublishRemote(parsed);
|
|
499
460
|
const result = await publishWorkbenchVersion({
|
|
500
461
|
...core,
|
|
501
462
|
version: optionalPositional(parsed, 1),
|
|
502
|
-
remote
|
|
463
|
+
remote,
|
|
503
464
|
dryRun: parsed.flags["dry-run"] === true,
|
|
504
|
-
visibility:
|
|
465
|
+
visibility: parsePublishVisibilityFlags(parsed),
|
|
505
466
|
});
|
|
506
467
|
return emitResult("workbench.cli.publish.v1", {
|
|
507
468
|
remote: result.remote,
|
|
508
469
|
version: versionSummary(result.version),
|
|
509
470
|
visibility: result.visibility,
|
|
471
|
+
installHandle: result.installHandle,
|
|
510
472
|
installUrl: result.installUrl,
|
|
511
473
|
pinnedInstallUrl: result.pinnedInstallUrl,
|
|
512
474
|
...(result.dryRun ? { dryRun: true } : {}),
|
|
513
475
|
}, parsed, io, () => [
|
|
514
|
-
`${result.dryRun ? "Would publish" : "Published"} ${result.version.id} to remote ${result.remote.name}.`,
|
|
476
|
+
`${result.dryRun ? "Would publish" : "Published"} ${displayRef(result.version.id)} to remote ${result.remote.name}.`,
|
|
515
477
|
`Visibility: ${result.visibility}`,
|
|
516
478
|
`Install: ${result.installUrl}`,
|
|
517
479
|
`Pinned: ${result.pinnedInstallUrl}`,
|
|
480
|
+
`next: workbench install ${result.installHandle}`,
|
|
518
481
|
].join("\n"));
|
|
519
482
|
}
|
|
520
|
-
if (command === "auth") {
|
|
521
|
-
return await handleAuth(parsed, io);
|
|
522
|
-
}
|
|
523
483
|
if (command === "open") {
|
|
524
484
|
if (parsed.flags.json === true) {
|
|
525
485
|
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
526
|
-
return output(snapshot, parsed, io, () => "Read-only Workbench inspection data is available with --json.");
|
|
486
|
+
return output(manifestOnly(snapshot), parsed, io, () => "Read-only Workbench inspection data is available with --json.");
|
|
527
487
|
}
|
|
528
488
|
// The browser server serves committed object state through a read-only
|
|
529
489
|
// snapshot path, so long-running commands do not block page loads.
|
|
@@ -545,29 +505,107 @@ export async function runCli(argv, io = {
|
|
|
545
505
|
return emitError(error, parsed, io);
|
|
546
506
|
}
|
|
547
507
|
}
|
|
548
|
-
async function
|
|
549
|
-
const
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
508
|
+
async function handleStatus(parsed, io) {
|
|
509
|
+
const status = await workbenchStatusSnapshot(await coreOptions(parsed));
|
|
510
|
+
const auth = await workbenchCliAuthStatus();
|
|
511
|
+
const cliStatus = statusWithCausalNext(status, auth);
|
|
512
|
+
return emitResult("workbench.status.v1", {
|
|
513
|
+
project: cliStatus.project,
|
|
514
|
+
worktree: cliStatus.worktree,
|
|
515
|
+
runs: cliStatus.runs,
|
|
516
|
+
remotes: cliStatus.remotes,
|
|
517
|
+
auth: auth,
|
|
518
|
+
next: cliStatus.next,
|
|
519
|
+
}, parsed, io, () => formatStatusSnapshot({ ...cliStatus, auth }));
|
|
520
|
+
}
|
|
521
|
+
async function handleLog(parsed, io) {
|
|
522
|
+
if (parsed.flags.runs === true && parsed.flags.versions === true) {
|
|
523
|
+
throw new WorkbenchCodedError("usage", "workbench log accepts only one of --runs or --versions.", {
|
|
524
|
+
remediation: "Run workbench log --runs or workbench log --versions.",
|
|
525
|
+
exitCode: 2,
|
|
526
|
+
});
|
|
553
527
|
}
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
528
|
+
if (parsed.positionals.length > 1) {
|
|
529
|
+
if (parsed.flags.runs === true) {
|
|
530
|
+
throw new WorkbenchUserError("--runs does not accept a value.");
|
|
531
|
+
}
|
|
532
|
+
if (parsed.flags.versions === true) {
|
|
533
|
+
throw new WorkbenchUserError("--versions does not accept a value.");
|
|
534
|
+
}
|
|
535
|
+
rejectExtraInput(parsed, {
|
|
536
|
+
maxPositionals: 1,
|
|
537
|
+
message: "workbench log does not accept refs or paths.",
|
|
538
|
+
remediation: "Run workbench log, workbench log --runs, or workbench log --versions.",
|
|
539
|
+
});
|
|
540
|
+
}
|
|
541
|
+
const snapshot = await createWorkbenchInspectionSnapshot(await coreOptions(parsed));
|
|
542
|
+
const includeRuns = parsed.flags.versions !== true;
|
|
543
|
+
const includeVersions = parsed.flags.runs !== true;
|
|
544
|
+
const entries = [
|
|
545
|
+
...(includeVersions ? snapshot.versions.map((version) => ({
|
|
546
|
+
kind: "version",
|
|
547
|
+
id: version.id,
|
|
548
|
+
createdAt: version.createdAt,
|
|
549
|
+
message: version.message,
|
|
550
|
+
fileCount: version.files.length,
|
|
551
|
+
})) : []),
|
|
552
|
+
...(includeRuns ? snapshot.runs.map((run) => ({
|
|
553
|
+
kind: "run",
|
|
554
|
+
id: run.id,
|
|
555
|
+
createdAt: run.createdAt,
|
|
556
|
+
status: run.status,
|
|
557
|
+
versionId: run.versionId,
|
|
558
|
+
skillName: run.skillName,
|
|
559
|
+
agentName: run.agentName,
|
|
560
|
+
...(run.score !== undefined ? { score: run.score } : {}),
|
|
561
|
+
})) : []),
|
|
562
|
+
].sort((left, right) => right.createdAt.localeCompare(left.createdAt));
|
|
563
|
+
return emitResult("workbench.cli.log.v1", {
|
|
564
|
+
entries: entries,
|
|
565
|
+
}, parsed, io, () => entries.map(formatLogEntry).join("\n") || "No history.");
|
|
566
|
+
}
|
|
567
|
+
async function handleShow(parsed, io) {
|
|
568
|
+
const ref = requiredPositional(parsed, 1, "workbench show requires REF.");
|
|
569
|
+
const session = await showLocalAgentSession(ref);
|
|
570
|
+
if (session) {
|
|
571
|
+
return output(session, parsed, io, () => formatSessionDetail(session));
|
|
572
|
+
}
|
|
573
|
+
const core = await coreOptions(parsed);
|
|
574
|
+
const [objectRef, requestedPath] = splitShowRef(ref);
|
|
575
|
+
if (requestedPath) {
|
|
576
|
+
const runOrJobFile = await fileForRunOrJobRef(core, objectRef, requestedPath);
|
|
577
|
+
if (runOrJobFile) {
|
|
578
|
+
return output(runOrJobFile, parsed, io, () => formatShow(runOrJobFile));
|
|
579
|
+
}
|
|
580
|
+
const value = await showWorkbenchRef(ref, core);
|
|
581
|
+
return output(value, parsed, io, () => formatShow(value));
|
|
557
582
|
}
|
|
558
|
-
|
|
559
|
-
|
|
583
|
+
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
584
|
+
const version = snapshotVersionByRef(snapshot, objectRef);
|
|
585
|
+
if (version) {
|
|
586
|
+
return output(fileListing("version", version.id, version.files), parsed, io, () => formatFileListing("version", version.id, version.files));
|
|
560
587
|
}
|
|
561
|
-
|
|
562
|
-
|
|
588
|
+
const trace = snapshotObjectByRef(snapshot.traces, objectRef, "trace");
|
|
589
|
+
if (trace) {
|
|
590
|
+
return output(fileListing("trace", trace.id, trace.files), parsed, io, () => formatFileListing("trace", trace.id, trace.files));
|
|
563
591
|
}
|
|
564
|
-
|
|
565
|
-
|
|
592
|
+
const artifact = snapshotObjectByRef(snapshot.artifacts, objectRef, "artifact");
|
|
593
|
+
if (artifact) {
|
|
594
|
+
return output(fileListing("artifact", artifact.id, artifact.files), parsed, io, () => formatFileListing("artifact", artifact.id, artifact.files));
|
|
566
595
|
}
|
|
567
|
-
|
|
596
|
+
const details = evidenceDetailsForRunOrJob(snapshot, objectRef);
|
|
597
|
+
const evidenceFiles = evidenceFilesForRunOrJob(snapshot, objectRef);
|
|
598
|
+
if (details.length > 0 || evidenceFiles.length > 0) {
|
|
599
|
+
return output({
|
|
600
|
+
details: details,
|
|
601
|
+
files: evidenceFiles.map(fileSummary),
|
|
602
|
+
}, parsed, io, () => formatRunOrJobEvidence(details, evidenceFiles));
|
|
603
|
+
}
|
|
604
|
+
const value = await showWorkbenchRef(ref, core);
|
|
605
|
+
return output(value, parsed, io, () => formatShow(value));
|
|
568
606
|
}
|
|
569
607
|
async function handleAgent(parsed, io) {
|
|
570
|
-
const subcommand = requiredPositional(parsed, 1, "workbench agent requires list|add|
|
|
608
|
+
const subcommand = requiredPositional(parsed, 1, "workbench agent requires list|add|rm.");
|
|
571
609
|
if (subcommand === "list") {
|
|
572
610
|
const agents = await listWorkbenchAgents(await coreOptions(parsed));
|
|
573
611
|
return output(agents, parsed, io, () => agents.map(formatAgent).join("\n") || "No agents.");
|
|
@@ -587,173 +625,65 @@ async function handleAgent(parsed, io) {
|
|
|
587
625
|
});
|
|
588
626
|
return output(agent, parsed, io, () => `Added agent ${formatAgent(agent)}.`);
|
|
589
627
|
}
|
|
590
|
-
if (subcommand === "
|
|
591
|
-
const
|
|
592
|
-
const agent = (await listWorkbenchAgents(await coreOptions(parsed))).find((entry) => entry.name === name);
|
|
593
|
-
if (!agent) {
|
|
594
|
-
throw new WorkbenchCodedError("ref_not_found", `Agent not found: ${name}`, {
|
|
595
|
-
remediation: "Run workbench agent list.",
|
|
596
|
-
subject: { agent: name },
|
|
597
|
-
exitCode: 1,
|
|
598
|
-
});
|
|
599
|
-
}
|
|
600
|
-
return output(agent, parsed, io, () => formatAgent(agent));
|
|
601
|
-
}
|
|
602
|
-
if (subcommand === "default") {
|
|
603
|
-
const result = await setDefaultWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent default requires NAME."), await coreOptions(parsed));
|
|
604
|
-
return output(result, parsed, io, () => `Default agent: ${result.defaultAgent}`);
|
|
605
|
-
}
|
|
606
|
-
if (subcommand === "remove") {
|
|
607
|
-
const result = await removeWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent remove requires NAME."), await coreOptions(parsed));
|
|
628
|
+
if (subcommand === "rm") {
|
|
629
|
+
const result = await removeWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent rm requires NAME."), await coreOptions(parsed));
|
|
608
630
|
return output(result, parsed, io, () => `Removed agent ${result.removed}.`);
|
|
609
631
|
}
|
|
610
632
|
throw new WorkbenchUserError(`Unsupported agent command: ${subcommand}`);
|
|
611
633
|
}
|
|
612
|
-
async function handleSkills(parsed, io) {
|
|
613
|
-
const subcommand = requiredPositional(parsed, 1, "workbench skills requires list.");
|
|
614
|
-
if (subcommand !== "list") {
|
|
615
|
-
throw new WorkbenchUserError(`Unsupported skills command: ${subcommand}`);
|
|
616
|
-
}
|
|
617
|
-
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(await coreOptions(parsed));
|
|
618
|
-
return output(snapshot.skillSources, parsed, io, () => snapshot.skillSources.map((source) => {
|
|
619
|
-
const where = source.kind === "remote"
|
|
620
|
-
? `${source.from}${source.ref ? `#${source.ref}` : ""}`
|
|
621
|
-
: source.kind === "none"
|
|
622
|
-
? "baseline:none"
|
|
623
|
-
: source.path;
|
|
624
|
-
return `${source.name}\t${source.kind}\t${where}\tincludes=${source.includes?.length ?? 0}`;
|
|
625
|
-
}).join("\n") || "No skills.");
|
|
626
|
-
}
|
|
627
634
|
async function handleCase(parsed, io) {
|
|
628
|
-
const subcommand = requiredPositional(parsed, 1, "workbench case requires
|
|
629
|
-
if (subcommand === "list") {
|
|
630
|
-
const cases = await listWorkbenchCases(await coreOptions(parsed));
|
|
631
|
-
return output(cases, parsed, io, () => cases.map((entry) => `${entry.id}\t${entry.path}`).join("\n") || "No cases.");
|
|
632
|
-
}
|
|
635
|
+
const subcommand = requiredPositional(parsed, 1, "workbench case requires add.");
|
|
633
636
|
if (subcommand === "add") {
|
|
634
|
-
const
|
|
635
|
-
|
|
636
|
-
}
|
|
637
|
-
if (subcommand === "show") {
|
|
638
|
-
const record = await showWorkbenchCase(requiredPositional(parsed, 2, "workbench case show requires CASE_ID."), await coreOptions(parsed));
|
|
639
|
-
return output(record, parsed, io, () => record.content);
|
|
640
|
-
}
|
|
641
|
-
if (subcommand === "remove") {
|
|
642
|
-
const result = await removeWorkbenchCase(requiredPositional(parsed, 2, "workbench case remove requires CASE_ID."), await coreOptions(parsed));
|
|
643
|
-
return output(result, parsed, io, () => `Removed case ${result.removed}.`);
|
|
644
|
-
}
|
|
645
|
-
throw new WorkbenchUserError(`Unsupported case command: ${subcommand}`);
|
|
646
|
-
}
|
|
647
|
-
async function handleRemote(parsed, io) {
|
|
648
|
-
const subcommand = requiredPositional(parsed, 1, "workbench remote requires add|list|remove.");
|
|
649
|
-
if (subcommand === "add") {
|
|
650
|
-
const name = requiredFlag(parsed, {
|
|
651
|
-
flag: "name",
|
|
652
|
-
usage: "workbench remote add requires --name NAME.",
|
|
653
|
-
remediation: "Run workbench remote add --name origin --url https://HOST/skills/OWNER/SKILL.",
|
|
654
|
-
});
|
|
655
|
-
const url = requiredFlag(parsed, {
|
|
656
|
-
flag: "url",
|
|
657
|
-
usage: "workbench remote add requires --url URL.",
|
|
658
|
-
remediation: `Run workbench remote add --name ${name} --url https://HOST/skills/OWNER/SKILL.`,
|
|
659
|
-
});
|
|
637
|
+
const core = await coreOptions(parsed);
|
|
638
|
+
const sourceRef = requiredPositional(parsed, 2, "workbench case add requires RUN_ID.");
|
|
660
639
|
rejectExtraInput(parsed, {
|
|
661
|
-
maxPositionals:
|
|
662
|
-
message: "workbench
|
|
663
|
-
remediation: "Run workbench
|
|
664
|
-
});
|
|
665
|
-
const result = await addWorkbenchRemote(name, url, {
|
|
666
|
-
...(await coreOptions(parsed)),
|
|
667
|
-
replace: parsed.flags.replace === true,
|
|
668
|
-
dryRun: parsed.flags["dry-run"] === true,
|
|
640
|
+
maxPositionals: 3,
|
|
641
|
+
message: "workbench case add accepts one RUN_ID argument.",
|
|
642
|
+
remediation: "Run workbench case add RUN_ID.",
|
|
669
643
|
});
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
adapters: cliAuth.adapters,
|
|
714
|
-
required: required,
|
|
715
|
-
}, parsed, io, () => formatAuthStatusList(cliAuth.workbenchCloud, statuses, required));
|
|
716
|
-
}
|
|
717
|
-
if (subcommand === "connect") {
|
|
718
|
-
const targetRaw = requiredPositional(parsed, 2, "workbench auth connect requires ADAPTER[/SLOT].");
|
|
719
|
-
const target = parseAuthTarget(targetRaw, authProfileFlag(parsed));
|
|
720
|
-
const method = authMethod(parsed, target.adapterId);
|
|
721
|
-
const bundle = await collectAdapterAuthBundle({
|
|
722
|
-
target,
|
|
723
|
-
method,
|
|
724
|
-
profileRoot: path.resolve(stringFlag(parsed, "profile-root") ?? os.homedir()),
|
|
725
|
-
});
|
|
726
|
-
const saved = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).put(bundle);
|
|
727
|
-
const remote = await uploadAdapterConnection(saved, parsed);
|
|
728
|
-
return emitResult("workbench.cli.auth-connect.v1", {
|
|
729
|
-
localAdapter: {
|
|
730
|
-
adapter: saved.adapterId,
|
|
731
|
-
...(saved.slot ? { slot: saved.slot } : {}),
|
|
732
|
-
profile: saved.profile,
|
|
733
|
-
method: saved.method,
|
|
734
|
-
status: saved.status,
|
|
735
|
-
version: saved.version,
|
|
736
|
-
updatedAt: saved.updatedAt,
|
|
737
|
-
},
|
|
738
|
-
workbenchCloud: remote,
|
|
739
|
-
}, parsed, io, () => `Connected ${formatAuthTarget(saved)} ${saved.method} auth v${saved.version}; Workbench Cloud: ${remote.sync}${remote.reason ? ` (${remote.reason})` : ""}.`);
|
|
740
|
-
}
|
|
741
|
-
if (subcommand === "disconnect") {
|
|
742
|
-
const targetRaw = requiredPositional(parsed, 2, "workbench auth disconnect requires ADAPTER[/SLOT].");
|
|
743
|
-
const target = parseAuthTarget(targetRaw, authProfileFlag(parsed));
|
|
744
|
-
await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).disconnect(target);
|
|
745
|
-
const remote = await deleteAdapterConnectionRemote(target, parsed);
|
|
746
|
-
return emitResult("workbench.cli.auth-disconnect.v1", {
|
|
747
|
-
localAdapter: {
|
|
748
|
-
adapter: target.adapterId,
|
|
749
|
-
...(target.slot ? { slot: target.slot } : {}),
|
|
750
|
-
profile: target.profile,
|
|
751
|
-
status: "disconnected",
|
|
752
|
-
},
|
|
753
|
-
workbenchCloud: remote,
|
|
754
|
-
}, parsed, io, () => `Disconnected ${formatAuthTarget(target)}; Workbench Cloud: ${remote.sync}${remote.reason ? ` (${remote.reason})` : ""}.`);
|
|
755
|
-
}
|
|
756
|
-
throw new WorkbenchUserError(`Unsupported auth command: ${subcommand}`);
|
|
644
|
+
const record = await addWorkbenchCase({ ...core, fromTraceId: await traceIdForCaseSource(core, sourceRef) });
|
|
645
|
+
return output(record, parsed, io, () => `Added draft case ${record.id}. Edit .workbench/cases/${record.path}/case.yaml before using it as score evidence.`);
|
|
646
|
+
}
|
|
647
|
+
throw new WorkbenchUserError(`Unknown command: workbench case ${subcommand}`);
|
|
648
|
+
}
|
|
649
|
+
async function handleAdapterLogin(provider, parsed, io) {
|
|
650
|
+
const target = parseAuthTarget(provider, authProfileFlag(parsed));
|
|
651
|
+
const method = authMethod(parsed, target.adapterId);
|
|
652
|
+
const bundle = await collectAdapterAuthBundle({
|
|
653
|
+
target,
|
|
654
|
+
method,
|
|
655
|
+
profileRoot: path.resolve(stringFlag(parsed, "profile-root") ?? os.homedir()),
|
|
656
|
+
});
|
|
657
|
+
const saved = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).put(bundle);
|
|
658
|
+
const remote = await uploadAdapterConnection(saved, parsed);
|
|
659
|
+
return emitResult("workbench.cli.login.v1", {
|
|
660
|
+
provider: saved.adapterId,
|
|
661
|
+
localAdapter: {
|
|
662
|
+
adapter: saved.adapterId,
|
|
663
|
+
...(saved.slot ? { slot: saved.slot } : {}),
|
|
664
|
+
profile: saved.profile,
|
|
665
|
+
method: saved.method,
|
|
666
|
+
status: saved.status,
|
|
667
|
+
version: saved.version,
|
|
668
|
+
updatedAt: saved.updatedAt,
|
|
669
|
+
},
|
|
670
|
+
workbenchCloud: remote,
|
|
671
|
+
}, parsed, io, () => `Connected ${formatAuthTarget(saved)} ${saved.method} auth v${saved.version}; Workbench Cloud: ${remote.sync}${remote.reason ? ` (${remote.reason})` : ""}.`);
|
|
672
|
+
}
|
|
673
|
+
async function handleAdapterLogout(provider, parsed, io) {
|
|
674
|
+
const target = parseAuthTarget(provider, authProfileFlag(parsed));
|
|
675
|
+
await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).disconnect(target);
|
|
676
|
+
const remote = await deleteAdapterConnectionRemote(target, parsed);
|
|
677
|
+
return emitResult("workbench.cli.logout.v1", {
|
|
678
|
+
provider: target.adapterId,
|
|
679
|
+
localAdapter: {
|
|
680
|
+
adapter: target.adapterId,
|
|
681
|
+
...(target.slot ? { slot: target.slot } : {}),
|
|
682
|
+
profile: target.profile,
|
|
683
|
+
status: "disconnected",
|
|
684
|
+
},
|
|
685
|
+
workbenchCloud: remote,
|
|
686
|
+
}, parsed, io, () => `Disconnected ${formatAuthTarget(target)}; Workbench Cloud: ${remote.sync}${remote.reason ? ` (${remote.reason})` : ""}.`);
|
|
757
687
|
}
|
|
758
688
|
function getCliVersion() {
|
|
759
689
|
const manifest = require("../package.json");
|
|
@@ -763,19 +693,17 @@ function commandHelp(command) {
|
|
|
763
693
|
return COMMAND_HELP[command] ?? HELP;
|
|
764
694
|
}
|
|
765
695
|
function validateCommandFlags(parsed, command) {
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
}
|
|
769
|
-
const allowed = allowedFlagsForCommand(parsed, command);
|
|
696
|
+
const effectiveCommand = command ?? (parsed.flags.version === true ? "version" : "status");
|
|
697
|
+
const allowed = allowedFlagsForCommand(parsed, effectiveCommand);
|
|
770
698
|
if (!allowed) {
|
|
771
699
|
return;
|
|
772
700
|
}
|
|
773
|
-
const allowedSet = new Set(allowed);
|
|
701
|
+
const allowedSet = new Set(Object.keys(allowed));
|
|
774
702
|
for (const [name, value] of Object.entries(parsed.flags)) {
|
|
775
|
-
if (!allowedSet.has(name)
|
|
776
|
-
throw new WorkbenchUserError(`Unsupported flag --${name} for workbench ${
|
|
703
|
+
if (!allowedSet.has(name)) {
|
|
704
|
+
throw new WorkbenchUserError(`Unsupported flag --${name} for workbench ${effectiveCommand}.`);
|
|
777
705
|
}
|
|
778
|
-
validateFlagValue(name, value,
|
|
706
|
+
validateFlagValue(name, value, allowed[name]);
|
|
779
707
|
}
|
|
780
708
|
}
|
|
781
709
|
function allowedFlagsForCommand(parsed, command) {
|
|
@@ -784,25 +712,12 @@ function allowedFlagsForCommand(parsed, command) {
|
|
|
784
712
|
return COMMAND_FLAGS[command];
|
|
785
713
|
}
|
|
786
714
|
const subcommand = parsed.positionals[1] ?? subcommands.defaultSubcommand;
|
|
787
|
-
return subcommand ? subcommands.flags[subcommand] ??
|
|
715
|
+
return subcommand ? subcommands.flags[subcommand] ?? { ...COMMON_FLAGS, ...HELP_FLAG } : { ...COMMON_FLAGS, ...HELP_FLAG };
|
|
788
716
|
}
|
|
789
|
-
function validateFlagValue(name, value,
|
|
790
|
-
const kind = FLAG_DEFINITIONS[name];
|
|
717
|
+
function validateFlagValue(name, value, kind) {
|
|
791
718
|
if (!kind) {
|
|
792
719
|
return;
|
|
793
720
|
}
|
|
794
|
-
if (repeatString) {
|
|
795
|
-
if (Array.isArray(value)) {
|
|
796
|
-
if (value.some((entry) => !entry.trim())) {
|
|
797
|
-
throw new WorkbenchUserError(`--${name} requires a non-empty value.`);
|
|
798
|
-
}
|
|
799
|
-
return;
|
|
800
|
-
}
|
|
801
|
-
if (typeof value === "string" && value.trim()) {
|
|
802
|
-
return;
|
|
803
|
-
}
|
|
804
|
-
throw new WorkbenchUserError(`--${name} requires a non-empty value.`);
|
|
805
|
-
}
|
|
806
721
|
if (kind === "boolean") {
|
|
807
722
|
if (value !== true) {
|
|
808
723
|
throw new WorkbenchUserError(`--${name} does not accept a value.`);
|
|
@@ -826,15 +741,28 @@ function validateFlagValue(name, value, repeatString = false) {
|
|
|
826
741
|
}
|
|
827
742
|
}
|
|
828
743
|
const CONFIG_SCHEMA = "workbench.cli.config.v1";
|
|
744
|
+
const DEFAULT_WORKBENCH_CLOUD_BASE_URL = "https://v2.workbench.ai";
|
|
829
745
|
const API_REQUEST_MAX_ATTEMPTS = 3;
|
|
830
746
|
const API_REQUEST_GZIP_THRESHOLD_BYTES = 1024 * 1024;
|
|
747
|
+
const CLOUD_RUN_TIMEOUT_MS = 30 * 60 * 1000;
|
|
748
|
+
const CLOUD_RUN_POLL_INTERVAL_MS = 3000;
|
|
831
749
|
async function handleLogin(parsed, io) {
|
|
832
|
-
|
|
833
|
-
|
|
750
|
+
const provider = optionalPositional(parsed, 1);
|
|
751
|
+
if (provider) {
|
|
752
|
+
if (parsed.positionals.length > 2) {
|
|
753
|
+
throw new WorkbenchUserError("workbench login PROVIDER accepts only one provider argument.");
|
|
754
|
+
}
|
|
755
|
+
if (parsed.flags["start-only"] === true || parsed.flags.wait === true || parsed.flags.timeout !== undefined || parsed.flags["no-open"] === true) {
|
|
756
|
+
throw new WorkbenchCodedError("usage", "Workbench Cloud login flags do not apply to provider login.", {
|
|
757
|
+
remediation: `Run workbench login ${provider} --method ${authMethod(parsed, provider)}.`,
|
|
758
|
+
exitCode: 2,
|
|
759
|
+
});
|
|
760
|
+
}
|
|
761
|
+
return await handleAdapterLogin(provider, parsed, io);
|
|
834
762
|
}
|
|
835
763
|
if (parsed.flags["start-only"] === true && parsed.flags.wait === true) {
|
|
836
764
|
throw new WorkbenchCodedError("usage", "workbench login accepts only one of --start-only or --wait.", {
|
|
837
|
-
remediation: "Run workbench login --start-only or workbench login --wait
|
|
765
|
+
remediation: "Run workbench login --start-only or workbench login --wait.",
|
|
838
766
|
exitCode: 2,
|
|
839
767
|
});
|
|
840
768
|
}
|
|
@@ -843,22 +771,17 @@ async function handleLogin(parsed, io) {
|
|
|
843
771
|
const timeoutSeconds = intFlag(parsed, "timeout");
|
|
844
772
|
if (startOnly && timeoutSeconds !== undefined) {
|
|
845
773
|
throw new WorkbenchCodedError("usage", "workbench login --timeout only applies with --wait.", {
|
|
846
|
-
remediation: "Run workbench login --start-only, then workbench login --wait
|
|
847
|
-
exitCode: 2,
|
|
848
|
-
});
|
|
849
|
-
}
|
|
850
|
-
if (waitOnly && timeoutSeconds === undefined) {
|
|
851
|
-
throw new WorkbenchCodedError("usage", "workbench login --wait requires --timeout N.", {
|
|
852
|
-
remediation: "Run workbench login --wait --timeout 120.",
|
|
774
|
+
remediation: "Run workbench login --start-only, then workbench login --wait.",
|
|
853
775
|
exitCode: 2,
|
|
854
776
|
});
|
|
855
777
|
}
|
|
856
778
|
const config = await loadConfig();
|
|
857
|
-
const
|
|
858
|
-
|
|
779
|
+
const explicitBaseUrl = stringFlag(parsed, "base-url");
|
|
780
|
+
const pending = waitOnly ? await readPendingDeviceAuthorization(explicitBaseUrl) : null;
|
|
781
|
+
const baseUrl = pending?.baseUrl ?? selectWorkbenchBaseUrl({
|
|
782
|
+
explicitBaseUrl,
|
|
859
783
|
configBaseUrl: config.baseUrl,
|
|
860
784
|
});
|
|
861
|
-
const pending = waitOnly ? await readPendingDeviceAuthorization(baseUrl) : null;
|
|
862
785
|
const record = pending ?? await startDeviceAuthorization(baseUrl);
|
|
863
786
|
const freshAuthorization = pending === null;
|
|
864
787
|
if (startOnly) {
|
|
@@ -873,8 +796,8 @@ async function handleLogin(parsed, io) {
|
|
|
873
796
|
verificationUriComplete: record.verification_uri_complete,
|
|
874
797
|
userCode: record.user_code,
|
|
875
798
|
expiresAt: record.expiresAt,
|
|
876
|
-
resume: "workbench login --wait
|
|
877
|
-
}, parsed, io, () => `Open ${record.verification_uri_complete}\nCode: ${record.user_code}\nResume: workbench login --wait
|
|
799
|
+
resume: "workbench login --wait",
|
|
800
|
+
}, parsed, io, () => `Open ${record.verification_uri_complete}\nCode: ${record.user_code}\nResume: workbench login --wait`);
|
|
878
801
|
}
|
|
879
802
|
await writePendingDeviceAuthorization(record);
|
|
880
803
|
if (freshAuthorization && !parsed.flags.json) {
|
|
@@ -911,15 +834,16 @@ async function handleLogin(parsed, io) {
|
|
|
911
834
|
}, parsed, io, () => `Workbench Cloud: authenticated${username ? ` as ${username}` : ""}\nWorkbench API: ${baseUrl}`);
|
|
912
835
|
}
|
|
913
836
|
async function handleLogout(parsed, io) {
|
|
914
|
-
|
|
915
|
-
|
|
837
|
+
const provider = optionalPositional(parsed, 1);
|
|
838
|
+
if (provider) {
|
|
839
|
+
if (parsed.positionals.length > 2) {
|
|
840
|
+
throw new WorkbenchUserError("workbench logout PROVIDER accepts only one provider argument.");
|
|
841
|
+
}
|
|
842
|
+
return await handleAdapterLogout(provider, parsed, io);
|
|
916
843
|
}
|
|
917
844
|
const config = await loadConfig();
|
|
918
845
|
const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
|
|
919
846
|
const tokenPresent = Boolean(config.accessToken);
|
|
920
|
-
if (tokenPresent && !baseUrl) {
|
|
921
|
-
throw new WorkbenchUserError("Missing Workbench API URL. Set WORKBENCH_API_URL or run `workbench login --base-url URL`.");
|
|
922
|
-
}
|
|
923
847
|
let revoke = "skipped";
|
|
924
848
|
if (config.accessToken && baseUrl) {
|
|
925
849
|
try {
|
|
@@ -950,51 +874,33 @@ async function handleLogout(parsed, io) {
|
|
|
950
874
|
`Logged out of Workbench${baseUrl ? ` (${baseUrl})` : ""}.`,
|
|
951
875
|
`Token: ${tokenPresent ? "present" : "absent"}; revoke ${revoke}; config ${configRemoved ? "removed" : "unchanged"}.`,
|
|
952
876
|
adapterAuthRetained
|
|
953
|
-
? "Local adapter auth records were retained; run workbench
|
|
877
|
+
? "Local adapter auth records were retained; run workbench logout PROVIDER to remove them."
|
|
954
878
|
: "No local adapter auth records remain.",
|
|
955
879
|
].join("\n"));
|
|
956
880
|
}
|
|
957
881
|
async function handleInstall(parsed, io) {
|
|
958
|
-
const
|
|
959
|
-
flag: "source",
|
|
960
|
-
usage: "workbench install requires --source SOURCE.",
|
|
961
|
-
remediation: "Run workbench install --source https://HOST/skills/OWNER/SKILL --agent codex.",
|
|
962
|
-
});
|
|
882
|
+
const sourceInput = requiredPositional(parsed, 1, "workbench install requires HANDLE_OR_URL.");
|
|
963
883
|
rejectExtraInput(parsed, {
|
|
964
|
-
maxPositionals:
|
|
965
|
-
message: "workbench install accepts
|
|
966
|
-
remediation: "Run workbench install
|
|
884
|
+
maxPositionals: 2,
|
|
885
|
+
message: "workbench install accepts one HANDLE_OR_URL argument.",
|
|
886
|
+
remediation: "Run workbench install OWNER/SKILL --to codex.",
|
|
967
887
|
});
|
|
968
|
-
|
|
969
|
-
throw new WorkbenchCodedError("install_target_required", "workbench install requires an explicit target.", {
|
|
970
|
-
remediation: "Run workbench install --source SOURCE --agent codex, workbench install --source SOURCE --agent claude, or workbench install --source SOURCE --local.",
|
|
971
|
-
exitCode: 2,
|
|
972
|
-
});
|
|
973
|
-
}
|
|
888
|
+
const source = await resolveWorkbenchInstallSourceInput(sourceInput);
|
|
974
889
|
const workbenchSource = parseWorkbenchInstallSource(source);
|
|
975
890
|
if (!workbenchSource) {
|
|
976
891
|
throw new WorkbenchCodedError("usage", "workbench install requires a Workbench Cloud source URL.", {
|
|
977
|
-
remediation: "Run workbench install
|
|
892
|
+
remediation: "Run workbench install OWNER/SKILL --to codex.",
|
|
978
893
|
exitCode: 2,
|
|
979
894
|
});
|
|
980
895
|
}
|
|
981
896
|
const snapshot = await fetchWorkbenchInstallSourceSnapshot(workbenchSource, source);
|
|
982
897
|
const sourceSummary = workbenchInstallSourceSummary(workbenchSource, snapshot);
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
skills: [snapshot.name],
|
|
987
|
-
fileCount: snapshot.files.length,
|
|
988
|
-
targets: installTargetsToJson(supportedInstallTargets()),
|
|
989
|
-
}, parsed, io, () => [
|
|
990
|
-
`${snapshot.name}\t${snapshot.versionId}\tfiles=${snapshot.files.length}`,
|
|
991
|
-
"Targets:",
|
|
992
|
-
...supportedInstallTargets().map((target) => ` ${target.agent}\t${target.destination}`),
|
|
993
|
-
].join("\n"));
|
|
994
|
-
}
|
|
898
|
+
const config = await loadConfig();
|
|
899
|
+
const toTargets = stringsFlag(parsed, "to");
|
|
900
|
+
const selectedTargets = toTargets.length > 0 ? normalizeInstallTargetNames(toTargets) : await defaultInstallTargetNames(config);
|
|
995
901
|
const targets = resolveInstallTargets({
|
|
996
|
-
agents:
|
|
997
|
-
local:
|
|
902
|
+
agents: selectedTargets.filter((target) => target !== "local"),
|
|
903
|
+
local: selectedTargets.some((target) => target === "local"),
|
|
998
904
|
skillName: snapshot.name,
|
|
999
905
|
});
|
|
1000
906
|
const result = await installSnapshotToTargets({
|
|
@@ -1003,6 +909,9 @@ async function handleInstall(parsed, io) {
|
|
|
1003
909
|
overwrite: parsed.flags.yes === true,
|
|
1004
910
|
dryRun: parsed.flags["dry-run"] === true,
|
|
1005
911
|
});
|
|
912
|
+
if (toTargets.length > 0 && parsed.flags["dry-run"] !== true) {
|
|
913
|
+
await writeConfig({ ...config, installTargets: selectedTargets });
|
|
914
|
+
}
|
|
1006
915
|
return emitResult("workbench.cli.install.v1", {
|
|
1007
916
|
source: sourceSummary,
|
|
1008
917
|
result: result.result,
|
|
@@ -1016,6 +925,327 @@ async function handleInstall(parsed, io) {
|
|
|
1016
925
|
...result.targets.map((target) => ` ${target.agent}\t${target.previous}\t${target.destination}`),
|
|
1017
926
|
].join("\n"));
|
|
1018
927
|
}
|
|
928
|
+
async function handleCloudEval(parsed, io) {
|
|
929
|
+
const started = await startCloudExecution("eval", parsed);
|
|
930
|
+
const artifactIds = await artifactIdsByRunId(started.core, started.runs);
|
|
931
|
+
const failedRuns = started.runs.filter((run) => run.status === "failed" || run.status === "canceled");
|
|
932
|
+
if (failedRuns.length > 0) {
|
|
933
|
+
return emitEvalFailure(started.runs, failedRuns, artifactIds, parsed, io);
|
|
934
|
+
}
|
|
935
|
+
const deltas = await evalDeltas(started.core, started.runs);
|
|
936
|
+
const next = await evalSuccessNextCommand(started.core, started.runs);
|
|
937
|
+
return emitResult("workbench.cli.eval.v1", {
|
|
938
|
+
result: started.runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
|
|
939
|
+
deltas: deltas,
|
|
940
|
+
next: next,
|
|
941
|
+
cloud: cloudExecutionSummary(started),
|
|
942
|
+
}, parsed, io, () => [
|
|
943
|
+
`Completed hosted eval on ${started.remote.url}.`,
|
|
944
|
+
started.runs.map(formatRun).join("\n"),
|
|
945
|
+
...deltas.map(formatEvalDelta),
|
|
946
|
+
...(next ? [`next: ${next}`] : []),
|
|
947
|
+
].filter(Boolean).join("\n"));
|
|
948
|
+
}
|
|
949
|
+
/**
 * Runs an improve through startCloudExecution("improve", ...) and reports it.
 *
 * Throws an "improve_failed" WorkbenchCodedError when any returned run has
 * status "failed" or "canceled". Otherwise it calls
 * switchHostedImproveVersionIfPromoted and emits a "workbench.cli.improve.v1"
 * result, including switchedVersionId only when that call returned one.
 *
 * @param parsed Parsed CLI arguments; forwarded to the helpers and emitter.
 * @param io Output streams; forwarded to the emitter.
 * @returns Whatever emitResult returns.
 */
async function handleCloudImprove(parsed, io) {
    const execution = await startCloudExecution("improve", parsed);
    const { core, runs } = execution;
    const artifactIds = await artifactIdsByRunId(core, runs);
    const unsuccessful = runs.filter(({ status }) => status === "failed" || status === "canceled");
    if (unsuccessful.length > 0) {
        const firstFailure = unsuccessful[0];
        const statuses = Object.fromEntries(unsuccessful.map(({ id, status }) => [id, status]));
        throw new WorkbenchCodedError("improve_failed", "Hosted improve failed; evidence was saved.", {
            remediation: `Run workbench show ${firstFailure.id}.`,
            subject: {
                runIds: unsuccessful.map(({ id }) => id),
                statuses,
            },
            exitCode: 1,
        });
    }
    const switchedVersionId = await switchHostedImproveVersionIfPromoted(execution);
    const next = cloudImproveNextCommand(runs);
    const payload = {
        result: runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
        next,
        cloud: cloudExecutionSummary(execution),
    };
    if (switchedVersionId) {
        payload.switchedVersionId = switchedVersionId;
    }
    // Human-readable rendering; empty segments are dropped before joining.
    const renderText = () => {
        const lines = [
            `Completed hosted improve on ${execution.remote.url}.`,
            runs.map(formatRun).join("\n"),
        ];
        if (switchedVersionId) {
            lines.push(`Switched local source to ${displayRef(switchedVersionId)}.`);
        }
        if (next) {
            lines.push(`next: ${next}`);
        }
        return lines.filter(Boolean).join("\n");
    };
    return emitResult("workbench.cli.improve.v1", payload, parsed, io, renderText);
}
|
|
978
|
+
/**
 * Picks the install targets to use when none were requested explicitly.
 *
 * A non-empty config.installTargets wins. Otherwise every supported target
 * other than "local" is kept when the directory two levels above its
 * destination exists; if none qualifies the result is ["local"].
 *
 * @param config Loaded CLI config; only installTargets is read.
 * @returns Array of target agent names.
 */
async function defaultInstallTargetNames(config) {
    const remembered = config.installTargets;
    if (remembered && remembered.length > 0) {
        return remembered;
    }
    const detected = [];
    for (const { agent, destination } of supportedInstallTargets()) {
        if (agent !== "local") {
            // Grandparent of the destination; presumably the agent's home directory.
            const agentHome = path.dirname(path.dirname(destination));
            if (await pathExists(agentHome)) {
                detected.push(agent);
            }
        }
    }
    return detected.length === 0 ? ["local"] : detected;
}
|
|
994
|
+
/**
 * Trims and lower-cases install target names, then removes duplicates while
 * keeping first-seen order.
 *
 * @param values Iterable of target name strings.
 * @returns Array of unique names drawn from "codex", "claude" and "local".
 * @throws WorkbenchCodedError ("usage", exit code 2) on the first name that
 *   is not one of those three.
 */
function normalizeInstallTargetNames(values) {
    const supported = new Set(["codex", "claude", "local"]);
    const unique = new Set();
    for (const value of values) {
        const target = value.trim().toLowerCase();
        if (!supported.has(target)) {
            throw new WorkbenchCodedError("usage", `Unsupported install target: ${value}`, {
                remediation: "Use --to codex, --to claude, or --to local.",
                exitCode: 2,
            });
        }
        unique.add(target);
    }
    return [...unique];
}
|
|
1008
|
+
/**
 * Reports whether fs.access succeeds for the given path.
 *
 * @param filePath Path to probe.
 * @returns true when fs.access resolves, false when it fails for any reason.
 */
async function pathExists(filePath) {
    let accessible = true;
    try {
        await fs.access(filePath);
    }
    catch {
        // Any access failure counts as "does not exist".
        accessible = false;
    }
    return accessible;
}
|
|
1017
|
+
/**
 * Starts a hosted "eval" or "improve" run and waits for it to finish.
 *
 * Steps, in order: resolve the cloud remote, parse its URL, obtain a token,
 * sync the remote, snapshot local state, resolve the skill id, POST the
 * request, sync again, then wait on waitForCloudRuns.
 *
 * @param command "improve" posts to ".../improve"; anything else to ".../runs".
 * @param parsed Parsed CLI arguments.
 * @returns Object with core, remote, skillId, runs, startVersionId, source and
 *   before/after sync counters.
 * @throws WorkbenchCodedError "remote_invalid_url" when the remote URL does
 *   not parse, "auth_required" when no token is available, and
 *   "cloud_run_missing" when the response contains no runs.
 */
async function startCloudExecution(command, parsed) {
    const root = dirFlag(parsed) ?? process.cwd();
    const remote = await ensureCloudRemoteForExecution(root, parsed);
    const source = parseWorkbenchInstallSource(remote.url);
    if (!source) {
        throw new WorkbenchCodedError("remote_invalid_url", `Workbench remote is not a Cloud skill URL: ${remote.url}`, {
            remediation: "Run workbench publish to recreate the Workbench Cloud link.",
            subject: { remote: remote.name, url: remote.url },
            exitCode: 2,
        });
    }
    const token = await workbenchCloudToken({ baseUrl: source.baseUrl });
    if (!token) {
        throw new WorkbenchCodedError("auth_required", `workbench ${command} --cloud requires Workbench Cloud auth.`, {
            remediation: `Run workbench login --base-url ${source.baseUrl}.`,
            exitCode: 1,
        });
    }
    const core = { dir: root, authToken: token };
    const syncRemote = () => syncWorkbenchRemote({ ...core, remote: remote.name });
    const syncCounters = ({ pushed, pulled, upToDate }) => ({ pushed, pulled, upToDate });
    const syncBefore = await syncRemote();
    const startSnapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const skillId = await resolveCloudSkillId(source);
    const action = command === "improve" ? "/improve" : "/runs";
    const endpoint = `/api/workbench/skills/${encodeURIComponent(skillId)}${action}`;
    const request = { method: "POST", body: cloudExecutionRequestBody(command, parsed) };
    const response = await apiRequest(endpoint, request, source.baseUrl);
    const runs = response.runs ?? [];
    if (runs.length === 0) {
        throw new WorkbenchCodedError("cloud_run_missing", `Workbench Cloud did not return a run for ${command}.`, {
            retryable: true,
            remediation: "Run workbench log --runs.",
            subject: { remote: remote.name, skillId },
            exitCode: 1,
        });
    }
    const initialSync = await syncRemote();
    const completed = await waitForCloudRuns({ core, remote, runs, initialSync });
    return {
        core,
        remote,
        skillId,
        runs: completed.runs,
        startVersionId: startSnapshot.status.currentVersionId ?? startSnapshot.refs.current,
        source,
        sync: {
            before: syncCounters(syncBefore),
            after: syncCounters(completed.sync),
        },
    };
}
|
|
1069
|
+
/**
 * Polls local inspection snapshots until every requested run is terminal.
 *
 * Between polls the remote is synced again; the most recent sync result is
 * returned with the runs. The timeout and poll interval come from
 * WORKBENCH_CLOUD_RUN_TIMEOUT_MS and WORKBENCH_CLOUD_RUN_POLL_INTERVAL_MS when
 * those hold positive integers, otherwise from the module defaults.
 *
 * @param input Object with core, remote, runs and initialSync.
 * @returns { runs, sync } once all runs are found and terminal.
 * @throws WorkbenchCodedError "cloud_run_missing" when any run lacks a
 *   non-empty string id, and "cloud_run_pending" when the deadline passes.
 */
async function waitForCloudRuns(input) {
    const runIds = input.runs
        .map((run) => run.id)
        .filter((id) => typeof id === "string" && id.length > 0);
    if (runIds.length === 0 || runIds.length !== input.runs.length) {
        throw new WorkbenchCodedError("cloud_run_missing", "Workbench Cloud did not return a run id.", {
            retryable: true,
            remediation: "Run workbench log --runs.",
            exitCode: 1,
        });
    }
    let sync = input.initialSync;
    const timeoutMs = positiveIntEnv("WORKBENCH_CLOUD_RUN_TIMEOUT_MS") ?? CLOUD_RUN_TIMEOUT_MS;
    const pollIntervalMs = positiveIntEnv("WORKBENCH_CLOUD_RUN_POLL_INTERVAL_MS") ?? CLOUD_RUN_POLL_INTERVAL_MS;
    const deadline = Date.now() + timeoutMs;
    while (true) {
        const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(input.core);
        const runs = runIds
            .map((id) => snapshot.runs.find((entry) => entry.id === id))
            .filter((run) => Boolean(run));
        if (runs.length === runIds.length && runs.every(isTerminalRun)) {
            return { runs, sync };
        }
        const remainingMs = deadline - Date.now();
        if (remainingMs <= 0) {
            throw new WorkbenchCodedError("cloud_run_pending", "Hosted Workbench run is still running.", {
                retryable: true,
                // runIds is guaranteed non-empty by the validation above.
                remediation: `Run workbench show ${runIds[0]}.`,
                subject: {
                    runIds,
                    statuses: Object.fromEntries(runs.map((run) => [run.id, run.status])),
                },
                exitCode: 1,
            });
        }
        // Never sleep past the deadline: a full poll interval here could
        // overshoot the configured timeout by up to one interval.
        await sleep(Math.min(pollIntervalMs, remainingMs));
        sync = await syncWorkbenchRemote({ ...input.core, remote: input.remote.name });
    }
}
|
|
1107
|
+
/**
 * Tells whether a run has finished.
 *
 * @param run Object with a status property.
 * @returns true when status is "succeeded", "failed" or "canceled".
 */
function isTerminalRun(run) {
    const terminalStatuses = ["succeeded", "failed", "canceled"];
    return terminalStatuses.includes(run.status);
}
|
|
1110
|
+
/**
 * Switches the local source to the version produced by a hosted improve, but
 * only when the cloud's "current" ref equals that version.
 *
 * @param started Result of startCloudExecution.
 * @returns The id of the switched-to version, or undefined when no succeeded
 *   run has an outputVersionId or the cloud's current ref differs from it.
 * @throws WorkbenchCodedError "worktree_changed" when the local current
 *   version no longer matches started.startVersionId.
 */
async function switchHostedImproveVersionIfPromoted(started) {
    const promotedRun = started.runs.find((run) => run.status === "succeeded" && run.outputVersionId);
    const outputVersionId = promotedRun?.outputVersionId;
    if (!outputVersionId) {
        return undefined;
    }
    const cloudRefs = await fetchCloudObjectRefs(started);
    if (cloudRefs.current !== outputVersionId) {
        return undefined;
    }
    // NOTE(review): presumably refreshes local version state before the snapshot — confirm.
    await listWorkbenchVersions(started.core);
    const { status, refs } = await createWorkbenchReadOnlyInspectionSnapshot(started.core);
    const currentVersionId = status.currentVersionId ?? refs.current;
    const localMoved = Boolean(started.startVersionId) &&
        Boolean(currentVersionId) &&
        currentVersionId !== started.startVersionId;
    if (localMoved) {
        throw new WorkbenchCodedError("worktree_changed", "Local source changed while hosted improve was running; refusing to overwrite it.", {
            remediation: `Review workbench diff, then run workbench switch ${outputVersionId} when ready.`,
            subject: {
                startedFrom: started.startVersionId,
                current: currentVersionId,
                hostedVersion: outputVersionId,
            },
            exitCode: 1,
        });
    }
    const { id } = await switchWorkbenchVersion(outputVersionId, started.core);
    return id;
}
|
|
1136
|
+
/**
 * Requests the skill's objects endpoint and returns its refs.
 *
 * @param started Object with skillId and source.baseUrl.
 * @returns response.objectPack.refs, or an empty object when absent.
 */
async function fetchCloudObjectRefs(started) {
    const { skillId, source } = started;
    const endpoint = `/api/workbench/skills/${encodeURIComponent(skillId)}/objects`;
    const { objectPack } = await apiRequest(endpoint, {}, source.baseUrl);
    return objectPack?.refs ?? {};
}
|
|
1140
|
+
/**
 * Returns the Workbench Cloud remote to execute against, creating the link
 * when the project has none.
 *
 * The remotes list is read once and reused both to find an existing cloud
 * remote and to choose the name for a new one. Reading it separately for each
 * decision costs a second inspection snapshot and could, in principle, see
 * two different states.
 *
 * @param root Project directory.
 * @param parsed Parsed CLI arguments; used to derive the remote.
 * @returns The existing preferred cloud remote, or the remote returned by
 *   addWorkbenchRemote.
 * @throws WorkbenchCodedError "remote_invalid_url" when the derived URL does
 *   not parse, and "auth_required" when no token is available.
 */
async function ensureCloudRemoteForExecution(root, parsed) {
    const remotes = await inspectionRemotes(root);
    const linked = preferredCloudRemote(remotes);
    if (linked) {
        return linked;
    }
    const link = cloudRemoteLinkTargetFromRemotes(remotes);
    const remote = await derivePublishCloudRemote(parsed, "workbench --cloud", link.name);
    const source = parseWorkbenchInstallSource(remote.url);
    if (!source) {
        throw new WorkbenchCodedError("remote_invalid_url", `Workbench remote is not a Cloud skill URL: ${remote.url}`, {
            remediation: "Run workbench publish to recreate the Workbench Cloud link.",
            subject: { remote: remote.name, url: remote.url },
            exitCode: 2,
        });
    }
    const token = await workbenchCloudToken({ baseUrl: source.baseUrl });
    if (!token) {
        throw new WorkbenchCodedError("auth_required", "workbench --cloud requires Workbench Cloud auth.", {
            remediation: `Run workbench login --base-url ${source.baseUrl}.`,
            exitCode: 1,
        });
    }
    const result = await addWorkbenchRemote(remote.name, remote.url, {
        dir: root,
        authToken: token,
        replace: link.replace,
    });
    return result.remote;
}
|
|
1169
|
+
/**
 * Finds the project's preferred Workbench Cloud remote.
 *
 * @param root Project directory.
 * @returns The preferred cloud remote, or null when there is none.
 */
async function linkedCloudRemote(root) {
    const remotes = await inspectionRemotes(root);
    const preferred = preferredCloudRemote(remotes);
    return preferred ?? null;
}
|
|
1172
|
+
/**
 * Lists the remotes recorded in a read-only inspection snapshot.
 *
 * WorkbenchCodedError and WorkbenchUserError rejections from the snapshot are
 * treated as "no remotes"; any other rejection propagates.
 *
 * @param root Project directory.
 * @returns snapshot.remotes, or an empty array.
 */
async function inspectionRemotes(root) {
    const ignoreWorkbenchErrors = (error) => {
        const isWorkbenchError = error instanceof WorkbenchCodedError || error instanceof WorkbenchUserError;
        if (!isWorkbenchError) {
            throw error;
        }
        return null;
    };
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot({ dir: root }).catch(ignoreWorkbenchErrors);
    return snapshot?.remotes ?? [];
}
|
|
1181
|
+
/**
 * Computes the cloud remote link target for a project directory.
 *
 * @param root Project directory.
 * @returns The result of cloudRemoteLinkTargetFromRemotes for its remotes.
 */
async function cloudRemoteLinkTarget(root) {
    const remotes = await inspectionRemotes(root);
    return cloudRemoteLinkTargetFromRemotes(remotes);
}
|
|
1184
|
+
/**
 * Decides which remote name a cloud link should use.
 *
 * @param remotes Array of remote records.
 * @returns { name, replace: true, existing } when a cloud remote already
 *   exists, otherwise { name, replace: false } with an unused name.
 */
function cloudRemoteLinkTargetFromRemotes(remotes) {
    const existing = preferredCloudRemote(remotes);
    return existing
        ? { name: existing.name, replace: true, existing }
        : { name: availableCloudRemoteName(remotes), replace: false };
}
|
|
1191
|
+
/**
 * Chooses among remotes whose kind is "workbench-cloud".
 *
 * @param remotes Array of remote records with kind and name.
 * @returns The cloud remote named "cloud" if present, else the first cloud
 *   remote, else undefined.
 */
function preferredCloudRemote(remotes) {
    let firstCloud;
    for (const remote of remotes) {
        if (remote.kind === "workbench-cloud") {
            if (remote.name === "cloud") {
                return remote;
            }
            firstCloud ??= remote;
        }
    }
    return firstCloud;
}
|
|
1195
|
+
/**
 * Finds a remote name that is not taken yet.
 *
 * @param remotes Array of remote records with a name.
 * @returns "cloud" when unused, otherwise the first unused "cloud-N" with N
 *   counting up from 1.
 */
function availableCloudRemoteName(remotes) {
    const taken = new Set(remotes.map(({ name }) => name));
    let candidate = "cloud";
    let suffix = 0;
    while (taken.has(candidate)) {
        suffix += 1;
        candidate = `cloud-${suffix}`;
    }
    return candidate;
}
|
|
1207
|
+
/**
 * Looks up a skill's id in the list returned by "/api/workbench/skills".
 *
 * @param source Object with owner, skill and baseUrl.
 * @returns The id of the entry whose ownerSlug and name match.
 * @throws WorkbenchCodedError "remote_not_found" when there is no matching
 *   entry with an id.
 */
async function resolveCloudSkillId(source) {
    const { skills } = await apiRequest("/api/workbench/skills", {}, source.baseUrl);
    const match = skills?.find((entry) => entry.ownerSlug === source.owner && entry.name === source.skill);
    if (match?.id) {
        return match.id;
    }
    throw new WorkbenchCodedError("remote_not_found", `Workbench Cloud skill not found: ${source.owner}/${source.skill}`, {
        remediation: "Run workbench publish.",
        subject: { owner: source.owner, skill: source.skill },
        exitCode: 1,
    });
}
|
|
1219
|
+
/**
 * Builds the request body for a hosted eval or improve.
 *
 * @param command "improve" adds a budget field; other values do not.
 * @param parsed Parsed CLI arguments.
 * @returns Object with version, skill, agent, samples and, for improve, budget.
 */
function cloudExecutionRequestBody(command, parsed) {
    const body = {
        version: optionalPositional(parsed, 1),
        skill: stringFlag(parsed, "skills"),
        agent: stringFlag(parsed, "agents"),
        samples: intFlag(parsed, "samples"),
    };
    if (command === "improve") {
        body.budget = intFlag(parsed, "budget");
    }
    return body;
}
|
|
1228
|
+
/**
 * Suggests the follow-up command after a hosted improve.
 *
 * @param runs Completed runs.
 * @returns cloudExecutionNextCommand's result with "workbench eval" as the
 *   success command.
 */
function cloudImproveNextCommand(runs) {
    const onSuccess = "workbench eval";
    return cloudExecutionNextCommand(runs, onSuccess);
}
|
|
1231
|
+
/**
 * Suggests the next command from the first run's status.
 *
 * @param runs Array of runs; only the first entry is inspected.
 * @param successCommand Command to suggest in every other case.
 * @returns "workbench log --runs" when there is no first run, a
 *   "workbench show <ref>" command when it is running, failed or canceled,
 *   otherwise successCommand.
 */
function cloudExecutionNextCommand(runs, successCommand) {
    const first = runs[0];
    if (!first) {
        return "workbench log --runs";
    }
    switch (first.status) {
        case "running":
        case "failed":
        case "canceled":
            return `workbench show ${displayRef(first.id)}`;
        default:
            return successCommand;
    }
}
|
|
1241
|
+
/**
 * Projects a cloud execution onto the fields reported to the user.
 *
 * @param started Result of startCloudExecution.
 * @returns Object with remote (name), url, skillId and sync.
 */
function cloudExecutionSummary(started) {
    const { remote, skillId, sync } = started;
    return { remote: remote.name, url: remote.url, skillId, sync };
}
|
|
1019
1249
|
function workbenchInstallSourceSummary(source, snapshot) {
|
|
1020
1250
|
const installUrl = `${source.baseUrl}/skills/${encodeURIComponent(source.owner)}/${encodeURIComponent(source.skill)}`;
|
|
1021
1251
|
return {
|
|
@@ -1089,12 +1319,13 @@ async function fetchWorkbenchInstallSourceSnapshot(source, displaySource) {
|
|
|
1089
1319
|
throw new WorkbenchCodedError("auth_required", token
|
|
1090
1320
|
? `Workbench Cloud rejected the provided token while installing ${displaySource}.`
|
|
1091
1321
|
: `Authentication is required to install ${displaySource}.`, {
|
|
1092
|
-
remediation:
|
|
1322
|
+
remediation: "Run workbench login.",
|
|
1093
1323
|
exitCode: 1,
|
|
1094
1324
|
});
|
|
1095
1325
|
}
|
|
1096
1326
|
if (!response.ok) {
|
|
1097
|
-
|
|
1327
|
+
const excerpt = readResponseError(text);
|
|
1328
|
+
throw new WorkbenchCodedError("install_failed", `Unable to download Workbench source ${displaySource}: ${response.status}${excerpt ? ` ${excerpt}` : response.statusText ? ` ${response.statusText}` : ""}`, {
|
|
1098
1329
|
subject: { source: displaySource, status: response.status },
|
|
1099
1330
|
exitCode: 1,
|
|
1100
1331
|
});
|
|
@@ -1175,6 +1406,7 @@ async function loadConfig() {
|
|
|
1175
1406
|
...(typeof parsed.baseUrl === "string" ? { baseUrl: normalizeBaseUrl(parsed.baseUrl) } : {}),
|
|
1176
1407
|
...(typeof parsed.accessToken === "string" ? { accessToken: parsed.accessToken } : {}),
|
|
1177
1408
|
...(typeof parsed.username === "string" ? { username: parsed.username } : {}),
|
|
1409
|
+
...(Array.isArray(parsed.installTargets) ? { installTargets: normalizeInstallTargetNames(parsed.installTargets.flatMap((entry) => typeof entry === "string" ? [entry] : [])) } : {}),
|
|
1178
1410
|
};
|
|
1179
1411
|
}
|
|
1180
1412
|
// Single resolver for the Workbench Cloud token used by every authenticated
|
|
@@ -1214,18 +1446,15 @@ function deviceAuthPath() {
|
|
|
1214
1446
|
return process.env.WORKBENCH_DEVICE_AUTH?.trim() || path.join(path.dirname(configPath()), "device-auth.json");
|
|
1215
1447
|
}
|
|
1216
1448
|
/**
 * Resolves the Workbench base URL by delegating to optionalWorkbenchBaseUrl.
 *
 * @param input Optional candidates, passed through unchanged.
 * @returns Whatever optionalWorkbenchBaseUrl returns for that input.
 */
function selectWorkbenchBaseUrl(input = {}) {
    const baseUrl = optionalWorkbenchBaseUrl(input);
    return baseUrl;
}
|
|
1223
1451
|
/**
 * Picks the Workbench base URL from the first usable candidate, in priority
 * order: explicitBaseUrl, originBaseUrl, the WORKBENCH_API_URL environment
 * variable, configBaseUrl, then DEFAULT_WORKBENCH_CLOUD_BASE_URL.
 *
 * A candidate is usable only when it is a string with non-whitespace content.
 * Plain `??` chaining would accept an empty or blank value (for example
 * `WORKBENCH_API_URL=`) and produce an empty base URL instead of falling
 * through to the next candidate.
 *
 * @param input Optional object with explicitBaseUrl, originBaseUrl and
 *   configBaseUrl.
 * @returns The chosen URL after normalizeBaseUrl.
 */
function optionalWorkbenchBaseUrl(input = {}) {
    const candidates = [
        input.explicitBaseUrl,
        input.originBaseUrl,
        process.env.WORKBENCH_API_URL,
        input.configBaseUrl,
    ];
    const chosen = candidates.find((candidate) => typeof candidate === "string" && candidate.trim() !== "");
    return normalizeBaseUrl(chosen ?? DEFAULT_WORKBENCH_CLOUD_BASE_URL);
}
|
|
1230
1459
|
function normalizeBaseUrl(value) {
|
|
1231
1460
|
return value.trim().replace(/\/+$/u, "");
|
|
@@ -1243,7 +1472,8 @@ async function requestDeviceAuthorization(baseUrl) {
|
|
|
1243
1472
|
});
|
|
1244
1473
|
}
|
|
1245
1474
|
if (!response.ok) {
|
|
1246
|
-
|
|
1475
|
+
const excerpt = readResponseError(text);
|
|
1476
|
+
throw new WorkbenchCodedError("login_denied", `Device login failed: ${response.status}${excerpt ? ` ${excerpt}` : response.statusText ? ` ${response.statusText}` : ""}`, {
|
|
1247
1477
|
exitCode: 1,
|
|
1248
1478
|
});
|
|
1249
1479
|
}
|
|
@@ -1294,7 +1524,7 @@ async function pollDeviceToken(baseUrl, authorization, timeoutSeconds) {
|
|
|
1294
1524
|
}
|
|
1295
1525
|
throw new WorkbenchCodedError("login_pending", "Device login is still waiting for browser authorization.", {
|
|
1296
1526
|
retryable: true,
|
|
1297
|
-
remediation: "Authorize the device in the browser, then run workbench login --wait
|
|
1527
|
+
remediation: "Authorize the device in the browser, then run workbench login --wait.",
|
|
1298
1528
|
subject: {
|
|
1299
1529
|
retryAfterSeconds: Math.max(1, Math.ceil(intervalMs / 1000)),
|
|
1300
1530
|
verificationUri: authorization.verification_uri,
|
|
@@ -1318,7 +1548,8 @@ async function fetchWorkbenchUsername(baseUrl, accessToken) {
|
|
|
1318
1548
|
}
|
|
1319
1549
|
async function readPendingDeviceAuthorization(baseUrl) {
|
|
1320
1550
|
const record = await readDeviceAuthorizationJson(deviceAuthPath());
|
|
1321
|
-
|
|
1551
|
+
const expectedBaseUrl = baseUrl ? normalizeBaseUrl(baseUrl) : undefined;
|
|
1552
|
+
if (!record || (expectedBaseUrl && record.baseUrl !== expectedBaseUrl) || Date.parse(record.expiresAt) <= Date.now()) {
|
|
1322
1553
|
return null;
|
|
1323
1554
|
}
|
|
1324
1555
|
return record;
|
|
@@ -1408,7 +1639,8 @@ async function apiRequest(apiPath, options = {}, baseUrlOverride) {
|
|
|
1408
1639
|
}
|
|
1409
1640
|
throw requestError;
|
|
1410
1641
|
}
|
|
1411
|
-
const
|
|
1642
|
+
const excerpt = readResponseError(text);
|
|
1643
|
+
const requestError = new WorkbenchApiRequestError(response.status, `Request failed with status ${response.status}${response.statusText ? ` ${response.statusText}` : ""}${excerpt ? `: ${excerpt}` : ""}.`, text);
|
|
1412
1644
|
lastError = requestError;
|
|
1413
1645
|
if (canRetry && attempt < API_REQUEST_MAX_ATTEMPTS && isTransientApiRequestError(requestError)) {
|
|
1414
1646
|
await sleep(250 * attempt);
|
|
@@ -1428,8 +1660,11 @@ function encodeJsonRequestBody(body) {
|
|
|
1428
1660
|
if (Buffer.byteLength(text) < API_REQUEST_GZIP_THRESHOLD_BYTES) {
|
|
1429
1661
|
return { body: text, headers: { "content-type": "application/json" } };
|
|
1430
1662
|
}
|
|
1663
|
+
const compressed = gzipSync(text);
|
|
1664
|
+
const compressedBody = new ArrayBuffer(compressed.byteLength);
|
|
1665
|
+
new Uint8Array(compressedBody).set(compressed);
|
|
1431
1666
|
return {
|
|
1432
|
-
body:
|
|
1667
|
+
body: compressedBody,
|
|
1433
1668
|
headers: {
|
|
1434
1669
|
"content-encoding": "gzip",
|
|
1435
1670
|
"content-type": "application/json",
|
|
@@ -1498,11 +1733,21 @@ function readResponseError(text) {
|
|
|
1498
1733
|
const parsed = JSON.parse(text);
|
|
1499
1734
|
const record = asRecord(parsed);
|
|
1500
1735
|
const error = record?.error ?? record?.message;
|
|
1501
|
-
return typeof error === "string" && error.trim() ? error : null;
|
|
1736
|
+
return typeof error === "string" && error.trim() ? oneLineExcerpt(error) : null;
|
|
1502
1737
|
}
|
|
1503
1738
|
catch {
|
|
1504
|
-
|
|
1739
|
+
if (/<(?:!doctype|html|head|body)\b/iu.test(text)) {
|
|
1740
|
+
return null;
|
|
1741
|
+
}
|
|
1742
|
+
return oneLineExcerpt(text);
|
|
1743
|
+
}
|
|
1744
|
+
}
|
|
1745
|
+
/**
 * Collapses text onto one line and caps it at 180 UTF-16 code units.
 *
 * Runs of whitespace become single spaces and the ends are trimmed. Longer
 * results are cut to at most 177 code units followed by "...". If the cut
 * would land between the halves of a surrogate pair, the dangling high
 * surrogate is dropped so the excerpt never ends in a broken character.
 *
 * @param text Raw text.
 * @returns The excerpt, or null when nothing but whitespace remains.
 */
function oneLineExcerpt(text) {
    const line = text.replace(/\s+/gu, " ").trim();
    if (!line) {
        return null;
    }
    if (line.length <= 180) {
        return line;
    }
    let head = line.slice(0, 177);
    const lastUnit = head.charCodeAt(head.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        // High surrogate whose low half was cut off.
        head = head.slice(0, -1);
    }
    return `${head}...`;
}
|
|
1507
1752
|
function parseWorkbenchCloudErrorBody(text) {
|
|
1508
1753
|
try {
|
|
@@ -1535,6 +1780,14 @@ function errorMessage(error) {
|
|
|
1535
1780
|
/**
 * Returns a promise that resolves after a setTimeout of the given delay.
 *
 * @param ms Delay in milliseconds.
 * @returns Promise that resolves with no value.
 */
function sleep(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
|
|
1783
|
+
/**
 * Reads an environment variable as a positive safe integer.
 *
 * @param name Environment variable name.
 * @returns The number, or undefined when the variable is unset, blank, or
 *   does not convert via Number() to a safe integer greater than zero.
 */
function positiveIntEnv(name) {
    const raw = process.env[name]?.trim();
    if (!raw) {
        return undefined;
    }
    const value = Number(raw);
    if (!Number.isSafeInteger(value)) {
        return undefined;
    }
    return value > 0 ? value : undefined;
}
|
|
1538
1791
|
async function openBrowser(url) {
|
|
1539
1792
|
const command = process.platform === "darwin"
|
|
1540
1793
|
? "open"
|
|
@@ -1870,6 +2123,17 @@ function parseArgs(argv) {
|
|
|
1870
2123
|
addFlag(flags, "version", true);
|
|
1871
2124
|
continue;
|
|
1872
2125
|
}
|
|
2126
|
+
if (arg === "-n") {
|
|
2127
|
+
const value = argv[index + 1];
|
|
2128
|
+
if (value && !value.startsWith("-")) {
|
|
2129
|
+
index += 1;
|
|
2130
|
+
addFlag(flags, "samples", value);
|
|
2131
|
+
}
|
|
2132
|
+
else {
|
|
2133
|
+
addFlag(flags, "samples", true);
|
|
2134
|
+
}
|
|
2135
|
+
continue;
|
|
2136
|
+
}
|
|
1873
2137
|
if (!arg.startsWith("--") || arg === "--") {
|
|
1874
2138
|
positionals.push(arg);
|
|
1875
2139
|
continue;
|
|
@@ -1877,7 +2141,9 @@ function parseArgs(argv) {
|
|
|
1877
2141
|
const eq = arg.indexOf("=");
|
|
1878
2142
|
const name = eq === -1 ? arg.slice(2) : arg.slice(2, eq);
|
|
1879
2143
|
const value = eq === -1 ? argv[index + 1] : arg.slice(eq + 1);
|
|
1880
|
-
|
|
2144
|
+
const flagSpec = flagSpecForParsedPrefix(positionals, flags);
|
|
2145
|
+
const kind = flagSpec?.[name];
|
|
2146
|
+
if (eq === -1 && kind === "boolean") {
|
|
1881
2147
|
addFlag(flags, name, true);
|
|
1882
2148
|
}
|
|
1883
2149
|
else if (eq === -1 && value && !value.startsWith("-")) {
|
|
@@ -1890,8 +2156,12 @@ function parseArgs(argv) {
|
|
|
1890
2156
|
}
|
|
1891
2157
|
return { positionals, flags };
|
|
1892
2158
|
}
|
|
2159
|
+
/**
 * Looks up the allowed-flag spec for the command implied by the arguments
 * parsed so far.
 *
 * The command is the first positional when present; otherwise "version" if
 * the version flag is exactly true, else "status".
 *
 * @param positionals Positionals collected so far (copied before passing on).
 * @param flags Flags collected so far; only flags.version is read.
 * @returns Whatever allowedFlagsForCommand returns.
 */
function flagSpecForParsedPrefix(positionals, flags) {
    const fallback = flags.version === true ? "version" : "status";
    const command = positionals[0] ?? fallback;
    const prefix = { positionals: [...positionals], flags: {} };
    return allowedFlagsForCommand(prefix, command);
}
|
|
1893
2163
|
function addFlag(flags, name, value) {
|
|
1894
|
-
if (name === "with") {
|
|
2164
|
+
if (name === "with" || name === "to") {
|
|
1895
2165
|
const existing = flags[name];
|
|
1896
2166
|
flags[name] = Array.isArray(existing)
|
|
1897
2167
|
? [...existing, String(value)]
|
|
@@ -1900,15 +2170,6 @@ function addFlag(flags, name, value) {
|
|
|
1900
2170
|
: [String(existing), String(value)];
|
|
1901
2171
|
return;
|
|
1902
2172
|
}
|
|
1903
|
-
if (name === "agent" || name === "skill") {
|
|
1904
|
-
const existing = flags[name];
|
|
1905
|
-
flags[name] = Array.isArray(existing)
|
|
1906
|
-
? [...existing, String(value)]
|
|
1907
|
-
: existing === undefined
|
|
1908
|
-
? String(value)
|
|
1909
|
-
: [String(existing), String(value)];
|
|
1910
|
-
return;
|
|
1911
|
-
}
|
|
1912
2173
|
flags[name] = value;
|
|
1913
2174
|
}
|
|
1914
2175
|
function dirFlag(parsed) {
|
|
@@ -1972,14 +2233,154 @@ function rejectExtraInput(parsed, input) {
|
|
|
1972
2233
|
exitCode: 2,
|
|
1973
2234
|
});
|
|
1974
2235
|
}
|
|
1975
|
-
function
|
|
1976
|
-
|
|
1977
|
-
|
|
2236
|
+
/**
 * Builds the default "A..B" range for a diff: the current version's first
 * parent to the current version, or the current version to itself when it
 * has no first parent.
 *
 * @param core Core options passed to the listing and snapshot calls.
 * @returns Range string of the form "<from>..<to>".
 * @throws WorkbenchCodedError "version_not_found" when the current version
 *   id is not among the snapshot's versions.
 */
async function defaultDiffRange(core) {
    // NOTE(review): presumably refreshes version state before the snapshot — confirm.
    await listWorkbenchVersions(core);
    const { status, refs, versions } = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const currentId = status.currentVersionId ?? refs.current;
    const current = versions.find(({ id }) => id === currentId);
    if (!current) {
        throw new WorkbenchCodedError("version_not_found", "Current Workbench version was not found.", {
            remediation: "Run workbench log --versions.",
            exitCode: 1,
        });
    }
    const firstParent = current.parentIds[0];
    const from = firstParent ? firstParent : current.id;
    return `${from}..${current.id}`;
}
|
|
2250
|
+
/**
 * Maps the --private / --team / --public flags to a visibility value.
 *
 * A flag counts only when its value is exactly true. --team maps to
 * "internal".
 *
 * @param parsed Parsed CLI arguments.
 * @returns "private", "internal", "public", or undefined when none is set.
 * @throws WorkbenchCodedError ("usage", exit code 2) when more than one
 *   visibility flag is set.
 */
function parsePublishVisibilityFlags(parsed) {
    const flagToVisibility = [
        ["private", "private"],
        ["team", "internal"],
        ["public", "public"],
    ];
    const selected = [];
    for (const [flag, visibility] of flagToVisibility) {
        if (parsed.flags[flag] === true) {
            selected.push(visibility);
        }
    }
    if (selected.length > 1) {
        throw new WorkbenchCodedError("usage", "workbench publish accepts only one visibility flag.", {
            remediation: "Run workbench publish --private, workbench publish --team, or workbench publish --public.",
            exitCode: 2,
        });
    }
    return selected[0];
}
|
|
2264
|
+
/**
 * Computes what a publish would target, without adding a remote or
 * publishing anything.
 *
 * The remote is derived when --as is given or no cloud remote exists;
 * otherwise the existing cloud remote is used. The version is the requested
 * positional ref, or the current version when the ref is missing or "current".
 *
 * @param parsed Parsed CLI arguments.
 * @returns Object with remote, version, visibility (default "private"),
 *   installHandle, installUrl and pinnedInstallUrl.
 * @throws WorkbenchCodedError "version_not_found" when the ref does not
 *   resolve to a version in the snapshot.
 */
async function previewPublishWithDerivedRemote(parsed) {
    const root = path.resolve(dirFlag(parsed) ?? process.cwd());
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot({ dir: root });
    const link = cloudRemoteLinkTargetFromRemotes(snapshot.remotes);
    let remote;
    if (stringFlag(parsed, "as") || !link.existing) {
        remote = await derivePublishCloudRemote(parsed, "workbench publish", link.name);
    }
    else {
        remote = link.existing;
    }
    const requestedVersion = optionalPositional(parsed, 1);
    const wantsCurrent = !requestedVersion || requestedVersion === "current";
    const versionRef = wantsCurrent
        ? snapshot.status.currentVersionId ?? snapshot.refs.current ?? ""
        : requestedVersion;
    const version = snapshotVersionByRef(snapshot, versionRef);
    if (!version) {
        throw new WorkbenchCodedError("version_not_found", `Version not found: ${requestedVersion ?? "current"}`, {
            remediation: "Run workbench log --versions.",
            subject: { version: requestedVersion ?? "current" },
            exitCode: 1,
        });
    }
    const visibility = parsePublishVisibilityFlags(parsed) ?? "private";
    const installHandle = installHandleFromCloudRemote(remote);
    return {
        remote,
        version,
        visibility,
        installHandle,
        installUrl: remote.url,
        pinnedInstallUrl: `${remote.url}/releases/${encodeURIComponent(version.id)}`,
    };
}
|
|
2291
|
+
/**
 * Makes sure a cloud remote exists for publishing and returns its name.
 *
 * With --as, the remote is derived and added, passing link.replace. Without
 * --as, an existing cloud remote is reused as-is; if there is none, a remote
 * is derived and added with the plain core options.
 *
 * @param parsed Parsed CLI arguments.
 * @returns Name of the remote to publish to.
 */
async function ensurePublishRemote(parsed) {
    const core = await coreOptions(parsed);
    const root = path.resolve(dirFlag(parsed) ?? process.cwd());
    const link = await cloudRemoteLinkTarget(root);
    const override = stringFlag(parsed, "as");
    if (!override && link.existing) {
        return link.existing.name;
    }
    const remote = await derivePublishCloudRemote(parsed, "workbench publish", link.name);
    const addOptions = override ? { ...core, replace: link.replace } : core;
    const { remote: added } = await addWorkbenchRemote(remote.name, remote.url, addOptions);
    return added.name;
}
|
|
2308
|
+
/**
 * Builds a Workbench Cloud remote record for the project.
 *
 * The OWNER/SKILL handle comes from --as when given, otherwise from
 * derivedOwnerSkillHandle.
 *
 * @param parsed Parsed CLI arguments.
 * @param action Command name passed to derivedOwnerSkillHandle.
 * @param name Name for the remote.
 * @returns { name, kind: "workbench-cloud", url }.
 */
async function derivePublishCloudRemote(parsed, action = "workbench publish", name = "cloud") {
    const config = await loadConfig();
    const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl }) ?? DEFAULT_WORKBENCH_CLOUD_BASE_URL;
    const explicitHandle = stringFlag(parsed, "as");
    const { owner, skill } = explicitHandle
        ? parseOwnerSkillHandle(explicitHandle)
        : derivedOwnerSkillHandle(parsed, config, action);
    const ownerSegment = encodeURIComponent(owner);
    const skillSegment = encodeURIComponent(skill);
    return {
        name,
        kind: "workbench-cloud",
        url: `${baseUrl}/skills/${ownerSegment}/${skillSegment}`,
    };
}
|
|
2316
|
+
/**
 * Extracts the "owner/skill" handle from a cloud remote's URL.
 *
 * @param remote Remote record with name and url.
 * @returns "<owner>/<skill>" as parsed by parseWorkbenchInstallSource.
 * @throws WorkbenchCodedError "remote_invalid_url" when the URL does not parse.
 */
function installHandleFromCloudRemote(remote) {
    const source = parseWorkbenchInstallSource(remote.url);
    if (source) {
        const { owner, skill } = source;
        return `${owner}/${skill}`;
    }
    throw new WorkbenchCodedError("remote_invalid_url", `Workbench remote is not a Cloud skill URL: ${remote.url}`, {
        remediation: "Run workbench publish to recreate the Workbench Cloud link.",
        subject: { remote: remote.name, url: remote.url },
        exitCode: 2,
    });
}
|
|
2327
|
+
/**
 * Parses an OWNER/SKILL string, rejecting anything that does not normalize.
 *
 * @param input Raw handle text.
 * @returns The handle from normalizedOwnerSkillHandle.
 * @throws WorkbenchCodedError ("usage", exit code 2) when that returns null.
 */
function parseOwnerSkillHandle(input) {
    const handle = normalizedOwnerSkillHandle(input);
    if (handle) {
        return handle;
    }
    throw new WorkbenchCodedError("usage", "workbench publish --as expects OWNER/SKILL.", {
        remediation: "Run workbench publish --as OWNER/SKILL.",
        exitCode: 2,
    });
}
|
|
2337
|
+
/**
 * Derives OWNER/SKILL from the configured username and the project
 * directory's base name.
 *
 * @param parsed Parsed CLI arguments; used to resolve the project directory.
 * @param config Loaded CLI config; only username is read.
 * @param action Command name used in the error text.
 * @returns Normalized { owner, skill }.
 * @throws WorkbenchCodedError "auth_required" when there is no username, and
 *   "usage" when either normalized part is empty.
 */
function derivedOwnerSkillHandle(parsed, config, action) {
    const owner = config.username?.trim();
    if (!owner) {
        throw new WorkbenchCodedError("auth_required", `${action} needs a logged-in Workbench Cloud username before it can derive OWNER/SKILL.`, {
            remediation: "Run workbench login.",
            exitCode: 1,
        });
    }
    const root = path.resolve(dirFlag(parsed) ?? process.cwd());
    const directoryName = path.basename(root);
    const handle = normalizeOwnerSkillHandle(owner, directoryName);
    const isComplete = Boolean(handle.owner) && Boolean(handle.skill);
    if (!isComplete) {
        throw new WorkbenchCodedError("usage", `${action} could not derive a valid OWNER/SKILL handle.`, {
            remediation: `Run ${action} --as OWNER/SKILL.`,
            subject: { owner, skill: directoryName },
            exitCode: 2,
        });
    }
    return handle;
}
|
|
2356
|
+
/**
 * Turns an install argument into a skill URL.
 *
 * Input starting with "http://" or "https://" is returned unchanged. Anything
 * else must be an OWNER/SKILL handle, which is expanded against the resolved
 * base URL.
 *
 * @param input URL or OWNER/SKILL handle.
 * @returns Skill URL string.
 * @throws WorkbenchCodedError ("usage", exit code 2) when the input is
 *   neither a URL nor a valid handle.
 */
async function resolveWorkbenchInstallSourceInput(input) {
    const looksLikeUrl = /^https?:\/\//u.test(input);
    if (looksLikeUrl) {
        return input;
    }
    const handle = normalizedOwnerSkillHandle(input);
    if (!handle) {
        throw new WorkbenchCodedError("usage", "workbench install expects OWNER/SKILL or a Workbench Cloud skill URL.", {
            remediation: "Run workbench install OWNER/SKILL --to codex.",
            exitCode: 2,
        });
    }
    const { baseUrl: configBaseUrl } = await loadConfig();
    const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl }) ?? DEFAULT_WORKBENCH_CLOUD_BASE_URL;
    const ownerSegment = encodeURIComponent(handle.owner);
    const skillSegment = encodeURIComponent(handle.skill);
    return `${baseUrl}/skills/${ownerSegment}/${skillSegment}`;
}
|
|
2371
|
+
/**
 * Splits an "OWNER/SKILL" string and normalizes both halves.
 *
 * @param {string} value - Candidate handle text.
 * @returns {{owner: string, skill: string} | null} The normalized handle, or
 *   null when the text does not have exactly two "/"-separated parts or when
 *   either part normalizes to an empty value.
 */
function normalizedOwnerSkillHandle(value) {
    const segments = value.trim().split("/");
    if (segments.length !== 2) {
        return null;
    }
    const [ownerPart, skillPart] = segments;
    const normalized = normalizeOwnerSkillHandle(ownerPart, skillPart);
    if (normalized.owner && normalized.skill) {
        return normalized;
    }
    return null;
}
|
|
2379
|
+
/**
 * Normalizes both halves of a handle with `normalizeWorkbenchSkillName`.
 *
 * @param {string} owner - Raw owner part.
 * @param {string} skill - Raw skill part.
 * @returns {{owner: string, skill: string}} The normalized pair.
 */
function normalizeOwnerSkillHandle(owner, skill) {
    const normalizedOwner = normalizeWorkbenchSkillName(owner);
    const normalizedSkill = normalizeWorkbenchSkillName(skill);
    return { owner: normalizedOwner, skill: normalizedSkill };
}
|
|
1984
2385
|
function parseWithFlags(parsed) {
|
|
1985
2386
|
const raw = parsed.flags.with;
|
|
@@ -2021,7 +2422,7 @@ async function artifactIdsByRunId(core, runs) {
|
|
|
2021
2422
|
return byRun;
|
|
2022
2423
|
}
|
|
2023
2424
|
function emitEvalFailure(runs, failedRuns, artifactIds, parsed, io) {
|
|
2024
|
-
const
|
|
2425
|
+
const next = evalFailureNextCommand(failedRuns);
|
|
2025
2426
|
if (parsed.flags.json === true) {
|
|
2026
2427
|
io.stdout.write(`${JSON.stringify({
|
|
2027
2428
|
schema: "workbench.cli.eval.v1",
|
|
@@ -2032,14 +2433,14 @@ function emitEvalFailure(runs, failedRuns, artifactIds, parsed, io) {
|
|
|
2032
2433
|
evidenceSaved: true,
|
|
2033
2434
|
runs: runs.map((run) => runFailureSummary(run, artifactIds.get(run.id) ?? [])),
|
|
2034
2435
|
failedRuns: failedRuns.map((run) => runFailureSummary(run, artifactIds.get(run.id) ?? [])),
|
|
2035
|
-
|
|
2436
|
+
next,
|
|
2036
2437
|
}, null, 2)}\n`);
|
|
2037
2438
|
return 1;
|
|
2038
2439
|
}
|
|
2039
2440
|
io.stdout.write([
|
|
2040
2441
|
"Eval failed; evidence was saved.",
|
|
2041
2442
|
...failedRuns.map(formatRun),
|
|
2042
|
-
...(
|
|
2443
|
+
...(next ? [`next: ${next}`] : []),
|
|
2043
2444
|
].join("\n") + "\n");
|
|
2044
2445
|
return 1;
|
|
2045
2446
|
}
|
|
@@ -2072,18 +2473,12 @@ function runFailureSummary(run, artifactIds) {
|
|
|
2072
2473
|
artifactIds: [...artifactIds],
|
|
2073
2474
|
};
|
|
2074
2475
|
}
|
|
2075
|
-
function
|
|
2476
|
+
/**
 * Suggests the follow-up command to print after a failed eval.
 *
 * @param {Array<{id: string}>} failedRuns - Runs that failed.
 * @returns {string} `workbench show <short ref>` for the first failed run, or
 *   `workbench log --runs` when the list is empty.
 */
function evalFailureNextCommand(failedRuns) {
    const firstFailure = failedRuns[0];
    return firstFailure
        ? `workbench show ${displayRef(firstFailure.id)}`
        : "workbench log --runs";
}
|
|
2088
2483
|
function output(value, parsed, io, text) {
|
|
2089
2484
|
return emitResult(commandSchema(parsed), { result: value }, parsed, io, text);
|
|
@@ -2091,7 +2486,7 @@ function output(value, parsed, io, text) {
|
|
|
2091
2486
|
function commandSchema(parsed) {
|
|
2092
2487
|
const command = parsed.positionals[0] ?? "result";
|
|
2093
2488
|
const subcommand = parsed.positionals[1];
|
|
2094
|
-
const suffix = ["
|
|
2489
|
+
const suffix = ["agent", "case"].includes(command) && subcommand
|
|
2095
2490
|
? `${command}-${subcommand}`
|
|
2096
2491
|
: command;
|
|
2097
2492
|
return `workbench.cli.${suffix}.v1`;
|
|
@@ -2116,11 +2511,377 @@ async function workbenchCliAuthStatus() {
|
|
|
2116
2511
|
})),
|
|
2117
2512
|
};
|
|
2118
2513
|
}
|
|
2514
|
+
/**
 * Overrides the status "next" hint with `workbench login` when a missing
 * Workbench Cloud login is what blocks a cloud remote.
 *
 * @param {object} status - Status snapshot; `status.remotes` is inspected.
 * @param {object} auth - Auth status; `auth.workbenchCloud.status` is inspected.
 * @returns {object} The same status object when no override applies,
 *   otherwise a shallow copy with `next` set to "workbench login".
 */
function statusWithCausalNext(status, auth) {
    if (auth.workbenchCloud.status === "authenticated") {
        return status;
    }
    // A cloud remote is blocked when it is not in sync or has never been published.
    const isBlockedCloudRemote = (remote) => {
        if (remote.kind !== "workbench-cloud") {
            return false;
        }
        return remote.sync.status !== "up_to_date" || remote.publication.status === "unpublished";
    };
    if (status.remotes.some(isBlockedCloudRemote)) {
        return { ...status, next: "workbench login" };
    }
    return status;
}
|
|
2526
|
+
/**
 * Shortens an object id for display.
 *
 * - `v_<8+ hex chars>` becomes the first 8 hex characters, without the prefix.
 * - `<prefix>_<suffix>` (both non-empty, split at the first underscore) keeps
 *   the prefix and the first 8 characters of the suffix.
 * - Anything else is cut to at most 8 characters.
 *
 * @param {string} id - Full object id.
 * @returns {string} The shortened reference.
 */
function displayRef(id) {
    const shortLength = 8;
    const versionMatch = id.match(/^v_([0-9a-f]{8,})$/iu);
    if (versionMatch?.[1]) {
        return versionMatch[1].slice(0, shortLength);
    }
    const underscoreAt = id.indexOf("_");
    const hasPrefixAndSuffix = underscoreAt > 0 && underscoreAt < id.length - 1;
    if (hasPrefixAndSuffix) {
        // Keep "<prefix>_" plus the first eight characters that follow it.
        return id.slice(0, underscoreAt + 1 + shortLength);
    }
    return id.slice(0, shortLength);
}
|
|
2539
|
+
/**
 * Rewrites every version, run, job, trace, or artifact id found in a command
 * string to its short display form.
 *
 * @param {string} command - Command text that may contain full ids.
 * @returns {string} The command with each matched id passed through `displayRef`.
 */
function shortenCommandRefs(command) {
    const refPattern = /\b(?:v_[0-9a-f]{8,}|(?:run|job|trace|artifact)_[a-z0-9_-]+)/giu;
    return command.replace(refPattern, (fullRef) => displayRef(fullRef));
}
|
|
2542
|
+
/**
 * Resolves a version reference against the versions in a snapshot.
 *
 * The literal ref "current" is replaced by `snapshot.refs.current`. A ref
 * that equals exactly one version's id or hash wins outright. Otherwise the
 * ref is matched as a prefix through `snapshotVersionRefMatches`.
 *
 * Without the exact-match step, a full id that is also a prefix of another
 * version's id was reported as ambiguous, which could make the resolved
 * "current" ref fail.
 *
 * @param {{refs: {current?: string}, versions: Array<{id: string, hash: string}>}} snapshot
 * @param {string} ref - Version id, hash, prefix, or "current".
 * @returns {object | undefined} The matching version, or undefined when the
 *   ref is empty or nothing matches.
 * @throws {WorkbenchCodedError} "ref_ambiguous" (exit code 2) when several
 *   versions match and none of them is a unique exact match.
 */
function snapshotVersionByRef(snapshot, ref) {
    const requested = ref.trim();
    const normalized = requested === "current" ? snapshot.refs.current ?? "" : requested;
    if (!normalized) {
        return undefined;
    }
    // A unique exact id/hash match is never ambiguous.
    const exactMatches = snapshot.versions.filter((version) => version.id === normalized || version.hash === normalized);
    if (exactMatches.length === 1) {
        return exactMatches[0];
    }
    const candidates = snapshot.versions.filter((version) => snapshotVersionRefMatches(version, normalized));
    if (candidates.length > 1) {
        throw new WorkbenchCodedError("ref_ambiguous", `Version ref is ambiguous: ${ref}. Candidates: ${candidates.map((version) => displayRef(version.id)).join(", ")}.`, {
            subject: { ref, candidates: candidates.map((version) => version.id) },
            exitCode: 2,
        });
    }
    return candidates[0];
}
|
|
2557
|
+
/**
 * Tests whether a ref identifies a version, exactly or by prefix.
 *
 * A ref matches when it equals or prefixes the version id or hash. The "v_"
 * prefix is optional: a ref without it is also tried against the id with
 * "v_" prepended, and a ref with it is also tried against the hash with the
 * prefix removed.
 *
 * @param {{id: string, hash: string}} version - Version to test.
 * @param {string} ref - Non-empty reference text.
 * @returns {boolean} True when the ref matches the version.
 */
function snapshotVersionRefMatches(version, ref) {
    if (version.id === ref || version.hash === ref) {
        return true;
    }
    if (version.id.startsWith(ref) || version.hash.startsWith(ref)) {
        return true;
    }
    const bareRef = ref.startsWith("v_") ? ref.slice(2) : ref;
    if (version.hash.startsWith(bareRef)) {
        return true;
    }
    return version.id.startsWith(`v_${bareRef}`);
}
|
|
2566
|
+
/**
 * Resolves a run/job/trace-style reference against a list of entries.
 *
 * A ref that equals exactly one entry id wins outright. Otherwise the ref is
 * matched as a prefix through `objectRefMatches`.
 *
 * Without the exact-match step, an id that is also a prefix of another id
 * (for example "run_1" next to "run_10") could not be addressed at all,
 * because the lookup always reported it as ambiguous.
 *
 * @param {Array<{id: string}>} entries - Candidate objects.
 * @param {string} ref - Full id or prefix.
 * @param {string} kind - Object kind label used in the error message.
 * @returns {object | undefined} The matching entry, or undefined when the ref
 *   is blank or nothing matches.
 * @throws {WorkbenchCodedError} "ref_ambiguous" (exit code 2) when several
 *   entries match and none of them is a unique exact match.
 */
function snapshotObjectByRef(entries, ref, kind) {
    const normalized = ref.trim();
    if (!normalized) {
        return undefined;
    }
    // A unique exact id match is never ambiguous.
    const exactMatches = entries.filter((entry) => entry.id === normalized);
    if (exactMatches.length === 1) {
        return exactMatches[0];
    }
    const candidates = entries.filter((entry) => objectRefMatches(entry.id, normalized));
    if (candidates.length > 1) {
        // The message lists at most 8 short refs; the subject keeps up to 20 full ids.
        throw new WorkbenchCodedError("ref_ambiguous", `${capitalize(kind)} ref is ambiguous: ${ref}. Candidates: ${candidates.map((entry) => displayRef(entry.id)).slice(0, 8).join(", ")}.`, {
            subject: { ref, candidates: candidates.map((entry) => entry.id).slice(0, 20) },
            exitCode: 2,
        });
    }
    return candidates[0];
}
|
|
2580
|
+
/**
 * Tests whether a ref identifies an object id.
 *
 * The ref matches when it equals or prefixes the whole id, or when it
 * prefixes the part of the id after the first underscore (so the kind prefix
 * such as "run_" may be omitted).
 *
 * @param {string} id - Full object id.
 * @param {string} ref - Reference text.
 * @returns {boolean} True when the ref matches the id.
 */
function objectRefMatches(id, ref) {
    // startsWith also covers the exact-equality case.
    if (id.startsWith(ref)) {
        return true;
    }
    const underscoreAt = id.indexOf("_");
    if (underscoreAt <= 0) {
        return false;
    }
    const idWithoutPrefix = id.slice(underscoreAt + 1);
    return idWithoutPrefix.startsWith(ref);
}
|
|
2587
|
+
/**
 * Upper-cases the first character of a string.
 *
 * @param {string} value - Text to capitalize.
 * @returns {string} The text with its first character upper-cased; an empty
 *   string is returned unchanged.
 */
function capitalize(value) {
    if (value.length === 0) {
        return value;
    }
    const head = value[0].toUpperCase();
    const tail = value.slice(1);
    return `${head}${tail}`;
}
|
|
2590
|
+
/**
 * Resolves a ref that may name either a run or a job.
 *
 * @param {{runs: Array<object>, jobs: Array<object>}} snapshot - Inspection snapshot.
 * @param {string} ref - Run or job reference.
 * @returns {{run?: object, jobs: Array<object>}} For a run: the run plus every
 *   job whose `runId` equals the run id. For a job: just that job. When
 *   nothing matches: an empty `jobs` list.
 * @throws {WorkbenchCodedError} "ref_ambiguous" (exit code 2) when the ref
 *   matches both a run and a job.
 */
function runOrJobEvidenceSelection(snapshot, ref) {
    const matchedRun = snapshotObjectByRef(snapshot.runs, ref, "run");
    const matchedJob = snapshotObjectByRef(snapshot.jobs, ref, "job");
    if (matchedRun && matchedJob) {
        throw new WorkbenchCodedError("ref_ambiguous", `Run/job ref is ambiguous: ${ref}. Candidates: ${displayRef(matchedRun.id)}, ${displayRef(matchedJob.id)}.`, {
            subject: { ref, candidates: [matchedRun.id, matchedJob.id] },
            exitCode: 2,
        });
    }
    if (matchedRun) {
        const jobsOfRun = snapshot.jobs.filter((entry) => entry.runId === matchedRun.id);
        return { run: matchedRun, jobs: jobsOfRun };
    }
    if (matchedJob) {
        return { jobs: [matchedJob] };
    }
    return { jobs: [] };
}
|
|
2607
|
+
/**
 * Collects the trace and artifact files belonging to a run or job, with each
 * file re-rooted under
 * `cases/<case>/jobs/<job>/(traces|artifacts)/<owner id>/<original path>`.
 *
 * For every selected job, trace files come before artifact files. Trace or
 * artifact ids missing from the snapshot are skipped, and only the first file
 * for any resulting path is kept.
 *
 * @param {object} snapshot - Inspection snapshot with `traces` and `artifacts`.
 * @param {string} ref - Run or job reference.
 * @returns {Array<object>} The re-rooted evidence files; empty when the ref
 *   matches nothing.
 */
function evidenceFilesForRunOrJob(snapshot, ref) {
    const { run, jobs } = runOrJobEvidenceSelection(snapshot, ref);
    if (!run && jobs.length === 0) {
        return [];
    }
    const tracesById = new Map();
    for (const trace of snapshot.traces) {
        tracesById.set(trace.id, trace);
    }
    const artifactsById = new Map();
    for (const artifact of snapshot.artifacts) {
        artifactsById.set(artifact.id, artifact);
    }
    const seenPaths = new Set();
    const collected = [];
    // Re-roots one trace's or artifact's files under the job and keeps unseen paths.
    const collect = (job, group, owner) => {
        if (!owner) {
            return;
        }
        const directory = `cases/${evidencePathSegment(job.caseId)}/jobs/${evidencePathSegment(job.id)}/${group}/${evidencePathSegment(owner.id)}`;
        for (const file of owner.files) {
            const relocated = evidenceFileWithPath(file, `${directory}/${file.path}`);
            if (seenPaths.has(relocated.path)) {
                continue;
            }
            seenPaths.add(relocated.path);
            collected.push(relocated);
        }
    };
    for (const job of jobs) {
        for (const traceId of job.traceIds) {
            collect(job, "traces", tracesById.get(traceId));
        }
        for (const artifactId of job.artifactIds) {
            collect(job, "artifacts", artifactsById.get(artifactId));
        }
    }
    return collected;
}
|
|
2637
|
+
/**
 * Returns a copy of a file record under a new, normalized path.
 *
 * @param {object} file - File record to copy.
 * @param {string} filePath - New path; backslashes become forward slashes and
 *   leading slashes are removed.
 * @returns {object} Shallow copy of `file` with `path` replaced.
 */
function evidenceFileWithPath(file, filePath) {
    const forwardSlashed = filePath.replace(/\\/gu, "/");
    const relativePath = forwardSlashed.replace(/^\/+/u, "");
    return { ...file, path: relativePath };
}
|
|
2643
|
+
/**
 * Makes a value safe to use as one path segment.
 *
 * @param {string} value - Raw segment text.
 * @returns {string} The text with every run of characters outside
 *   `A-Z a-z 0-9 . _ -` replaced by a single "-", or "_" when the result is
 *   empty.
 */
function evidencePathSegment(value) {
    const sanitized = value.replace(/[^A-Za-z0-9._-]+/gu, "-");
    return sanitized === "" ? "_" : sanitized;
}
|
|
2646
|
+
/**
 * Renders run/job evidence as text: formatted trace details first, then a
 * "Files:" section listing each file path.
 *
 * @param {Array<object>} details - Evidence details, formatted with `formatTraceDetail`.
 * @param {Array<{path: string}>} files - Evidence files.
 * @returns {string} The joined lines, or "No evidence." when there is nothing to show.
 */
function formatRunOrJobEvidence(details, files) {
    const lines = details.map(formatTraceDetail).filter(Boolean);
    if (files.length > 0) {
        lines.push("Files:");
        for (const file of files) {
            lines.push(file.path);
        }
    }
    const rendered = lines.join("\n");
    return rendered || "No evidence.";
}
|
|
2651
|
+
/**
 * Recursively copies a value, replacing every embedded file record with its
 * `fileSummary`.
 *
 * - null, strings, numbers, and booleans are returned as-is.
 * - Arrays are mapped element by element.
 * - Objects with string `path` and `content` fields become `fileSummary(record)`.
 * - Other objects are copied key by key, dropping keys whose value is undefined.
 * - Anything else (undefined, functions, symbols, bigints) becomes null.
 *
 * @param {unknown} value - Value to copy.
 * @returns {unknown} The copy with file records summarized.
 */
function manifestOnly(value) {
    const valueType = typeof value;
    if (value === null || valueType === "string" || valueType === "number" || valueType === "boolean") {
        return value;
    }
    if (Array.isArray(value)) {
        return value.map(manifestOnly);
    }
    if (!value || valueType !== "object") {
        return null;
    }
    const record = value;
    const isFileRecord = typeof record.path === "string" && typeof record.content === "string";
    if (isFileRecord) {
        return fileSummary(record);
    }
    const copy = {};
    for (const [key, child] of Object.entries(record)) {
        if (child !== undefined) {
            copy[key] = manifestOnly(child);
        }
    }
    return copy;
}
|
|
2674
|
+
/**
 * Picks a locally connected adapter to act as the improver when the project's
 * default agent cannot be used through the queued adapter.
 *
 * Returns undefined, meaning "no local override", when the `agents` flag is
 * set or when the default agent passes
 * `workbenchSkillImproveCanUseQueuedAdapter`. Otherwise selects among
 * connected "claude" and "codex" auth entries, preferring claude over codex
 * and, within one adapter, the most recently updated entry.
 *
 * Failures while listing agents, reading the status snapshot, or listing auth
 * status are deliberately treated as "nothing available".
 *
 * @param {object} parsed - Parsed CLI arguments.
 * @param {object} core - Workbench core handle.
 * @returns {Promise<{name: string, adapter: string, config: object} | undefined>}
 * @throws {WorkbenchCodedError} "auth_required" (exit code 1) when no eligible
 *   adapter is connected.
 */
async function resolveLocalImproverAgent(parsed, core) {
    if (stringFlag(parsed, "agents")) {
        return undefined;
    }
    const agents = await listWorkbenchAgents(core).catch(() => []);
    const status = await workbenchStatusSnapshot(core).catch(() => undefined);
    const defaultAgentName = status?.project.defaultAgent ?? agents[0]?.name;
    const defaultAgent = agents.find((agent) => agent.name === defaultAgentName);
    if (defaultAgent && workbenchSkillImproveCanUseQueuedAdapter(defaultAgent)) {
        return undefined;
    }
    const authEntries = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).listStatus().catch(() => []);
    // Position in this list is the preference rank: earlier wins.
    const preferredAdapters = ["claude", "codex"];
    const updatedAtMs = (entry) => Date.parse(entry.updatedAt ?? "") || 0;
    const eligible = authEntries.filter((entry) => entry.status === "connected" && preferredAdapters.includes(entry.adapterId));
    eligible.sort((left, right) => {
        const rankDifference = preferredAdapters.indexOf(left.adapterId) - preferredAdapters.indexOf(right.adapterId);
        return rankDifference || updatedAtMs(right) - updatedAtMs(left);
    });
    const selected = eligible[0];
    if (!selected) {
        throw new WorkbenchCodedError("auth_required", "workbench improve needs a connected improver.", {
            remediation: "Run workbench login claude (or codex) to connect an improver.",
            exitCode: 1,
        });
    }
    const { adapterId, slot, profile } = selected;
    const auth = slot ? { [slot]: profile } : profile;
    return {
        name: adapterId,
        adapter: adapterId,
        config: { auth },
    };
}
|
|
2709
|
+
/**
 * Formats one `workbench log` entry as a tab-separated line.
 *
 * @param {object} entry - Log entry; `kind === "version"` selects the version
 *   layout, anything else is rendered as a run.
 * @returns {string} The formatted line. A run without a score shows "score=n/a".
 */
function formatLogEntry(entry) {
    const { createdAt } = entry;
    const shortId = displayRef(entry.id);
    if (entry.kind === "version") {
        return `${createdAt}\tversion\t${shortId}\tfiles=${entry.fileCount}\t${entry.message}`;
    }
    const scoreText = entry.score === undefined ? "n/a" : entry.score.toFixed(3);
    const shortVersionId = displayRef(entry.versionId);
    return `${createdAt}\trun\t${shortId}\t${entry.status}\tversion=${shortVersionId}\tskill=${entry.skillName}\tagent=${entry.agentName}\tscore=${scoreText}`;
}
|
|
2716
|
+
/**
 * Splits a `workbench show` reference at its first colon.
 *
 * @param {string} ref - Reference of the form `OBJECT` or `OBJECT:PATH`.
 * @returns {[string, string | null]} `[object, path]`; the path is null when
 *   there is no colon.
 */
function splitShowRef(ref) {
    const colonAt = ref.indexOf(":");
    return colonAt === -1
        ? [ref, null]
        : [ref.slice(0, colonAt), ref.slice(colonAt + 1)];
}
|
|
2723
|
+
/**
 * Looks up one evidence file of a run or job by path.
 *
 * @param {object} core - Workbench core handle.
 * @param {string} objectRef - Run or job reference.
 * @param {string} requestedPath - Path (or path suffix) of the wanted file.
 * @returns {Promise<object | null>} The file, or null when the ref matches
 *   neither a run nor a job.
 * @throws {WorkbenchCodedError} "ref_not_found" (exit code 1) when the run or
 *   job exists but has no such file; `findShowFile` may also throw when the
 *   path is ambiguous.
 */
async function fileForRunOrJobRef(core, objectRef, requestedPath) {
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const { run, jobs } = runOrJobEvidenceSelection(snapshot, objectRef);
    const refResolved = Boolean(run) || jobs.length > 0;
    if (!refResolved) {
        return null;
    }
    const evidenceFiles = evidenceFilesForRunOrJob(snapshot, objectRef);
    const match = findShowFile(evidenceFiles, requestedPath, objectRef);
    if (!match) {
        throw new WorkbenchCodedError("ref_not_found", `File not found in ${objectRef}: ${requestedPath}`, {
            remediation: `Run workbench show ${objectRef}.`,
            subject: { ref: objectRef, path: requestedPath },
            exitCode: 1,
        });
    }
    return match;
}
|
|
2740
|
+
/**
 * Gathers job evidence details for a run or job, keeping only details where
 * at least one execution has sessions, trace spans, trace events, or trace
 * summaries.
 *
 * @param {object} snapshot - Inspection snapshot.
 * @param {string} ref - Run or job reference.
 * @returns {Array<object>} Evidence details with recorded activity, in job order.
 */
function evidenceDetailsForRunOrJob(snapshot, ref) {
    const { jobs } = runOrJobEvidenceSelection(snapshot, ref);
    const hasRecordedActivity = (execution) => execution.sessions.length > 0 ||
        execution.trace.spans.length > 0 ||
        execution.trace.events.length > 0 ||
        execution.trace.summaries.length > 0;
    const details = [];
    for (const job of jobs) {
        const detail = workbenchJobEvidenceForSnapshot(snapshot, {
            runId: job.runId,
            jobId: job.id,
        });
        if (detail && detail.executions.some(hasRecordedActivity)) {
            details.push(detail);
        }
    }
    return details;
}
|
|
2753
|
+
/**
 * Finds the evidence file a `workbench show OBJECT:PATH` request refers to.
 *
 * Resolution order:
 * 1. Files whose path equals the request (backslashes normalized to "/").
 *    One match wins; several matches that `singleEquivalentShowFile` accepts
 *    yield the first; otherwise the path is ambiguous.
 * 2. Files whose path ends with "/<request>" or whose basename equals the
 *    request. For "stderr.log", files with content are preferred; the
 *    unfiltered matches are the fallback when no file has content.
 *
 * @param {Array<object>} files - Candidate evidence files.
 * @param {string} requestedPath - Path as typed by the user.
 * @param {string} objectRef - Owning object ref, used in error text.
 * @returns {object | null} The resolved file, or null when nothing matches.
 * @throws {WorkbenchCodedError} The error built by `ambiguousShowPath` when
 *   several distinct files match.
 */
function findShowFile(files, requestedPath, objectRef) {
    const wanted = requestedPath.replace(/\\/gu, "/");
    const exactMatches = files.filter((file) => file.path === wanted);
    if (exactMatches.length > 0) {
        const resolvedExact = exactMatches.length === 1
            ? exactMatches[0]
            : singleEquivalentShowFile(exactMatches);
        if (resolvedExact) {
            return resolvedExact;
        }
        throw ambiguousShowPath(objectRef, requestedPath, exactMatches);
    }
    const suffixMatches = files.filter((file) => file.path.endsWith(`/${wanted}`) || path.basename(file.path) === wanted);
    if (suffixMatches.length === 0) {
        return null;
    }
    // Empty stderr logs are noise, so prefer the ones that have content.
    const preferred = wanted === "stderr.log"
        ? suffixMatches.filter((file) => file.content.length > 0)
        : suffixMatches;
    if (preferred.length === 1) {
        return preferred[0];
    }
    const equivalentPreferred = singleEquivalentShowFile(preferred);
    if (equivalentPreferred) {
        return equivalentPreferred;
    }
    if (preferred.length === 0 && suffixMatches.length === 1) {
        return suffixMatches[0];
    }
    const equivalentSuffix = singleEquivalentShowFile(suffixMatches);
    if (equivalentSuffix) {
        return equivalentSuffix;
    }
    const reported = preferred.length > 0 ? preferred : suffixMatches;
    throw ambiguousShowPath(objectRef, requestedPath, reported);
}
|
|
2789
|
+
/**
 * Collapses several files into one when they are interchangeable.
 *
 * @param {Array<{kind: unknown, encoding: unknown, content: unknown}>} files
 * @returns {object | null} The first file when there are at least two files
 *   and all of them share the same kind, encoding, and content; otherwise
 *   null (including for zero or one file).
 */
function singleEquivalentShowFile(files) {
    if (files.length <= 1) {
        return null;
    }
    const reference = files[0];
    for (const file of files) {
        const sameAsReference = file.kind === reference.kind &&
            file.encoding === reference.encoding &&
            file.content === reference.content;
        if (!sameAsReference) {
            return null;
        }
    }
    return reference;
}
|
|
2798
|
+
/**
 * Builds (but does not throw) the error for a file path that matches several
 * distinct evidence files.
 *
 * @param {string} objectRef - Owning object ref.
 * @param {string} requestedPath - Path as typed by the user.
 * @param {Array<{path: string}>} candidates - The conflicting files.
 * @returns {WorkbenchCodedError} A "ref_ambiguous" error with exit code 2 that
 *   lists every candidate path.
 */
function ambiguousShowPath(objectRef, requestedPath, candidates) {
    const candidatePaths = [];
    for (const file of candidates) {
        candidatePaths.push(file.path);
    }
    const message = `File path is ambiguous in ${objectRef}: ${requestedPath}. Candidates: ${candidatePaths.join(", ")}.`;
    const details = {
        remediation: `Run workbench show ${objectRef}.`,
        subject: { ref: objectRef, path: requestedPath, candidates: candidatePaths },
        exitCode: 2,
    };
    return new WorkbenchCodedError("ref_ambiguous", message, details);
}
|
|
2806
|
+
/**
 * Builds the structured file listing for an object.
 *
 * @param {string} kind - Object kind label.
 * @param {string} id - Object id.
 * @param {Array<object>} files - Files belonging to the object.
 * @returns {{kind: string, id: string, fileCount: number, files: Array<object>}}
 *   The listing, with each file reduced through `fileSummary`.
 */
function fileListing(kind, id, files) {
    const fileCount = files.length;
    const summaries = files.map(fileSummary);
    return { kind, id, fileCount, files: summaries };
}
|
|
2814
|
+
/**
 * Renders a file listing as text: a tab-separated header line followed by one
 * file path per line.
 *
 * @param {string} kind - Object kind label.
 * @param {string} id - Object id, shown in short display form.
 * @param {Array<{path: string}>} files - Files belonging to the object.
 * @returns {string} The rendered listing.
 */
function formatFileListing(kind, id, files) {
    const header = `${kind}\t${displayRef(id)}\tfiles=${files.length}`;
    const filePaths = files.map((file) => file.path);
    return [header, ...filePaths].join("\n");
}
|
|
2817
|
+
/**
 * Resolves the trace id to use as the source of a new case.
 *
 * The ref is tried as a trace first. Failing that, it is resolved as a run or
 * job, and the run's first trace id is used (falling back to the first
 * selected job's first trace id).
 *
 * @param {object} core - Workbench core handle.
 * @param {string} ref - Trace, run, or job reference.
 * @returns {Promise<string>} The resolved trace id.
 * @throws {WorkbenchCodedError} "ref_not_found" (exit code 1) when no trace
 *   id can be derived from the ref.
 */
async function traceIdForCaseSource(core, ref) {
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const directTrace = snapshotObjectByRef(snapshot.traces, ref, "trace");
    if (directTrace) {
        return directTrace.id;
    }
    const { run, jobs } = runOrJobEvidenceSelection(snapshot, ref);
    const derivedTraceId = run?.traceIds[0] ?? jobs[0]?.traceIds[0];
    if (!derivedTraceId) {
        throw new WorkbenchCodedError("ref_not_found", `Run, job, or trace not found: ${ref}`, {
            remediation: "Run workbench log, then workbench case add RUN_ID.",
            subject: { ref },
            exitCode: 1,
        });
    }
    return derivedTraceId;
}
|
|
2834
|
+
/**
 * Compares each run's score with the most recent earlier scored run for the
 * same skill and agent.
 *
 * @param {object} core - Workbench core handle.
 * @param {Array<object>} runs - Runs produced by the current eval.
 * @returns {Promise<Array<object>>} One summary per run containing `runId`,
 *   `versionId`, `skillName`, `agentName`, plus `score`, `previousScore`, and
 *   `delta` whenever the values they depend on are defined.
 */
async function evalDeltas(core, runs) {
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const newestFirst = (left, right) => right.createdAt.localeCompare(left.createdAt);
    return runs.map((run) => {
        const isEarlierComparableRun = (candidate) => candidate.id !== run.id &&
            candidate.skillName === run.skillName &&
            candidate.agentName === run.agentName &&
            typeof candidate.score === "number" &&
            candidate.createdAt < run.createdAt;
        const previous = snapshot.runs.filter(isEarlierComparableRun).sort(newestFirst)[0];
        const summary = {
            runId: run.id,
            versionId: run.versionId,
            skillName: run.skillName,
            agentName: run.agentName,
        };
        const hasScore = run.score !== undefined;
        const hasPreviousScore = previous?.score !== undefined;
        if (hasScore) {
            summary.score = run.score;
        }
        if (hasPreviousScore) {
            summary.previousScore = previous.score;
        }
        if (hasScore && hasPreviousScore) {
            summary.delta = run.score - previous.score;
        }
        return summary;
    });
}
|
|
2855
|
+
/**
 * Formats one eval delta summary as text.
 *
 * @param {object} delta - Summary as produced by `evalDeltas`.
 * @returns {string} "" when there is no score; "<skill> <version> <score>"
 *   when there is nothing to compare against; otherwise the same text
 *   followed by " (was <previous>, <signed change>)". Scores use three decimals.
 */
function formatEvalDelta(delta) {
    const { score, previousScore, delta: change } = delta;
    if (score === undefined) {
        return "";
    }
    const headline = `${delta.skillName} ${displayRef(delta.versionId)} ${score.toFixed(3)}`;
    if (previousScore === undefined || change === undefined) {
        return headline;
    }
    // Negative numbers already carry their sign; only non-negative ones need "+".
    const signPrefix = change >= 0 ? "+" : "";
    return `${headline} (was ${previousScore.toFixed(3)}, ${signPrefix}${change.toFixed(3)})`;
}
|
|
2866
|
+
/**
 * Suggests the follow-up step to print after a successful eval.
 *
 * @param {object} core - Workbench core handle; only used when at least one
 *   run has a numeric score.
 * @param {Array<{score?: number}>} runs - Runs produced by the eval.
 * @returns {Promise<string>} "workbench eval" when there were no runs;
 *   "workbench publish" when the current version has a text case file
 *   (`.workbench/cases/<name>/case.yaml` or `.yml`) that does not contain a
 *   `smoke: true` line; otherwise a hint to edit the cases and re-run eval.
 */
async function evalSuccessNextCommand(core, runs) {
    const editCasesHint = "edit .workbench/cases, then run workbench eval";
    if (runs.length === 0) {
        return "workbench eval";
    }
    const anyScoredRun = runs.some((run) => typeof run.score === "number");
    if (!anyScoredRun) {
        return editCasesHint;
    }
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const currentRef = snapshot.status.currentVersionId ?? snapshot.refs.current ?? "";
    const currentVersion = snapshotVersionByRef(snapshot, currentRef);
    const casePathPattern = /^\.workbench\/cases\/[^/]+\/case\.ya?ml$/u;
    const smokeFlagPattern = /\n\s*smoke:\s*true(?:\s|$)/u;
    const isTextCaseFile = (file) => file.kind === "text" && casePathPattern.test(file.path);
    const caseFiles = currentVersion?.files.filter(isTextCaseFile) ?? [];
    // The leading "\n" lets the pattern also match a flag on the file's first line.
    const isWorkflowCase = (file) => !smokeFlagPattern.test(`\n${file.content}`);
    if (caseFiles.some(isWorkflowCase)) {
        return "workbench publish";
    }
    return editCasesHint;
}
|
|
2119
2880
|
function formatStatusSnapshot(status) {
|
|
2120
2881
|
const lines = [
|
|
2121
2882
|
`Root: ${status.project.root}`,
|
|
2122
2883
|
`Initialized: ${status.project.initialized ? "yes" : "no"}`,
|
|
2123
|
-
...(status.project.currentVersionId ? [`Current version: ${status.project.currentVersionId}`] : []),
|
|
2884
|
+
...(status.project.currentVersionId ? [`Current version: ${displayRef(status.project.currentVersionId)}`] : []),
|
|
2124
2885
|
...(status.project.defaultSkill ? [`Default skill: ${status.project.defaultSkill}`] : []),
|
|
2125
2886
|
...(status.project.defaultAgent ? [`Default agent: ${status.project.defaultAgent}`] : []),
|
|
2126
2887
|
`Runs: ${status.runs.total}${status.runs.lastStatus ? ` (last ${status.runs.lastStatus})` : ""}`,
|
|
@@ -2130,7 +2891,7 @@ function formatStatusSnapshot(status) {
|
|
|
2130
2891
|
? [
|
|
2131
2892
|
"publication=published",
|
|
2132
2893
|
remote.publication.visibility ? `visibility=${remote.publication.visibility}` : undefined,
|
|
2133
|
-
remote.publication.versionId ? `version=${remote.publication.versionId}` : undefined,
|
|
2894
|
+
remote.publication.versionId ? `version=${displayRef(remote.publication.versionId)}` : undefined,
|
|
2134
2895
|
remote.publication.installUrl ? `install=${remote.publication.installUrl}` : undefined,
|
|
2135
2896
|
remote.publication.pinnedInstallUrl ? `pinned=${remote.publication.pinnedInstallUrl}` : undefined,
|
|
2136
2897
|
].filter(Boolean).join("\t")
|
|
@@ -2141,49 +2902,16 @@ function formatStatusSnapshot(status) {
|
|
|
2141
2902
|
? [
|
|
2142
2903
|
` error[${remote.sync.lastError.code}]: ${remote.sync.lastError.message}`,
|
|
2143
2904
|
...(remote.sync.lastAttemptAt ? [` last attempt: ${remote.sync.lastAttemptAt}`] : []),
|
|
2144
|
-
...(remote.sync.nextCommand ? [` next: ${remote.sync.nextCommand}`] : []),
|
|
2145
2905
|
]
|
|
2146
2906
|
: []),
|
|
2147
2907
|
];
|
|
2148
2908
|
})] : ["Remotes: none"]),
|
|
2149
|
-
...(status.next
|
|
2909
|
+
...(status.next ? [`next: ${shortenCommandRefs(status.next)}`] : []),
|
|
2150
2910
|
];
|
|
2151
2911
|
return lines.join("\n");
|
|
2152
2912
|
}
|
|
2153
|
-
function formatCheck(result) {
|
|
2154
|
-
return [
|
|
2155
|
-
"Workbench skill is valid.",
|
|
2156
|
-
`Cases: ${result.cases} (${result.plan.source.smokeCaseCount} smoke)`,
|
|
2157
|
-
`Skills: ${result.skills}`,
|
|
2158
|
-
`Agents: ${result.agents}`,
|
|
2159
|
-
`Skill files: ${result.plan.source.skillFiles}`,
|
|
2160
|
-
`Eval files: ${result.plan.source.evalFiles}`,
|
|
2161
|
-
"",
|
|
2162
|
-
"Skill plan:",
|
|
2163
|
-
...result.plan.skills.map((skill) => [
|
|
2164
|
-
skill.name,
|
|
2165
|
-
`bundle=${skill.bundleHash.slice(0, 12)}`,
|
|
2166
|
-
`files=${skill.fileCount}`,
|
|
2167
|
-
`includes=${skill.includedSkillCount}`,
|
|
2168
|
-
].join("\t")),
|
|
2169
|
-
"",
|
|
2170
|
-
"Agent plan:",
|
|
2171
|
-
...result.plan.agents.map((agent) => [
|
|
2172
|
-
agent.name,
|
|
2173
|
-
agent.adapter,
|
|
2174
|
-
agent.model,
|
|
2175
|
-
agent.providerBacked ? "provider-eval" : "local-eval",
|
|
2176
|
-
`network=${agent.network.egress}`,
|
|
2177
|
-
`cpu=${agent.resources.cpu}`,
|
|
2178
|
-
`memoryGb=${agent.resources.memoryGb}`,
|
|
2179
|
-
`timeout=${agent.resources.timeoutMinutes}m`,
|
|
2180
|
-
`image=${agent.image}`,
|
|
2181
|
-
agent.auth ? `auth=${agent.auth}` : undefined,
|
|
2182
|
-
].filter(Boolean).join("\t")),
|
|
2183
|
-
].join("\n");
|
|
2184
|
-
}
|
|
2185
2913
|
/**
 * Formats a version as a tab-separated line: short id, first 12 characters of
 * the hash, and the message.
 *
 * @param {{id: string, hash: string, message: string}} version
 * @returns {string} The formatted line.
 */
function formatVersion(version) {
    const shortId = displayRef(version.id);
    const shortHash = version.hash.slice(0, 12);
    return `${shortId}\t${shortHash}\t${version.message}`;
}
|
|
2188
2916
|
function versionSummary(version) {
|
|
2189
2917
|
return {
|
|
@@ -2201,11 +2929,11 @@ function formatAgent(agent) {
|
|
|
2201
2929
|
/**
 * Formats a run as a tab-separated line, with ids in short display form.
 *
 * @param {object} run - Run record.
 * @returns {string} The formatted line; a missing score or latency is shown
 *   as "n/a".
 */
function formatRun(run) {
    const scoreText = run.score === undefined ? "n/a" : run.score.toFixed(3);
    const latencyText = run.latencyMs === undefined ? "n/a" : `${run.latencyMs}ms`;
    const shortRunId = displayRef(run.id);
    const shortVersionId = displayRef(run.versionId);
    return `${shortRunId}\t${run.kind}\t${run.status}\tversion=${shortVersionId}\tskill=${run.skillName}\tagent=${run.agentName}\tscore=${scoreText}\tlatency=${latencyText}`;
}
|
|
2206
2934
|
function formatImproveResult(result) {
|
|
2207
2935
|
return [
|
|
2208
|
-
`Improved ${result.version.parentIds[0]
|
|
2936
|
+
`Improved ${result.version.parentIds[0] ? displayRef(result.version.parentIds[0]) : "current"} -> ${displayRef(result.version.id)}. ${formatRun(result.run)}`,
|
|
2209
2937
|
result.switched
|
|
2210
2938
|
? "Switched to improved version."
|
|
2211
2939
|
: `Did not switch: ${result.promotionReason}`,
|
|
@@ -2214,26 +2942,26 @@ function formatImproveResult(result) {
|
|
|
2214
2942
|
/**
 * Formats a job as a tab-separated line, with ids in short display form.
 *
 * @param {object} job - Job record.
 * @returns {string} The formatted line; a missing score or duration is shown
 *   as "n/a".
 */
function formatJob(job) {
    const scoreText = job.score === undefined ? "n/a" : job.score.toFixed(3);
    const durationText = job.durationMs === undefined ? "n/a" : `${job.durationMs}ms`;
    const shortJobId = displayRef(job.id);
    const shortRunId = displayRef(job.runId);
    return `${shortJobId}\trun=${shortRunId}\tcase=${job.caseId}\tsample=${job.sample}\t${job.status}\tscore=${scoreText}\tduration=${durationText}`;
}
|
|
2219
2947
|
/**
 * Renders a comparison as a tab-separated table: one header line followed by
 * one row per cell.
 *
 * @param {{cells: Iterable<object>}} comparison - Comparison result.
 * @returns {string} The table text. Missing status is shown as "not-run";
 *   missing score, cost, latency, or run id as "n/a".
 */
function formatComparison(comparison) {
    const header = "version\tskill\tagent\tstatus\tscore\tcost\tlatency\trun";
    const formatCell = (cell) => {
        const columns = [
            displayRef(cell.versionId),
            cell.skillName,
            `${cell.agentName}@${shortObjectId(cell.agentHash)}`,
            cell.status ?? "not-run",
            cell.score === undefined ? "n/a" : cell.score.toFixed(3),
            cell.costUsd === undefined ? "n/a" : `$${cell.costUsd.toFixed(4)}`,
            cell.latencyMs === undefined ? "n/a" : `${cell.latencyMs}ms`,
            cell.runId ? displayRef(cell.runId) : "n/a",
        ];
        return columns.join("\t");
    };
    const rows = Array.from(comparison.cells, (cell) => formatCell(cell));
    return [header, ...rows].join("\n");
}
|
|
2235
2963
|
/**
 * Truncates an object id to at most 8 characters.
 *
 * @param {string} id - Full id.
 * @returns {string} The first 8 characters, or the whole id when it is shorter.
 */
function shortObjectId(id) {
    const maxLength = 8;
    return id.slice(0, maxLength);
}
|
|
2238
2966
|
function formatTrace(trace) {
|
|
2239
2967
|
const result = asRecord(trace.result);
|
|
@@ -2242,7 +2970,7 @@ function formatTrace(trace) {
|
|
|
2242
2970
|
const error = typeof result?.error === "string" ? result.error.split(/\r?\n/u)[0] : undefined;
|
|
2243
2971
|
const files = trace.files.slice(0, 5).map((file) => file.path).join(",");
|
|
2244
2972
|
return [
|
|
2245
|
-
`${trace.id}\trun=${trace.runId}\tjob=${trace.jobId
|
|
2973
|
+
`${displayRef(trace.id)}\trun=${displayRef(trace.runId)}\tjob=${trace.jobId ? displayRef(trace.jobId) : "n/a"}\tversion=${displayRef(trace.versionId)}\tskill=${trace.skillName}\tagent=${trace.agentName}`,
|
|
2246
2974
|
status ? `status=${status}` : undefined,
|
|
2247
2975
|
score ? `score=${score}` : undefined,
|
|
2248
2976
|
error ? `error=${error}` : undefined,
|
|
@@ -2270,7 +2998,7 @@ function formatTraceDetail(detail) {
|
|
|
2270
2998
|
return detail.executions.map((execution) => {
|
|
2271
2999
|
const sessionLabels = execution.sessions.map((session) => session.label).join(",");
|
|
2272
3000
|
return [
|
|
2273
|
-
`${execution.id}\trun=${detail.runId}\tjobs=${execution.jobIds.join(",")}\tstatus=${execution.status}`,
|
|
3001
|
+
`${execution.id}\trun=${displayRef(detail.runId)}\tjobs=${execution.jobIds.map(displayRef).join(",")}\tstatus=${execution.status}`,
|
|
2274
3002
|
`events=${execution.trace.events.length}`,
|
|
2275
3003
|
`spans=${execution.trace.spans.length}`,
|
|
2276
3004
|
`summaries=${execution.trace.summaries.length}`,
|
|
@@ -2279,7 +3007,7 @@ function formatTraceDetail(detail) {
|
|
|
2279
3007
|
}).join("\n");
|
|
2280
3008
|
}
|
|
2281
3009
|
/**
 * Formats an artifact as a tab-separated line, with ids in short display form.
 *
 * @param {object} artifact - Artifact record.
 * @returns {string} The formatted line, ending with the artifact's file count.
 */
function formatArtifact(artifact) {
    const shortArtifactId = displayRef(artifact.id);
    const shortRunId = displayRef(artifact.runId);
    const shortJobId = displayRef(artifact.jobId);
    const fileCount = artifact.files.length;
    return `${shortArtifactId}\trun=${shortRunId}\tjob=${shortJobId}\t${artifact.kind}\tfiles=${fileCount}`;
}
|
|
2284
3012
|
function artifactSummary(artifact) {
|
|
2285
3013
|
return {
|