@workbench-ai/workbench 0.0.68 → 0.0.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dev-open/client.css +416 -107
- package/dist/dev-open/client.js +272 -231
- package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-400-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-400-normal.woff2 +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-500-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-500-normal.woff2 +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-600-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-600-normal.woff2 +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-400-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-400-normal.woff2 +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-500-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-500-normal.woff2 +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-600-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-600-normal.woff2 +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-latin-400-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-latin-400-normal.woff2 +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-latin-500-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-latin-500-normal.woff2 +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-latin-600-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-latin-600-normal.woff2 +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-400-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-400-normal.woff2 +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-500-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-500-normal.woff2 +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-600-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-600-normal.woff2 +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-400-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-400-normal.woff2 +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-500-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-500-normal.woff2 +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-600-normal.woff +0 -0
- package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-600-normal.woff2 +0 -0
- package/dist/dev-open/fonts/libre-caslon-display-latin-400-normal.woff +0 -0
- package/dist/dev-open/fonts/libre-caslon-display-latin-400-normal.woff2 +0 -0
- package/dist/dev-open/fonts/libre-caslon-display-latin-ext-400-normal.woff +0 -0
- package/dist/dev-open/fonts/libre-caslon-display-latin-ext-400-normal.woff2 +0 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1832 -519
- package/dist/install-targets.d.ts +35 -0
- package/dist/install-targets.d.ts.map +1 -0
- package/dist/install-targets.js +188 -0
- package/dist/open-server.d.ts.map +1 -1
- package/dist/open-server.js +72 -4
- package/dist/output.d.ts +22 -0
- package/dist/output.d.ts.map +1 -0
- package/dist/output.js +38 -0
- package/package.json +5 -4
package/dist/index.js
CHANGED
|
@@ -4,107 +4,125 @@ import { createRequire } from "node:module";
|
|
|
4
4
|
import os from "node:os";
|
|
5
5
|
import path from "node:path";
|
|
6
6
|
import { gzipSync } from "node:zlib";
|
|
7
|
-
import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent,
|
|
7
|
+
import { addWorkbenchCase, addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchCases, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, publishWorkbenchVersion, removeWorkbenchCase, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
|
|
8
|
+
import { normalizeWorkbenchSkillName } from "@workbench-ai/workbench-contract";
|
|
9
|
+
import { emitError, emitResult } from "./output.js";
|
|
10
|
+
import { installSnapshotToTargets, installTargetsToJson, normalizeInstallSnapshotPath, resolveInstallTargets, supportedInstallTargets, } from "./install-targets.js";
|
|
8
11
|
import { startWorkbenchOpenServer } from "./open-server.js";
|
|
9
12
|
const require = createRequire(import.meta.url);
|
|
10
13
|
const HELP = [
|
|
11
14
|
"Usage:",
|
|
15
|
+
" workbench [--json]",
|
|
12
16
|
" workbench <command> [options]",
|
|
13
17
|
"",
|
|
14
|
-
"
|
|
15
|
-
" workbench init [DIR] [--json]",
|
|
16
|
-
" workbench status [--dir DIR] [--json]",
|
|
17
|
-
" workbench check [--dir DIR] [--json]",
|
|
18
|
-
" workbench versions [--dir DIR] [--json]",
|
|
19
|
-
" workbench switch VERSION [--dir DIR] [--json]",
|
|
20
|
-
" workbench diff [A..B] [--dir DIR] [--json]",
|
|
21
|
-
" workbench sync [REMOTE] [--dir DIR] [--json]",
|
|
18
|
+
"Bare workbench prints project status and the next useful command.",
|
|
22
19
|
"",
|
|
23
|
-
"
|
|
24
|
-
" workbench
|
|
25
|
-
" workbench
|
|
20
|
+
"Taught commands:",
|
|
21
|
+
" workbench new [DIR] [--json]",
|
|
22
|
+
" workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
23
|
+
" workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
26
24
|
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
27
|
-
" workbench
|
|
25
|
+
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
|
|
26
|
+
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--list] [--dry-run] [--json]",
|
|
28
27
|
"",
|
|
29
|
-
"
|
|
30
|
-
" workbench
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
"
|
|
28
|
+
"More:",
|
|
29
|
+
" workbench help --all",
|
|
30
|
+
].join("\n");
|
|
31
|
+
const HELP_ALL = [
|
|
32
|
+
"Usage:",
|
|
33
|
+
" workbench # = workbench status",
|
|
34
|
+
" workbench new [DIR] [--json]",
|
|
35
|
+
" workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
36
|
+
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
37
|
+
" workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
38
|
+
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--json]",
|
|
39
|
+
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--list] [--dry-run] [--json]",
|
|
34
40
|
"",
|
|
35
|
-
"
|
|
36
|
-
" workbench
|
|
37
|
-
" workbench
|
|
38
|
-
" workbench
|
|
39
|
-
" workbench
|
|
40
|
-
" workbench
|
|
41
|
-
" workbench auth status [ADAPTER[/SLOT]] [--profile PROFILE] [--json]",
|
|
42
|
-
" workbench auth connect ADAPTER[/SLOT] [--method METHOD] [--profile PROFILE] [--profile-root DIR] [--local-only] [--json]",
|
|
43
|
-
" workbench auth disconnect ADAPTER[/SLOT] [--profile PROFILE] [--local-only] [--json]",
|
|
44
|
-
" workbench login [--base-url URL] [--no-open] [--json]",
|
|
45
|
-
" workbench logout [--json]",
|
|
46
|
-
" workbench publish [VERSION] [--visibility private|public] [--json]",
|
|
41
|
+
"Inspect:",
|
|
42
|
+
" workbench status [--dir DIR] [--json]",
|
|
43
|
+
" workbench log [--runs|--versions] [--json]",
|
|
44
|
+
" workbench show REF[:PATH] [--json]",
|
|
45
|
+
" workbench diff [A..B] [--json]",
|
|
46
|
+
" workbench switch VERSION [--json]",
|
|
47
47
|
" workbench open [--host HOST] [--port PORT] [--no-open] [--json]",
|
|
48
48
|
"",
|
|
49
|
-
"
|
|
50
|
-
" workbench
|
|
51
|
-
" workbench
|
|
52
|
-
"
|
|
53
|
-
"
|
|
54
|
-
" workbench
|
|
55
|
-
" workbench
|
|
56
|
-
" workbench
|
|
57
|
-
" workbench publish --visibility public",
|
|
49
|
+
"Configure:",
|
|
50
|
+
" workbench case add [RUN_ID] | list | rm ID [--json]",
|
|
51
|
+
" workbench agent add NAME --adapter X [--model M] [--with k=v]... | list | rm NAME [--json]",
|
|
52
|
+
"",
|
|
53
|
+
"Share and auth:",
|
|
54
|
+
" workbench login [PROVIDER] [--method METHOD] [--profile P] [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--local-only] [--json]",
|
|
55
|
+
" workbench logout [PROVIDER] [--json]",
|
|
56
|
+
" workbench sync [REMOTE] [--dry-run] [--json]",
|
|
58
57
|
"",
|
|
59
|
-
"
|
|
60
|
-
"
|
|
61
|
-
"
|
|
58
|
+
"Remote URLs:",
|
|
59
|
+
" https://HOST/skills/OWNER/SKILL Workbench Cloud skill remote",
|
|
60
|
+
" file:///absolute/path local file remote for plumbing sync",
|
|
62
61
|
].join("\n");
|
|
63
62
|
const COMMAND_HELP = {
|
|
64
|
-
|
|
63
|
+
new: [
|
|
65
64
|
"Usage:",
|
|
66
|
-
" workbench
|
|
67
|
-
" workbench auth connect ADAPTER[/SLOT] [--method api-key|oauth|bedrock] [--profile PROFILE] [--profile-root DIR] [--local-only] [--json]",
|
|
68
|
-
" workbench auth disconnect ADAPTER[/SLOT] [--profile PROFILE] [--local-only] [--json]",
|
|
65
|
+
" workbench new [DIR] [--json]",
|
|
69
66
|
"",
|
|
70
|
-
"
|
|
67
|
+
"Creates a Workbench skill project.",
|
|
71
68
|
].join("\n"),
|
|
72
69
|
eval: [
|
|
73
70
|
"Usage:",
|
|
74
|
-
" workbench eval [VERSION] [--
|
|
71
|
+
" workbench eval [VERSION] [--skills all|LIST] [--agents all|LIST] [-n N|--samples N] [--rerun] [--cloud] [--json]",
|
|
75
72
|
"",
|
|
76
|
-
"Runs
|
|
73
|
+
"Runs eval jobs for the selected version, measured skills, and agents. Omitted selectors use manifest defaults.",
|
|
77
74
|
].join("\n"),
|
|
78
75
|
improve: [
|
|
79
76
|
"Usage:",
|
|
80
|
-
" workbench improve [VERSION] [--
|
|
77
|
+
" workbench improve [VERSION] [--skills LIST] [--agents LIST] [--budget N] [-n N|--samples N] [--cloud] [--json]",
|
|
78
|
+
"",
|
|
79
|
+
"Creates one improved child version from evidence. The selected skills and agents must resolve to exactly one entry each.",
|
|
80
|
+
].join("\n"),
|
|
81
|
+
compare: [
|
|
82
|
+
"Usage:",
|
|
83
|
+
" workbench compare [--skills all|LIST] [--agents all|LIST] [--versions all|A..B|LIST] [--json]",
|
|
84
|
+
"",
|
|
85
|
+
"Compares recorded eval evidence across selected skills, agents, and versions.",
|
|
86
|
+
].join("\n"),
|
|
87
|
+
install: [
|
|
88
|
+
"Usage:",
|
|
89
|
+
" workbench install HANDLE_OR_URL [--to codex|claude|local]... [--yes] [--list] [--dry-run] [--json]",
|
|
90
|
+
"",
|
|
91
|
+
"Installs published Workbench Cloud source into local agent targets.",
|
|
81
92
|
"",
|
|
82
|
-
"
|
|
93
|
+
"Example:",
|
|
94
|
+
" workbench install acme/earnings-prep --to codex --yes",
|
|
83
95
|
].join("\n"),
|
|
84
|
-
|
|
96
|
+
status: [
|
|
85
97
|
"Usage:",
|
|
86
|
-
" workbench
|
|
98
|
+
" workbench status [--dir DIR] [--json]",
|
|
87
99
|
"",
|
|
88
|
-
"
|
|
100
|
+
"Reports project, worktree, run, per-remote sync/publication, and auth state. --json emits the workbench.status.v1 dashboard.",
|
|
101
|
+
].join("\n"),
|
|
102
|
+
logout: [
|
|
103
|
+
"Usage:",
|
|
104
|
+
" workbench logout [PROVIDER] [--json]",
|
|
105
|
+
"",
|
|
106
|
+
"With no provider, logs out of Workbench Cloud. With a provider such as codex or claude, removes local adapter auth.",
|
|
89
107
|
].join("\n"),
|
|
90
108
|
show: [
|
|
91
109
|
"Usage:",
|
|
92
110
|
" workbench show REF [--json]",
|
|
93
111
|
" workbench show REF:PATH [--json]",
|
|
94
112
|
"",
|
|
95
|
-
"Shows a Workbench object
|
|
113
|
+
"Shows a Workbench object, lists files for file-backed objects, or prints one file.",
|
|
96
114
|
].join("\n"),
|
|
97
|
-
|
|
115
|
+
log: [
|
|
98
116
|
"Usage:",
|
|
99
|
-
" workbench
|
|
117
|
+
" workbench log [--runs|--versions] [--json]",
|
|
100
118
|
"",
|
|
101
|
-
"
|
|
119
|
+
"Shows one reverse-chronological timeline of versions and runs.",
|
|
102
120
|
].join("\n"),
|
|
103
|
-
|
|
121
|
+
diff: [
|
|
104
122
|
"Usage:",
|
|
105
|
-
" workbench
|
|
123
|
+
" workbench diff [A..B] [--json]",
|
|
106
124
|
"",
|
|
107
|
-
"
|
|
125
|
+
"Shows changed files between two Workbench source versions.",
|
|
108
126
|
].join("\n"),
|
|
109
127
|
switch: [
|
|
110
128
|
"Usage:",
|
|
@@ -112,116 +130,129 @@ const COMMAND_HELP = {
|
|
|
112
130
|
"",
|
|
113
131
|
"Switches the working skill source to a recorded Workbench version.",
|
|
114
132
|
].join("\n"),
|
|
133
|
+
open: [
|
|
134
|
+
"Usage:",
|
|
135
|
+
" workbench open [--host HOST] [--port PORT] [--no-open] [--json]",
|
|
136
|
+
"",
|
|
137
|
+
"Serves or emits the read-only Workbench inspection snapshot.",
|
|
138
|
+
].join("\n"),
|
|
139
|
+
case: [
|
|
140
|
+
"Usage:",
|
|
141
|
+
" workbench case list [--json]",
|
|
142
|
+
" workbench case add [RUN_ID] [--json]",
|
|
143
|
+
" workbench case rm ID [--json]",
|
|
144
|
+
"",
|
|
145
|
+
"Lists cases, creates a draft case, or removes a case.",
|
|
146
|
+
].join("\n"),
|
|
147
|
+
agent: [
|
|
148
|
+
"Usage:",
|
|
149
|
+
" workbench agent list [--json]",
|
|
150
|
+
" workbench agent add NAME --adapter X [--model M] [--with k=v]... [--json]",
|
|
151
|
+
" workbench agent rm NAME [--json]",
|
|
152
|
+
"",
|
|
153
|
+
"Lists, adds, or removes eval agent configurations.",
|
|
154
|
+
].join("\n"),
|
|
115
155
|
sync: [
|
|
116
156
|
"Usage:",
|
|
117
|
-
" workbench sync [REMOTE] [--json]",
|
|
157
|
+
" workbench sync [REMOTE] [--dry-run] [--dir DIR] [--json]",
|
|
118
158
|
"",
|
|
119
|
-
"
|
|
159
|
+
"Plumbing command: synchronizes local evidence and version objects with a Workbench remote.",
|
|
120
160
|
].join("\n"),
|
|
121
161
|
publish: [
|
|
122
162
|
"Usage:",
|
|
123
|
-
" workbench publish [VERSION] [--
|
|
163
|
+
" workbench publish [VERSION] [--as OWNER/SKILL] [--private|--team|--public] [--dry-run] [--dir DIR] [--json]",
|
|
124
164
|
"",
|
|
125
|
-
"Publishes installable skill source
|
|
165
|
+
"Publishes installable skill source to Workbench Cloud. --as sets the linked OWNER/SKILL handle.",
|
|
126
166
|
].join("\n"),
|
|
127
167
|
login: [
|
|
128
168
|
"Usage:",
|
|
129
|
-
" workbench login [--base-url URL] [--no-open] [--json]",
|
|
130
|
-
" workbench logout [--json]",
|
|
169
|
+
" workbench login [PROVIDER] [--method METHOD] [--profile P] [--base-url URL] [--start-only|--wait] [--timeout N] [--no-open] [--local-only] [--json]",
|
|
170
|
+
" workbench logout [PROVIDER] [--json]",
|
|
131
171
|
"",
|
|
132
|
-
"Connects the CLI to Workbench Cloud
|
|
172
|
+
"Connects the CLI to Workbench Cloud or captures local adapter auth for a provider.",
|
|
133
173
|
].join("\n"),
|
|
134
174
|
};
|
|
135
|
-
const
|
|
136
|
-
"
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
"rerun",
|
|
141
|
-
]);
|
|
142
|
-
const FLAG_DEFINITIONS = {
|
|
143
|
-
adapter: "string",
|
|
144
|
-
"base-url": "string",
|
|
145
|
-
budget: "positive-integer",
|
|
175
|
+
const COMMON_FLAGS = {
|
|
176
|
+
json: "boolean",
|
|
177
|
+
};
|
|
178
|
+
const PROJECT_FLAGS = {
|
|
179
|
+
...COMMON_FLAGS,
|
|
146
180
|
dir: "string",
|
|
147
|
-
|
|
181
|
+
};
|
|
182
|
+
const HELP_FLAG = {
|
|
148
183
|
help: "boolean",
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
"local-only": "boolean",
|
|
152
|
-
method: "string",
|
|
153
|
-
model: "string",
|
|
154
|
-
"no-open": "boolean",
|
|
155
|
-
port: "positive-integer",
|
|
156
|
-
profile: "string",
|
|
157
|
-
"profile-root": "string",
|
|
158
|
-
rerun: "boolean",
|
|
159
|
-
samples: "positive-integer",
|
|
160
|
-
agent: "string",
|
|
161
|
-
agents: "string",
|
|
162
|
-
skill: "string",
|
|
163
|
-
skills: "string",
|
|
184
|
+
};
|
|
185
|
+
const VERSION_FLAG = {
|
|
164
186
|
version: "boolean",
|
|
165
|
-
versions: "string",
|
|
166
|
-
visibility: "string",
|
|
167
|
-
with: "repeat-string",
|
|
168
187
|
};
|
|
169
188
|
const COMMAND_FLAGS = {
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
open: ["dir", "host", "json", "no-open", "port"],
|
|
181
|
-
publish: ["dir", "json", "visibility"],
|
|
182
|
-
retry: ["dir", "json"],
|
|
183
|
-
show: ["dir", "json"],
|
|
184
|
-
status: ["dir", "json"],
|
|
185
|
-
switch: ["dir", "json"],
|
|
186
|
-
sync: ["dir", "json"],
|
|
187
|
-
trace: ["dir", "json"],
|
|
188
|
-
versions: ["dir", "json"],
|
|
189
|
-
};
|
|
190
|
-
const SUBCOMMAND_FLAGS = {
|
|
191
|
-
auth: {
|
|
192
|
-
defaultSubcommand: "status",
|
|
193
|
-
flags: {
|
|
194
|
-
status: ["json", "profile"],
|
|
195
|
-
connect: ["json", "local-only", "method", "profile", "profile-root"],
|
|
196
|
-
disconnect: ["json", "local-only", "profile"],
|
|
197
|
-
},
|
|
189
|
+
compare: { ...PROJECT_FLAGS, ...HELP_FLAG, agents: "string", skills: "string", versions: "string" },
|
|
190
|
+
diff: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
191
|
+
eval: {
|
|
192
|
+
...PROJECT_FLAGS,
|
|
193
|
+
...HELP_FLAG,
|
|
194
|
+
agents: "string",
|
|
195
|
+
cloud: "boolean",
|
|
196
|
+
rerun: "boolean",
|
|
197
|
+
samples: "positive-integer",
|
|
198
|
+
skills: "string",
|
|
198
199
|
},
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
200
|
+
help: { ...COMMON_FLAGS, ...HELP_FLAG, all: "boolean" },
|
|
201
|
+
improve: {
|
|
202
|
+
...PROJECT_FLAGS,
|
|
203
|
+
...HELP_FLAG,
|
|
204
|
+
agents: "string",
|
|
205
|
+
budget: "positive-integer",
|
|
206
|
+
cloud: "boolean",
|
|
207
|
+
samples: "positive-integer",
|
|
208
|
+
skills: "string",
|
|
206
209
|
},
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
210
|
+
install: { ...COMMON_FLAGS, ...HELP_FLAG, "dry-run": "boolean", list: "boolean", to: "repeat-string", yes: "boolean" },
|
|
211
|
+
log: { ...PROJECT_FLAGS, ...HELP_FLAG, runs: "boolean", versions: "boolean" },
|
|
212
|
+
login: {
|
|
213
|
+
...COMMON_FLAGS,
|
|
214
|
+
...HELP_FLAG,
|
|
215
|
+
"base-url": "string",
|
|
216
|
+
"local-only": "boolean",
|
|
217
|
+
method: "string",
|
|
218
|
+
"no-open": "boolean",
|
|
219
|
+
profile: "string",
|
|
220
|
+
"profile-root": "string",
|
|
221
|
+
"start-only": "boolean",
|
|
222
|
+
timeout: "positive-integer",
|
|
223
|
+
wait: "boolean",
|
|
224
|
+
},
|
|
225
|
+
logout: { ...COMMON_FLAGS, ...HELP_FLAG },
|
|
226
|
+
new: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
227
|
+
open: { ...PROJECT_FLAGS, ...HELP_FLAG, host: "string", "no-open": "boolean", port: "positive-integer" },
|
|
228
|
+
publish: {
|
|
229
|
+
...PROJECT_FLAGS,
|
|
230
|
+
...HELP_FLAG,
|
|
231
|
+
as: "string",
|
|
232
|
+
"dry-run": "boolean",
|
|
233
|
+
private: "boolean",
|
|
234
|
+
public: "boolean",
|
|
235
|
+
team: "boolean",
|
|
212
236
|
},
|
|
213
|
-
|
|
237
|
+
show: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
238
|
+
status: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
239
|
+
switch: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
240
|
+
sync: { ...PROJECT_FLAGS, ...HELP_FLAG, "dry-run": "boolean" },
|
|
241
|
+
version: { ...COMMON_FLAGS, ...VERSION_FLAG },
|
|
242
|
+
};
|
|
243
|
+
const SUBCOMMAND_FLAGS = {
|
|
244
|
+
case: {
|
|
214
245
|
flags: {
|
|
215
|
-
list:
|
|
246
|
+
list: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
247
|
+
add: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
248
|
+
rm: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
216
249
|
},
|
|
217
250
|
},
|
|
218
251
|
agent: {
|
|
219
252
|
flags: {
|
|
220
|
-
list:
|
|
221
|
-
add:
|
|
222
|
-
|
|
223
|
-
default: ["dir", "json"],
|
|
224
|
-
remove: ["dir", "json"],
|
|
253
|
+
list: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
254
|
+
add: { ...PROJECT_FLAGS, ...HELP_FLAG, adapter: "string", model: "string", with: "repeat-string" },
|
|
255
|
+
rm: { ...PROJECT_FLAGS, ...HELP_FLAG },
|
|
225
256
|
},
|
|
226
257
|
},
|
|
227
258
|
};
|
|
@@ -232,61 +263,85 @@ export async function runCli(argv, io = {
|
|
|
232
263
|
const parsed = parseArgs(argv);
|
|
233
264
|
const command = parsed.positionals[0];
|
|
234
265
|
try {
|
|
235
|
-
|
|
266
|
+
validateCommandFlags(parsed, command);
|
|
267
|
+
if (command === "version" || parsed.flags.version === true) {
|
|
236
268
|
io.stdout.write(`workbench ${getCliVersion()}\n`);
|
|
237
269
|
return 0;
|
|
238
270
|
}
|
|
239
|
-
if (
|
|
271
|
+
if (command === "help") {
|
|
240
272
|
const helpCommand = command === "help" ? optionalPositional(parsed, 1) : undefined;
|
|
241
|
-
io.stdout.write(`${helpCommand ? commandHelp(helpCommand) : HELP}\n`);
|
|
273
|
+
io.stdout.write(`${parsed.flags.all === true ? HELP_ALL : helpCommand ? commandHelp(helpCommand) : HELP}\n`);
|
|
242
274
|
return 0;
|
|
243
275
|
}
|
|
244
276
|
if (parsed.flags.help === true) {
|
|
245
|
-
io.stdout.write(`${commandHelp(command)}\n`);
|
|
277
|
+
io.stdout.write(`${command ? commandHelp(command) : HELP}\n`);
|
|
246
278
|
return 0;
|
|
247
279
|
}
|
|
248
|
-
|
|
249
|
-
|
|
280
|
+
if (!command) {
|
|
281
|
+
return await handleStatus(parsed, io);
|
|
282
|
+
}
|
|
250
283
|
if (command === "login") {
|
|
251
284
|
return await handleLogin(parsed, io);
|
|
252
285
|
}
|
|
253
286
|
if (command === "logout") {
|
|
254
287
|
return await handleLogout(parsed, io);
|
|
255
288
|
}
|
|
256
|
-
if (command === "
|
|
289
|
+
if (command === "install") {
|
|
290
|
+
return await handleInstall(parsed, io);
|
|
291
|
+
}
|
|
292
|
+
const core = await coreOptions(parsed);
|
|
293
|
+
if (command === "new") {
|
|
257
294
|
const status = await initWorkbenchSkill({ dir: parsed.positionals[1] ?? dirFlag(parsed) });
|
|
258
|
-
return output(status, parsed, io, () => `
|
|
295
|
+
return output(status, parsed, io, () => `Created Workbench skill at ${status.root}.\nnext: edit SKILL.md, then run workbench eval`);
|
|
259
296
|
}
|
|
260
297
|
if (command === "status") {
|
|
261
|
-
|
|
262
|
-
return output(status, parsed, io, () => formatStatus(status));
|
|
263
|
-
}
|
|
264
|
-
if (command === "check") {
|
|
265
|
-
const result = await checkWorkbenchSkill(core);
|
|
266
|
-
return output(result, parsed, io, () => formatCheck(result));
|
|
298
|
+
return await handleStatus(parsed, io);
|
|
267
299
|
}
|
|
268
300
|
if (command === "eval") {
|
|
301
|
+
if (parsed.flags.cloud === true) {
|
|
302
|
+
return await handleCloudEval(parsed, io);
|
|
303
|
+
}
|
|
269
304
|
const runs = await evalWorkbenchSkill({
|
|
270
305
|
...core,
|
|
271
306
|
version: optionalPositional(parsed, 1),
|
|
272
|
-
skill: stringFlag(parsed, "
|
|
273
|
-
agent: stringFlag(parsed, "
|
|
307
|
+
skill: stringFlag(parsed, "skills"),
|
|
308
|
+
agent: stringFlag(parsed, "agents"),
|
|
274
309
|
samples: intFlag(parsed, "samples"),
|
|
275
310
|
rerun: parsed.flags.rerun === true,
|
|
276
311
|
});
|
|
277
|
-
const
|
|
278
|
-
|
|
312
|
+
const artifactIds = await artifactIdsByRunId(core, runs);
|
|
313
|
+
const failedRuns = runs.filter((run) => run.status === "failed" || run.status === "canceled");
|
|
314
|
+
if (failedRuns.length > 0) {
|
|
315
|
+
return emitEvalFailure(runs, failedRuns, artifactIds, parsed, io);
|
|
316
|
+
}
|
|
317
|
+
const deltas = await evalDeltas(core, runs);
|
|
318
|
+
const nextCommands = evalSuccessNextCommands(runs);
|
|
319
|
+
return emitResult("workbench.cli.eval.v1", {
|
|
320
|
+
result: runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
|
|
321
|
+
deltas: deltas,
|
|
322
|
+
nextCommands: nextCommands,
|
|
323
|
+
}, parsed, io, () => [
|
|
324
|
+
runs.map(formatRun).join("\n"),
|
|
325
|
+
...deltas.map(formatEvalDelta),
|
|
326
|
+
...(nextCommands[0] ? [`next: ${nextCommands[0]}`] : []),
|
|
327
|
+
].filter(Boolean).join("\n"));
|
|
279
328
|
}
|
|
280
329
|
if (command === "improve") {
|
|
330
|
+
if (parsed.flags.cloud === true) {
|
|
331
|
+
return await handleCloudImprove(parsed, io);
|
|
332
|
+
}
|
|
281
333
|
const result = await improveWorkbenchSkill({
|
|
282
334
|
...core,
|
|
283
335
|
version: optionalPositional(parsed, 1),
|
|
284
|
-
skill: stringFlag(parsed, "
|
|
285
|
-
agent: stringFlag(parsed, "
|
|
336
|
+
skill: stringFlag(parsed, "skills"),
|
|
337
|
+
agent: stringFlag(parsed, "agents"),
|
|
286
338
|
budget: intFlag(parsed, "budget"),
|
|
287
339
|
samples: intFlag(parsed, "samples"),
|
|
288
340
|
});
|
|
289
|
-
return output(
|
|
341
|
+
return output({
|
|
342
|
+
...result,
|
|
343
|
+
version: versionSummary(result.version),
|
|
344
|
+
}, parsed, io, () => `${formatImproveResult(result)}\nnext: workbench eval`);
|
|
290
345
|
}
|
|
291
346
|
if (command === "compare") {
|
|
292
347
|
const comparison = await compareWorkbench({
|
|
@@ -297,158 +352,209 @@ export async function runCli(argv, io = {
|
|
|
297
352
|
});
|
|
298
353
|
return output(comparison, parsed, io, () => formatComparison(comparison));
|
|
299
354
|
}
|
|
300
|
-
if (command === "retry") {
|
|
301
|
-
const runId = requiredPositional(parsed, 1, "workbench retry requires RUN_ID.");
|
|
302
|
-
const snapshot = await createWorkbenchInspectionSnapshot(core);
|
|
303
|
-
const run = snapshot.runs.find((entry) => entry.id === runId);
|
|
304
|
-
if (!run) {
|
|
305
|
-
throw new WorkbenchUserError(`Run not found: ${runId}`);
|
|
306
|
-
}
|
|
307
|
-
const retrySelection = retrySamplesForFailedJobs(snapshot.jobs, run);
|
|
308
|
-
const retry = await evalWorkbenchSkill({
|
|
309
|
-
...core,
|
|
310
|
-
version: run.versionId,
|
|
311
|
-
skill: run.skillName,
|
|
312
|
-
agent: run.agentName,
|
|
313
|
-
kind: "retry",
|
|
314
|
-
parentRunId: run.id,
|
|
315
|
-
samples: retrySelection.samples,
|
|
316
|
-
selectedSamples: retrySelection.selectedSamples,
|
|
317
|
-
});
|
|
318
|
-
const code = output(retry, parsed, io, () => retry.map(formatRun).join("\n"));
|
|
319
|
-
return retry.some((entry) => entry.status === "failed" || entry.status === "canceled") ? 1 : code;
|
|
320
|
-
}
|
|
321
|
-
if (command === "versions") {
|
|
322
|
-
const versions = await listWorkbenchVersions(core);
|
|
323
|
-
return output(versions, parsed, io, () => versions.map(formatVersion).join("\n") || "No versions.");
|
|
324
|
-
}
|
|
325
355
|
if (command === "switch") {
|
|
326
356
|
const versionRef = requiredPositional(parsed, 1, "workbench switch requires VERSION.");
|
|
327
357
|
const version = await switchWorkbenchVersion(versionRef, core);
|
|
328
|
-
return output(version, parsed, io, () => `Switched to ${version.id}.`);
|
|
358
|
+
return output(versionSummary(version), parsed, io, () => `Switched to ${version.id}.`);
|
|
329
359
|
}
|
|
330
360
|
if (command === "diff") {
|
|
331
|
-
const range =
|
|
361
|
+
const range = optionalPositional(parsed, 1) ?? await defaultDiffRange(core);
|
|
332
362
|
const diffs = await diffWorkbenchVersions(range, core);
|
|
333
363
|
return output(diffs, parsed, io, () => diffs.map((entry) => `${entry.status}\t${entry.path}`).join("\n") || "No diff.");
|
|
334
364
|
}
|
|
335
365
|
if (command === "show") {
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
}
|
|
341
|
-
const value = await showWorkbenchRef(ref, core);
|
|
342
|
-
return output(value, parsed, io, () => formatShow(value));
|
|
343
|
-
}
|
|
344
|
-
if (command === "files") {
|
|
345
|
-
const ref = requiredPositional(parsed, 1, "workbench files requires REF.");
|
|
346
|
-
const files = await filesForWorkbenchRef(ref, core);
|
|
347
|
-
return output(files, parsed, io, () => files.map((file) => file.path).join("\n") || "No files.");
|
|
348
|
-
}
|
|
349
|
-
if (command === "list") {
|
|
350
|
-
return await handleList(parsed, io);
|
|
351
|
-
}
|
|
352
|
-
if (command === "trace") {
|
|
353
|
-
const ref = requiredPositional(parsed, 1, "workbench trace requires RUN_ID or TRACE_ID.");
|
|
354
|
-
const snapshot = await createWorkbenchInspectionSnapshot(core);
|
|
355
|
-
const run = snapshot.runs.find((entry) => entry.id === ref);
|
|
356
|
-
const job = snapshot.jobs.find((entry) => entry.id === ref);
|
|
357
|
-
const traces = run
|
|
358
|
-
? snapshot.traces.filter((trace) => run.traceIds.includes(trace.id))
|
|
359
|
-
: job
|
|
360
|
-
? snapshot.traces.filter((trace) => job.traceIds.includes(trace.id))
|
|
361
|
-
: snapshot.traces.filter((trace) => trace.id === ref);
|
|
362
|
-
if (traces.length === 0) {
|
|
363
|
-
throw new WorkbenchUserError(`Trace not found: ${ref}`);
|
|
364
|
-
}
|
|
365
|
-
return output(traces, parsed, io, () => traces.map(formatTrace).join("\n"));
|
|
366
|
+
return await handleShow(parsed, io);
|
|
367
|
+
}
|
|
368
|
+
if (command === "log") {
|
|
369
|
+
return await handleLog(parsed, io);
|
|
366
370
|
}
|
|
367
371
|
if (command === "agent") {
|
|
368
372
|
return await handleAgent(parsed, io);
|
|
369
373
|
}
|
|
370
|
-
if (command === "skills") {
|
|
371
|
-
return await handleSkills(parsed, io);
|
|
372
|
-
}
|
|
373
374
|
if (command === "case") {
|
|
374
375
|
return await handleCase(parsed, io);
|
|
375
376
|
}
|
|
376
|
-
if (command === "remote") {
|
|
377
|
-
return await handleRemote(parsed, io);
|
|
378
|
-
}
|
|
379
377
|
if (command === "sync") {
|
|
380
378
|
const result = await syncWorkbenchRemote({
|
|
381
379
|
...core,
|
|
382
380
|
remote: optionalPositional(parsed, 1),
|
|
381
|
+
dryRun: parsed.flags["dry-run"] === true,
|
|
383
382
|
});
|
|
384
|
-
return
|
|
383
|
+
return emitResult("workbench.cli.sync.v1", {
|
|
384
|
+
remote: result.remote,
|
|
385
|
+
pushed: result.pushed,
|
|
386
|
+
pulled: result.pulled,
|
|
387
|
+
upToDate: result.upToDate,
|
|
388
|
+
publication: result.publication,
|
|
389
|
+
...(result.dryRun ? { dryRun: true } : {}),
|
|
390
|
+
}, parsed, io, () => `${result.dryRun ? "Would sync" : "Synced"} ${result.remote.name}: pushed ${result.pushed}, pulled ${result.pulled}${result.upToDate ? " (up to date)" : ""}.`);
|
|
385
391
|
}
|
|
386
392
|
if (command === "publish") {
|
|
393
|
+
const preview = parsed.flags["dry-run"] === true && !stringFlag(parsed, "as")
|
|
394
|
+
? await previewPublishWithDerivedRemote(parsed)
|
|
395
|
+
: undefined;
|
|
396
|
+
if (preview) {
|
|
397
|
+
return emitResult("workbench.cli.publish.v1", {
|
|
398
|
+
remote: preview.remote,
|
|
399
|
+
version: versionSummary(preview.version),
|
|
400
|
+
visibility: preview.visibility,
|
|
401
|
+
installHandle: preview.installHandle,
|
|
402
|
+
installUrl: preview.installUrl,
|
|
403
|
+
pinnedInstallUrl: preview.pinnedInstallUrl,
|
|
404
|
+
dryRun: true,
|
|
405
|
+
}, parsed, io, () => [
|
|
406
|
+
`Would publish ${preview.version.id} to remote ${preview.remote.name}.`,
|
|
407
|
+
`Visibility: ${preview.visibility}`,
|
|
408
|
+
`Install: ${preview.installUrl}`,
|
|
409
|
+
`Pinned: ${preview.pinnedInstallUrl}`,
|
|
410
|
+
`next: workbench install ${preview.installHandle}`,
|
|
411
|
+
].join("\n"));
|
|
412
|
+
}
|
|
413
|
+
const remote = await ensurePublishRemote(parsed);
|
|
387
414
|
const result = await publishWorkbenchVersion({
|
|
388
415
|
...core,
|
|
389
416
|
version: optionalPositional(parsed, 1),
|
|
390
|
-
|
|
417
|
+
remote,
|
|
418
|
+
dryRun: parsed.flags["dry-run"] === true,
|
|
419
|
+
visibility: parsePublishVisibilityFlags(parsed),
|
|
391
420
|
});
|
|
392
|
-
return
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
421
|
+
return emitResult("workbench.cli.publish.v1", {
|
|
422
|
+
remote: result.remote,
|
|
423
|
+
version: versionSummary(result.version),
|
|
424
|
+
visibility: result.visibility,
|
|
425
|
+
installHandle: result.installHandle,
|
|
426
|
+
installUrl: result.installUrl,
|
|
427
|
+
pinnedInstallUrl: result.pinnedInstallUrl,
|
|
428
|
+
...(result.dryRun ? { dryRun: true } : {}),
|
|
429
|
+
}, parsed, io, () => [
|
|
430
|
+
`${result.dryRun ? "Would publish" : "Published"} ${result.version.id} to remote ${result.remote.name}.`,
|
|
431
|
+
`Visibility: ${result.visibility}`,
|
|
432
|
+
`Install: ${result.installUrl}`,
|
|
433
|
+
`Pinned: ${result.pinnedInstallUrl}`,
|
|
434
|
+
`next: workbench install ${result.installHandle}`,
|
|
435
|
+
].join("\n"));
|
|
396
436
|
}
|
|
397
437
|
if (command === "open") {
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
dir: dirFlag(parsed),
|
|
402
|
-
authToken: core.authToken,
|
|
403
|
-
host: stringFlag(parsed, "host"),
|
|
404
|
-
port: intFlag(parsed, "port"),
|
|
405
|
-
});
|
|
406
|
-
io.stdout.write(`Workbench: ${server.url}\n`);
|
|
407
|
-
if (parsed.flags["no-open"] !== true) {
|
|
408
|
-
await openBrowser(server.url).catch(() => undefined);
|
|
409
|
-
}
|
|
410
|
-
await new Promise(() => { });
|
|
438
|
+
if (parsed.flags.json === true) {
|
|
439
|
+
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
440
|
+
return output(snapshot, parsed, io, () => "Read-only Workbench inspection data is available with --json.");
|
|
411
441
|
}
|
|
412
|
-
|
|
442
|
+
// The browser server serves committed object state through a read-only
|
|
443
|
+
// snapshot path, so long-running commands do not block page loads.
|
|
444
|
+
const server = await startWorkbenchOpenServer({
|
|
445
|
+
dir: dirFlag(parsed),
|
|
446
|
+
authToken: core.authToken,
|
|
447
|
+
host: stringFlag(parsed, "host"),
|
|
448
|
+
port: intFlag(parsed, "port"),
|
|
449
|
+
});
|
|
450
|
+
io.stdout.write(`Workbench: ${server.url}\n`);
|
|
451
|
+
if (parsed.flags["no-open"] !== true) {
|
|
452
|
+
await openBrowser(server.url).catch(() => undefined);
|
|
453
|
+
}
|
|
454
|
+
return await new Promise(() => { });
|
|
413
455
|
}
|
|
414
456
|
throw new WorkbenchUserError(`Unknown command: ${command}\n\n${HELP}`);
|
|
415
457
|
}
|
|
416
458
|
catch (error) {
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
if (
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
}
|
|
439
|
-
if (kind === "traces") {
|
|
440
|
-
return output(snapshot.traces, parsed, io, () => snapshot.traces.map(formatTrace).join("\n") || "No traces.");
|
|
441
|
-
}
|
|
442
|
-
if (kind === "artifacts") {
|
|
443
|
-
return output(snapshot.artifacts, parsed, io, () => snapshot.artifacts.map(formatArtifact).join("\n") || "No artifacts.");
|
|
459
|
+
return emitError(error, parsed, io);
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
async function handleStatus(parsed, io) {
|
|
463
|
+
const status = await workbenchStatusSnapshot(await coreOptions(parsed));
|
|
464
|
+
const auth = await workbenchCliAuthStatus();
|
|
465
|
+
return emitResult("workbench.status.v1", {
|
|
466
|
+
project: status.project,
|
|
467
|
+
worktree: status.worktree,
|
|
468
|
+
runs: status.runs,
|
|
469
|
+
remotes: status.remotes,
|
|
470
|
+
auth: auth,
|
|
471
|
+
next: status.next,
|
|
472
|
+
}, parsed, io, () => formatStatusSnapshot({ ...status, auth }));
|
|
473
|
+
}
|
|
474
|
+
async function handleLog(parsed, io) {
|
|
475
|
+
if (parsed.flags.runs === true && parsed.flags.versions === true) {
|
|
476
|
+
throw new WorkbenchCodedError("usage", "workbench log accepts only one of --runs or --versions.", {
|
|
477
|
+
remediation: "Run workbench log --runs or workbench log --versions.",
|
|
478
|
+
exitCode: 2,
|
|
479
|
+
});
|
|
444
480
|
}
|
|
445
|
-
if (
|
|
446
|
-
|
|
481
|
+
if (parsed.positionals.length > 1) {
|
|
482
|
+
if (parsed.flags.runs === true) {
|
|
483
|
+
throw new WorkbenchUserError("--runs does not accept a value.");
|
|
484
|
+
}
|
|
485
|
+
if (parsed.flags.versions === true) {
|
|
486
|
+
throw new WorkbenchUserError("--versions does not accept a value.");
|
|
487
|
+
}
|
|
488
|
+
rejectExtraInput(parsed, {
|
|
489
|
+
maxPositionals: 1,
|
|
490
|
+
message: "workbench log does not accept refs or paths.",
|
|
491
|
+
remediation: "Run workbench log, workbench log --runs, or workbench log --versions.",
|
|
492
|
+
});
|
|
447
493
|
}
|
|
448
|
-
|
|
494
|
+
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(await coreOptions(parsed));
|
|
495
|
+
const includeRuns = parsed.flags.versions !== true;
|
|
496
|
+
const includeVersions = parsed.flags.runs !== true;
|
|
497
|
+
const entries = [
|
|
498
|
+
...(includeVersions ? snapshot.versions.map((version) => ({
|
|
499
|
+
kind: "version",
|
|
500
|
+
id: version.id,
|
|
501
|
+
createdAt: version.createdAt,
|
|
502
|
+
message: version.message,
|
|
503
|
+
fileCount: version.files.length,
|
|
504
|
+
})) : []),
|
|
505
|
+
...(includeRuns ? snapshot.runs.map((run) => ({
|
|
506
|
+
kind: "run",
|
|
507
|
+
id: run.id,
|
|
508
|
+
createdAt: run.createdAt,
|
|
509
|
+
status: run.status,
|
|
510
|
+
versionId: run.versionId,
|
|
511
|
+
skillName: run.skillName,
|
|
512
|
+
agentName: run.agentName,
|
|
513
|
+
...(run.score !== undefined ? { score: run.score } : {}),
|
|
514
|
+
})) : []),
|
|
515
|
+
].sort((left, right) => right.createdAt.localeCompare(left.createdAt));
|
|
516
|
+
return emitResult("workbench.cli.log.v1", {
|
|
517
|
+
entries: entries,
|
|
518
|
+
}, parsed, io, () => entries.map(formatLogEntry).join("\n") || "No history.");
|
|
519
|
+
}
|
|
520
|
+
async function handleShow(parsed, io) {
|
|
521
|
+
const ref = requiredPositional(parsed, 1, "workbench show requires REF.");
|
|
522
|
+
const session = await showLocalAgentSession(ref);
|
|
523
|
+
if (session) {
|
|
524
|
+
return output(session, parsed, io, () => formatSessionDetail(session));
|
|
525
|
+
}
|
|
526
|
+
const core = await coreOptions(parsed);
|
|
527
|
+
const [objectRef, requestedPath] = splitShowRef(ref);
|
|
528
|
+
if (requestedPath) {
|
|
529
|
+
const runOrJobFile = await fileForRunOrJobRef(core, objectRef, requestedPath);
|
|
530
|
+
if (runOrJobFile) {
|
|
531
|
+
return output(runOrJobFile, parsed, io, () => formatShow(runOrJobFile));
|
|
532
|
+
}
|
|
533
|
+
const value = await showWorkbenchRef(ref, core);
|
|
534
|
+
return output(value, parsed, io, () => formatShow(value));
|
|
535
|
+
}
|
|
536
|
+
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
537
|
+
const version = snapshot.versions.find((entry) => entry.id === objectRef);
|
|
538
|
+
if (version) {
|
|
539
|
+
return output(fileListing("version", version.id, version.files), parsed, io, () => formatFileListing("version", version.id, version.files));
|
|
540
|
+
}
|
|
541
|
+
const trace = snapshot.traces.find((entry) => entry.id === objectRef);
|
|
542
|
+
if (trace) {
|
|
543
|
+
return output(fileListing("trace", trace.id, trace.files), parsed, io, () => formatFileListing("trace", trace.id, trace.files));
|
|
544
|
+
}
|
|
545
|
+
const artifact = snapshot.artifacts.find((entry) => entry.id === objectRef);
|
|
546
|
+
if (artifact) {
|
|
547
|
+
return output(fileListing("artifact", artifact.id, artifact.files), parsed, io, () => formatFileListing("artifact", artifact.id, artifact.files));
|
|
548
|
+
}
|
|
549
|
+
const details = evidenceDetailsForRunOrJob(snapshot, objectRef);
|
|
550
|
+
if (details.length > 0) {
|
|
551
|
+
return output(details, parsed, io, () => details.map(formatTraceDetail).join("\n"));
|
|
552
|
+
}
|
|
553
|
+
const value = await showWorkbenchRef(ref, core);
|
|
554
|
+
return output(value, parsed, io, () => formatShow(value));
|
|
449
555
|
}
|
|
450
556
|
async function handleAgent(parsed, io) {
|
|
451
|
-
const subcommand = requiredPositional(parsed, 1, "workbench agent requires list|add|
|
|
557
|
+
const subcommand = requiredPositional(parsed, 1, "workbench agent requires list|add|rm.");
|
|
452
558
|
if (subcommand === "list") {
|
|
453
559
|
const agents = await listWorkbenchAgents(await coreOptions(parsed));
|
|
454
560
|
return output(agents, parsed, io, () => agents.map(formatAgent).join("\n") || "No agents.");
|
|
@@ -468,95 +574,43 @@ async function handleAgent(parsed, io) {
|
|
|
468
574
|
});
|
|
469
575
|
return output(agent, parsed, io, () => `Added agent ${formatAgent(agent)}.`);
|
|
470
576
|
}
|
|
471
|
-
if (subcommand === "
|
|
472
|
-
const
|
|
473
|
-
const agent = (await listWorkbenchAgents(await coreOptions(parsed))).find((entry) => entry.name === name);
|
|
474
|
-
if (!agent) {
|
|
475
|
-
throw new WorkbenchUserError(`Agent not found: ${name}`);
|
|
476
|
-
}
|
|
477
|
-
return output(agent, parsed, io, () => formatAgent(agent));
|
|
478
|
-
}
|
|
479
|
-
if (subcommand === "default") {
|
|
480
|
-
const agent = await setDefaultWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent default requires NAME."), await coreOptions(parsed));
|
|
481
|
-
return output(agent, parsed, io, () => `Default agent: ${agent.name}`);
|
|
482
|
-
}
|
|
483
|
-
if (subcommand === "remove") {
|
|
484
|
-
const result = await removeWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent remove requires NAME."), await coreOptions(parsed));
|
|
577
|
+
if (subcommand === "rm") {
|
|
578
|
+
const result = await removeWorkbenchAgent(requiredPositional(parsed, 2, "workbench agent rm requires NAME."), await coreOptions(parsed));
|
|
485
579
|
return output(result, parsed, io, () => `Removed agent ${result.removed}.`);
|
|
486
580
|
}
|
|
487
581
|
throw new WorkbenchUserError(`Unsupported agent command: ${subcommand}`);
|
|
488
582
|
}
|
|
489
|
-
async function handleSkills(parsed, io) {
|
|
490
|
-
const subcommand = requiredPositional(parsed, 1, "workbench skills requires list.");
|
|
491
|
-
if (subcommand !== "list") {
|
|
492
|
-
throw new WorkbenchUserError(`Unsupported skills command: ${subcommand}`);
|
|
493
|
-
}
|
|
494
|
-
const snapshot = await createWorkbenchInspectionSnapshot(await coreOptions(parsed));
|
|
495
|
-
return output(snapshot.skillSources, parsed, io, () => snapshot.skillSources.map((source) => {
|
|
496
|
-
const where = source.kind === "remote" ? `${source.from}${source.ref ? `#${source.ref}` : ""}` : source.path;
|
|
497
|
-
return `${source.name}\t${source.kind}\t${where}\tincludes=${source.includes?.length ?? 0}`;
|
|
498
|
-
}).join("\n") || "No skills.");
|
|
499
|
-
}
|
|
500
583
|
async function handleCase(parsed, io) {
|
|
501
|
-
const subcommand = requiredPositional(parsed, 1, "workbench case requires list|add|
|
|
584
|
+
const subcommand = requiredPositional(parsed, 1, "workbench case requires list|add|rm.");
|
|
502
585
|
if (subcommand === "list") {
|
|
503
586
|
const cases = await listWorkbenchCases(await coreOptions(parsed));
|
|
504
587
|
return output(cases, parsed, io, () => cases.map((entry) => `${entry.id}\t${entry.path}`).join("\n") || "No cases.");
|
|
505
588
|
}
|
|
506
589
|
if (subcommand === "add") {
|
|
507
|
-
const
|
|
590
|
+
const core = await coreOptions(parsed);
|
|
591
|
+
const sourceRef = optionalPositional(parsed, 2);
|
|
592
|
+
const record = await addWorkbenchCase({ ...core, fromTraceId: sourceRef ? await traceIdForCaseSource(core, sourceRef) : undefined });
|
|
508
593
|
return output(record, parsed, io, () => `Added case ${record.id}.`);
|
|
509
594
|
}
|
|
510
|
-
if (subcommand === "
|
|
511
|
-
const
|
|
512
|
-
return output(record, parsed, io, () => record.content);
|
|
513
|
-
}
|
|
514
|
-
if (subcommand === "remove") {
|
|
515
|
-
const result = await removeWorkbenchCase(requiredPositional(parsed, 2, "workbench case remove requires CASE_ID."), await coreOptions(parsed));
|
|
595
|
+
if (subcommand === "rm") {
|
|
596
|
+
const result = await removeWorkbenchCase(requiredPositional(parsed, 2, "workbench case rm requires CASE_ID."), await coreOptions(parsed));
|
|
516
597
|
return output(result, parsed, io, () => `Removed case ${result.removed}.`);
|
|
517
598
|
}
|
|
518
599
|
throw new WorkbenchUserError(`Unsupported case command: ${subcommand}`);
|
|
519
600
|
}
|
|
520
|
-
async function
|
|
521
|
-
const
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
const subcommand = optionalPositional(parsed, 1) ?? "status";
|
|
534
|
-
if (subcommand === "status") {
|
|
535
|
-
const targetRaw = optionalPositional(parsed, 2);
|
|
536
|
-
const profile = authProfileFlag(parsed);
|
|
537
|
-
const store = localWorkbenchAdapterAuthStore(adapterAuthStoreRoot());
|
|
538
|
-
if (targetRaw) {
|
|
539
|
-
const status = await store.status(parseAuthTarget(targetRaw, profile));
|
|
540
|
-
return output({ ok: true, command: "status", status }, parsed, io, () => formatAuthStatusRecord(status));
|
|
541
|
-
}
|
|
542
|
-
const statuses = await store.listStatus();
|
|
543
|
-
const required = await requiredAgentAuthStatuses(parsed, statuses);
|
|
544
|
-
return output({ ok: true, command: "status", adapterStatuses: statuses, required }, parsed, io, () => formatAuthStatusList(statuses, required));
|
|
545
|
-
}
|
|
546
|
-
if (subcommand === "connect") {
|
|
547
|
-
const targetRaw = requiredPositional(parsed, 2, "workbench auth connect requires ADAPTER[/SLOT].");
|
|
548
|
-
const target = parseAuthTarget(targetRaw, authProfileFlag(parsed));
|
|
549
|
-
const method = authMethod(parsed, target.adapterId);
|
|
550
|
-
const bundle = await collectAdapterAuthBundle({
|
|
551
|
-
target,
|
|
552
|
-
method,
|
|
553
|
-
profileRoot: path.resolve(stringFlag(parsed, "profile-root") ?? os.homedir()),
|
|
554
|
-
});
|
|
555
|
-
const saved = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).put(bundle);
|
|
556
|
-
const remote = await uploadAdapterConnection(saved, parsed);
|
|
557
|
-
return output({
|
|
558
|
-
ok: true,
|
|
559
|
-
command: "connect",
|
|
601
|
+
async function handleAdapterLogin(provider, parsed, io) {
|
|
602
|
+
const target = parseAuthTarget(provider, authProfileFlag(parsed));
|
|
603
|
+
const method = authMethod(parsed, target.adapterId);
|
|
604
|
+
const bundle = await collectAdapterAuthBundle({
|
|
605
|
+
target,
|
|
606
|
+
method,
|
|
607
|
+
profileRoot: path.resolve(stringFlag(parsed, "profile-root") ?? os.homedir()),
|
|
608
|
+
});
|
|
609
|
+
const saved = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).put(bundle);
|
|
610
|
+
const remote = await uploadAdapterConnection(saved, parsed);
|
|
611
|
+
return emitResult("workbench.cli.login.v1", {
|
|
612
|
+
provider: saved.adapterId,
|
|
613
|
+
localAdapter: {
|
|
560
614
|
adapter: saved.adapterId,
|
|
561
615
|
...(saved.slot ? { slot: saved.slot } : {}),
|
|
562
616
|
profile: saved.profile,
|
|
@@ -564,25 +618,24 @@ async function handleAuth(parsed, io) {
|
|
|
564
618
|
status: saved.status,
|
|
565
619
|
version: saved.version,
|
|
566
620
|
updatedAt: saved.updatedAt,
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
}
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
621
|
+
},
|
|
622
|
+
workbenchCloud: remote,
|
|
623
|
+
}, parsed, io, () => `Connected ${formatAuthTarget(saved)} ${saved.method} auth v${saved.version}; Workbench Cloud: ${remote.sync}${remote.reason ? ` (${remote.reason})` : ""}.`);
|
|
624
|
+
}
|
|
625
|
+
async function handleAdapterLogout(provider, parsed, io) {
|
|
626
|
+
const target = parseAuthTarget(provider, authProfileFlag(parsed));
|
|
627
|
+
await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).disconnect(target);
|
|
628
|
+
const remote = await deleteAdapterConnectionRemote(target, parsed);
|
|
629
|
+
return emitResult("workbench.cli.logout.v1", {
|
|
630
|
+
provider: target.adapterId,
|
|
631
|
+
localAdapter: {
|
|
578
632
|
adapter: target.adapterId,
|
|
579
633
|
...(target.slot ? { slot: target.slot } : {}),
|
|
580
634
|
profile: target.profile,
|
|
581
635
|
status: "disconnected",
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
}
|
|
585
|
-
throw new WorkbenchUserError(`Unsupported auth command: ${subcommand}`);
|
|
636
|
+
},
|
|
637
|
+
workbenchCloud: remote,
|
|
638
|
+
}, parsed, io, () => `Disconnected ${formatAuthTarget(target)}; Workbench Cloud: ${remote.sync}${remote.reason ? ` (${remote.reason})` : ""}.`);
|
|
586
639
|
}
|
|
587
640
|
function getCliVersion() {
|
|
588
641
|
const manifest = require("../package.json");
|
|
@@ -592,19 +645,17 @@ function commandHelp(command) {
|
|
|
592
645
|
return COMMAND_HELP[command] ?? HELP;
|
|
593
646
|
}
|
|
594
647
|
function validateCommandFlags(parsed, command) {
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
}
|
|
598
|
-
const allowed = allowedFlagsForCommand(parsed, command);
|
|
648
|
+
const effectiveCommand = command ?? (parsed.flags.version === true ? "version" : "status");
|
|
649
|
+
const allowed = allowedFlagsForCommand(parsed, effectiveCommand);
|
|
599
650
|
if (!allowed) {
|
|
600
651
|
return;
|
|
601
652
|
}
|
|
602
|
-
const allowedSet = new Set(allowed);
|
|
653
|
+
const allowedSet = new Set(Object.keys(allowed));
|
|
603
654
|
for (const [name, value] of Object.entries(parsed.flags)) {
|
|
604
|
-
if (!allowedSet.has(name)
|
|
605
|
-
throw new WorkbenchUserError(`Unsupported flag --${name} for workbench ${
|
|
655
|
+
if (!allowedSet.has(name)) {
|
|
656
|
+
throw new WorkbenchUserError(`Unsupported flag --${name} for workbench ${effectiveCommand}.`);
|
|
606
657
|
}
|
|
607
|
-
validateFlagValue(name, value);
|
|
658
|
+
validateFlagValue(name, value, allowed[name]);
|
|
608
659
|
}
|
|
609
660
|
}
|
|
610
661
|
function allowedFlagsForCommand(parsed, command) {
|
|
@@ -613,10 +664,9 @@ function allowedFlagsForCommand(parsed, command) {
|
|
|
613
664
|
return COMMAND_FLAGS[command];
|
|
614
665
|
}
|
|
615
666
|
const subcommand = parsed.positionals[1] ?? subcommands.defaultSubcommand;
|
|
616
|
-
return subcommand ? subcommands.flags[subcommand] ??
|
|
667
|
+
return subcommand ? subcommands.flags[subcommand] ?? { ...COMMON_FLAGS, ...HELP_FLAG } : { ...COMMON_FLAGS, ...HELP_FLAG };
|
|
617
668
|
}
|
|
618
|
-
function validateFlagValue(name, value) {
|
|
619
|
-
const kind = FLAG_DEFINITIONS[name];
|
|
669
|
+
function validateFlagValue(name, value, kind) {
|
|
620
670
|
if (!kind) {
|
|
621
671
|
return;
|
|
622
672
|
}
|
|
@@ -643,55 +693,685 @@ function validateFlagValue(name, value) {
|
|
|
643
693
|
}
|
|
644
694
|
}
|
|
645
695
|
const CONFIG_SCHEMA = "workbench.cli.config.v1";
|
|
696
|
+
const DEFAULT_WORKBENCH_CLOUD_BASE_URL = "https://v2.workbench.ai";
|
|
646
697
|
const API_REQUEST_MAX_ATTEMPTS = 3;
|
|
647
698
|
const API_REQUEST_GZIP_THRESHOLD_BYTES = 1024 * 1024;
|
|
699
|
+
const CLOUD_RUN_TIMEOUT_MS = 30 * 60 * 1000;
|
|
700
|
+
const CLOUD_RUN_POLL_INTERVAL_MS = 3000;
|
|
648
701
|
async function handleLogin(parsed, io) {
|
|
649
|
-
|
|
650
|
-
|
|
702
|
+
const provider = optionalPositional(parsed, 1);
|
|
703
|
+
if (provider) {
|
|
704
|
+
if (parsed.positionals.length > 2) {
|
|
705
|
+
throw new WorkbenchUserError("workbench login PROVIDER accepts only one provider argument.");
|
|
706
|
+
}
|
|
707
|
+
if (parsed.flags["start-only"] === true || parsed.flags.wait === true || parsed.flags.timeout !== undefined || parsed.flags["no-open"] === true) {
|
|
708
|
+
throw new WorkbenchCodedError("usage", "Workbench Cloud login flags do not apply to provider login.", {
|
|
709
|
+
remediation: `Run workbench login ${provider} --method ${authMethod(parsed, provider)}.`,
|
|
710
|
+
exitCode: 2,
|
|
711
|
+
});
|
|
712
|
+
}
|
|
713
|
+
return await handleAdapterLogin(provider, parsed, io);
|
|
714
|
+
}
|
|
715
|
+
if (parsed.flags["start-only"] === true && parsed.flags.wait === true) {
|
|
716
|
+
throw new WorkbenchCodedError("usage", "workbench login accepts only one of --start-only or --wait.", {
|
|
717
|
+
remediation: "Run workbench login --start-only or workbench login --wait --timeout 120.",
|
|
718
|
+
exitCode: 2,
|
|
719
|
+
});
|
|
720
|
+
}
|
|
721
|
+
const startOnly = parsed.flags["start-only"] === true;
|
|
722
|
+
const waitOnly = parsed.flags.wait === true;
|
|
723
|
+
const timeoutSeconds = intFlag(parsed, "timeout");
|
|
724
|
+
if (startOnly && timeoutSeconds !== undefined) {
|
|
725
|
+
throw new WorkbenchCodedError("usage", "workbench login --timeout only applies with --wait.", {
|
|
726
|
+
remediation: "Run workbench login --start-only, then workbench login --wait --timeout 120.",
|
|
727
|
+
exitCode: 2,
|
|
728
|
+
});
|
|
729
|
+
}
|
|
730
|
+
if (waitOnly && timeoutSeconds === undefined) {
|
|
731
|
+
throw new WorkbenchCodedError("usage", "workbench login --wait requires --timeout N.", {
|
|
732
|
+
remediation: "Run workbench login --wait --timeout 120.",
|
|
733
|
+
exitCode: 2,
|
|
734
|
+
});
|
|
651
735
|
}
|
|
652
736
|
const config = await loadConfig();
|
|
653
737
|
const baseUrl = selectWorkbenchBaseUrl({
|
|
654
738
|
explicitBaseUrl: stringFlag(parsed, "base-url"),
|
|
655
739
|
configBaseUrl: config.baseUrl,
|
|
656
740
|
});
|
|
657
|
-
const
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
741
|
+
const pending = waitOnly ? await readPendingDeviceAuthorization(baseUrl) : null;
|
|
742
|
+
const record = pending ?? await startDeviceAuthorization(baseUrl);
|
|
743
|
+
const freshAuthorization = pending === null;
|
|
744
|
+
if (startOnly) {
|
|
745
|
+
await writePendingDeviceAuthorization(record);
|
|
746
|
+
if (parsed.flags["no-open"] !== true) {
|
|
747
|
+
await openBrowser(record.verification_uri_complete).catch(() => undefined);
|
|
748
|
+
}
|
|
749
|
+
return emitResult("workbench.cli.login.v1", {
|
|
750
|
+
status: "authorization_pending",
|
|
751
|
+
baseUrl,
|
|
752
|
+
verificationUri: record.verification_uri,
|
|
753
|
+
verificationUriComplete: record.verification_uri_complete,
|
|
754
|
+
userCode: record.user_code,
|
|
755
|
+
expiresAt: record.expiresAt,
|
|
756
|
+
resume: "workbench login --wait --timeout 120",
|
|
757
|
+
}, parsed, io, () => `Open ${record.verification_uri_complete}\nCode: ${record.user_code}\nResume: workbench login --wait --timeout 120`);
|
|
758
|
+
}
|
|
759
|
+
await writePendingDeviceAuthorization(record);
|
|
760
|
+
if (freshAuthorization && !parsed.flags.json) {
|
|
761
|
+
io.stdout.write(`Open ${record.verification_uri_complete}\nCode: ${record.user_code}\n`);
|
|
762
|
+
}
|
|
763
|
+
if (!waitOnly && parsed.flags["no-open"] !== true) {
|
|
764
|
+
await openBrowser(record.verification_uri_complete).catch(() => undefined);
|
|
765
|
+
}
|
|
766
|
+
let token;
|
|
767
|
+
try {
|
|
768
|
+
token = await pollDeviceToken(baseUrl, record, timeoutSeconds);
|
|
671
769
|
}
|
|
672
|
-
|
|
673
|
-
|
|
770
|
+
catch (error) {
|
|
771
|
+
const denied = error instanceof WorkbenchCodedError && error.code === "login_denied";
|
|
772
|
+
const expired = Date.parse(record.expiresAt) <= Date.now();
|
|
773
|
+
if (denied || expired) {
|
|
774
|
+
await clearPendingDeviceAuthorization();
|
|
775
|
+
}
|
|
776
|
+
throw error;
|
|
674
777
|
}
|
|
675
|
-
|
|
778
|
+
const username = await fetchWorkbenchUsername(baseUrl, token.access_token).catch(() => undefined);
|
|
779
|
+
await writeConfig({
|
|
780
|
+
schema: CONFIG_SCHEMA,
|
|
781
|
+
baseUrl,
|
|
782
|
+
accessToken: token.access_token,
|
|
783
|
+
...(username ? { username } : {}),
|
|
784
|
+
});
|
|
785
|
+
await clearPendingDeviceAuthorization();
|
|
786
|
+
return emitResult("workbench.cli.login.v1", {
|
|
787
|
+
status: "authenticated",
|
|
788
|
+
baseUrl,
|
|
789
|
+
...(username ? { username } : {}),
|
|
790
|
+
...(token.expires_in !== undefined ? { expiresIn: token.expires_in } : {}),
|
|
791
|
+
}, parsed, io, () => `Workbench Cloud: authenticated${username ? ` as ${username}` : ""}\nWorkbench API: ${baseUrl}`);
|
|
676
792
|
}
|
|
677
793
|
async function handleLogout(parsed, io) {
|
|
678
|
-
|
|
679
|
-
|
|
794
|
+
const provider = optionalPositional(parsed, 1);
|
|
795
|
+
if (provider) {
|
|
796
|
+
if (parsed.positionals.length > 2) {
|
|
797
|
+
throw new WorkbenchUserError("workbench logout PROVIDER accepts only one provider argument.");
|
|
798
|
+
}
|
|
799
|
+
return await handleAdapterLogout(provider, parsed, io);
|
|
680
800
|
}
|
|
681
801
|
const config = await loadConfig();
|
|
682
802
|
const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
|
|
683
|
-
|
|
803
|
+
const tokenPresent = Boolean(config.accessToken);
|
|
804
|
+
if (tokenPresent && !baseUrl) {
|
|
684
805
|
throw new WorkbenchUserError("Missing Workbench API URL. Set WORKBENCH_API_URL or run `workbench login --base-url URL`.");
|
|
685
806
|
}
|
|
807
|
+
let revoke = "skipped";
|
|
686
808
|
if (config.accessToken && baseUrl) {
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
809
|
+
try {
|
|
810
|
+
const response = await fetch(`${baseUrl}/api/oauth/revoke`, {
|
|
811
|
+
method: "POST",
|
|
812
|
+
headers: { "content-type": "application/json" },
|
|
813
|
+
body: JSON.stringify({ token: config.accessToken }),
|
|
814
|
+
});
|
|
815
|
+
revoke = response.ok ? "revoked" : "failed";
|
|
816
|
+
}
|
|
817
|
+
catch {
|
|
818
|
+
revoke = "failed";
|
|
819
|
+
}
|
|
820
|
+
}
|
|
821
|
+
const configRemoved = tokenPresent;
|
|
822
|
+
if (tokenPresent) {
|
|
823
|
+
await writeConfig({ schema: CONFIG_SCHEMA, ...(baseUrl ? { baseUrl } : {}) });
|
|
824
|
+
}
|
|
825
|
+
const adapterStatuses = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).listStatus().catch(() => []);
|
|
826
|
+
const adapterAuthRetained = adapterStatuses.length > 0;
|
|
827
|
+
return emitResult("workbench.cli.logout.v1", {
|
|
828
|
+
...(baseUrl ? { baseUrl } : {}),
|
|
829
|
+
tokenPresent,
|
|
830
|
+
revoke,
|
|
831
|
+
configRemoved,
|
|
832
|
+
adapterAuthRetained,
|
|
833
|
+
}, parsed, io, () => [
|
|
834
|
+
`Logged out of Workbench${baseUrl ? ` (${baseUrl})` : ""}.`,
|
|
835
|
+
`Token: ${tokenPresent ? "present" : "absent"}; revoke ${revoke}; config ${configRemoved ? "removed" : "unchanged"}.`,
|
|
836
|
+
adapterAuthRetained
|
|
837
|
+
? "Local adapter auth records were retained; run workbench logout PROVIDER to remove them."
|
|
838
|
+
: "No local adapter auth records remain.",
|
|
839
|
+
].join("\n"));
|
|
840
|
+
}
|
|
841
|
+
async function handleInstall(parsed, io) {
|
|
842
|
+
const sourceInput = requiredPositional(parsed, 1, "workbench install requires HANDLE_OR_URL.");
|
|
843
|
+
rejectExtraInput(parsed, {
|
|
844
|
+
maxPositionals: 2,
|
|
845
|
+
message: "workbench install accepts one HANDLE_OR_URL argument.",
|
|
846
|
+
remediation: "Run workbench install OWNER/SKILL --to codex.",
|
|
847
|
+
});
|
|
848
|
+
const source = await resolveWorkbenchInstallSourceInput(sourceInput);
|
|
849
|
+
const workbenchSource = parseWorkbenchInstallSource(source);
|
|
850
|
+
if (!workbenchSource) {
|
|
851
|
+
throw new WorkbenchCodedError("usage", "workbench install requires a Workbench Cloud source URL.", {
|
|
852
|
+
remediation: "Run workbench install OWNER/SKILL --to codex.",
|
|
853
|
+
exitCode: 2,
|
|
854
|
+
});
|
|
855
|
+
}
|
|
856
|
+
const snapshot = await fetchWorkbenchInstallSourceSnapshot(workbenchSource, source);
|
|
857
|
+
const sourceSummary = workbenchInstallSourceSummary(workbenchSource, snapshot);
|
|
858
|
+
const config = await loadConfig();
|
|
859
|
+
if (parsed.flags.list === true) {
|
|
860
|
+
return emitResult("workbench.cli.install.v1", {
|
|
861
|
+
source: sourceSummary,
|
|
862
|
+
skills: [snapshot.name],
|
|
863
|
+
fileCount: snapshot.files.length,
|
|
864
|
+
targets: installTargetsToJson(supportedInstallTargets()),
|
|
865
|
+
}, parsed, io, () => [
|
|
866
|
+
`${snapshot.name}\t${snapshot.versionId}\tfiles=${snapshot.files.length}`,
|
|
867
|
+
"Targets:",
|
|
868
|
+
...supportedInstallTargets().map((target) => ` ${target.agent}\t${target.destination}`),
|
|
869
|
+
].join("\n"));
|
|
870
|
+
}
|
|
871
|
+
const toTargets = stringsFlag(parsed, "to");
|
|
872
|
+
const selectedTargets = toTargets.length > 0 ? normalizeInstallTargetNames(toTargets) : await defaultInstallTargetNames(config);
|
|
873
|
+
const targets = resolveInstallTargets({
|
|
874
|
+
agents: selectedTargets.filter((target) => target !== "local"),
|
|
875
|
+
local: selectedTargets.some((target) => target === "local"),
|
|
876
|
+
skillName: snapshot.name,
|
|
877
|
+
});
|
|
878
|
+
const result = await installSnapshotToTargets({
|
|
879
|
+
snapshot,
|
|
880
|
+
targets,
|
|
881
|
+
overwrite: parsed.flags.yes === true,
|
|
882
|
+
dryRun: parsed.flags["dry-run"] === true,
|
|
883
|
+
});
|
|
884
|
+
if (toTargets.length > 0 && parsed.flags["dry-run"] !== true) {
|
|
885
|
+
await writeConfig({ ...config, installTargets: selectedTargets });
|
|
886
|
+
}
|
|
887
|
+
return emitResult("workbench.cli.install.v1", {
|
|
888
|
+
source: sourceSummary,
|
|
889
|
+
result: result.result,
|
|
890
|
+
targets: result.targets,
|
|
891
|
+
filesCopied: result.filesCopied,
|
|
892
|
+
...(parsed.flags["dry-run"] === true ? { dryRun: true } : {}),
|
|
893
|
+
}, parsed, io, () => [
|
|
894
|
+
parsed.flags["dry-run"] === true
|
|
895
|
+
? `Would install ${snapshot.name}: filesCopied=${result.filesCopied}`
|
|
896
|
+
: `Installed ${snapshot.name}: ${result.result}`,
|
|
897
|
+
...result.targets.map((target) => ` ${target.agent}\t${target.previous}\t${target.destination}`),
|
|
898
|
+
].join("\n"));
|
|
899
|
+
}
|
|
900
|
+
async function handleCloudEval(parsed, io) {
|
|
901
|
+
const started = await startCloudExecution("eval", parsed);
|
|
902
|
+
const artifactIds = await artifactIdsByRunId(started.core, started.runs);
|
|
903
|
+
const failedRuns = started.runs.filter((run) => run.status === "failed" || run.status === "canceled");
|
|
904
|
+
if (failedRuns.length > 0) {
|
|
905
|
+
return emitEvalFailure(started.runs, failedRuns, artifactIds, parsed, io);
|
|
906
|
+
}
|
|
907
|
+
const deltas = await evalDeltas(started.core, started.runs);
|
|
908
|
+
const nextCommands = cloudEvalNextCommands(started.runs);
|
|
909
|
+
return emitResult("workbench.cli.eval.v1", {
|
|
910
|
+
result: started.runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
|
|
911
|
+
deltas: deltas,
|
|
912
|
+
nextCommands: nextCommands,
|
|
913
|
+
cloud: cloudExecutionSummary(started),
|
|
914
|
+
}, parsed, io, () => [
|
|
915
|
+
`Completed hosted eval on ${started.remote.url}.`,
|
|
916
|
+
started.runs.map(formatRun).join("\n"),
|
|
917
|
+
...deltas.map(formatEvalDelta),
|
|
918
|
+
...(nextCommands[0] ? [`next: ${nextCommands[0]}`] : []),
|
|
919
|
+
].filter(Boolean).join("\n"));
|
|
920
|
+
}
|
|
921
|
+
/**
 * Handles a hosted ("cloud") improve: starts the execution, fails with
 * `improve_failed` when any run failed or was canceled, otherwise switches
 * the local version when applicable and emits `workbench.cli.improve.v1`.
 *
 * @param parsed Parsed CLI invocation, forwarded to the helpers it calls.
 * @param io Output sink forwarded to `emitResult`.
 * @throws WorkbenchCodedError `improve_failed` when a run failed or was canceled.
 */
async function handleCloudImprove(parsed, io) {
    const execution = await startCloudExecution("improve", parsed);
    const artifactIds = await artifactIdsByRunId(execution.core, execution.runs);
    const unsuccessful = execution.runs.filter((run) => run.status === "failed" || run.status === "canceled");
    if (unsuccessful.length > 0) {
        const [firstFailure] = unsuccessful;
        const subject = {
            runIds: unsuccessful.map((run) => run.id),
            statuses: Object.fromEntries(unsuccessful.map((run) => [run.id, run.status])),
        };
        throw new WorkbenchCodedError("improve_failed", "Hosted improve failed; evidence was saved.", {
            remediation: `Run workbench show ${firstFailure.id}.`,
            subject,
            exitCode: 1,
        });
    }
    const switchedVersionId = await switchHostedImproveVersionIfPromoted(execution);
    const nextCommands = cloudImproveNextCommands(execution.runs);
    const payload = {
        result: execution.runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? [])),
        nextCommands,
        cloud: cloudExecutionSummary(execution),
    };
    if (switchedVersionId) {
        payload.switchedVersionId = switchedVersionId;
    }
    // Human-readable rendering; empty lines are dropped before joining.
    const renderText = () => {
        const lines = [
            `Completed hosted improve on ${execution.remote.url}.`,
            execution.runs.map(formatRun).join("\n"),
        ];
        if (switchedVersionId) {
            lines.push(`Switched local source to ${switchedVersionId}.`);
        }
        if (nextCommands[0]) {
            lines.push(`next: ${nextCommands[0]}`);
        }
        return lines.filter(Boolean).join("\n");
    };
    return emitResult("workbench.cli.improve.v1", payload, parsed, io, renderText);
}
|
|
950
|
+
/**
 * Picks the install targets to use when none were given explicitly.
 *
 * Configured `installTargets` win when non-empty. Otherwise every supported
 * non-"local" target is kept when the directory two levels above its
 * destination exists; if none match, the result is `["local"]`.
 *
 * @param config Loaded CLI config (only `installTargets` is read).
 * @returns Promise of the list of target agent names.
 */
async function defaultInstallTargetNames(config) {
    const configured = config.installTargets;
    if (configured && configured.length > 0) {
        return configured;
    }
    const detected = [];
    for (const target of supportedInstallTargets()) {
        if (target.agent !== "local") {
            const agentHome = path.dirname(path.dirname(target.destination));
            if (await pathExists(agentHome)) {
                detected.push(target.agent);
            }
        }
    }
    return detected.length === 0 ? ["local"] : detected;
}
|
|
966
|
+
/**
 * Trims and lower-cases install target names, rejects anything other than
 * "codex", "claude" or "local", and removes duplicates while keeping the
 * order of first appearance.
 *
 * @param values Iterable of raw target name strings.
 * @returns Array of unique normalized target names.
 * @throws WorkbenchCodedError `usage` (exit code 2) on an unsupported name.
 */
function normalizeInstallTargetNames(values) {
    const supported = new Set(["codex", "claude", "local"]);
    const unique = new Set();
    for (const raw of values) {
        const name = raw.trim().toLowerCase();
        if (!supported.has(name)) {
            throw new WorkbenchCodedError("usage", `Unsupported install target: ${raw}`, {
                remediation: "Use --to codex, --to claude, or --to local.",
                exitCode: 2,
            });
        }
        unique.add(name);
    }
    return [...unique];
}
|
|
980
|
+
/**
 * Reports whether `filePath` is accessible via `fs.access`.
 *
 * @param filePath Path to probe.
 * @returns Promise of `true` when access succeeds, `false` on any failure.
 */
async function pathExists(filePath) {
    // The probe is async so that every failure surfaces as a rejection.
    const probe = async () => {
        await fs.access(filePath);
    };
    return probe().then(() => true, () => false);
}
|
|
989
|
+
/**
 * Starts a hosted run for `command` and waits until its runs are terminal.
 *
 * Steps, in order: resolve the cloud remote, parse it as a skill URL, obtain
 * a token, sync the remote, snapshot the local state, resolve the skill id,
 * POST the run request, sync again, then poll via `waitForCloudRuns`.
 *
 * @param command "improve" posts to `/improve`; anything else posts to `/runs`.
 * @param parsed Parsed CLI invocation.
 * @returns Execution context: core options, remote, skill id, final runs,
 *          starting version id, parsed source and before/after sync summaries.
 * @throws WorkbenchCodedError `remote_invalid_url`, `auth_required` or
 *         `cloud_run_missing`.
 */
async function startCloudExecution(command, parsed) {
    const root = dirFlag(parsed) ?? process.cwd();
    const remote = await ensureCloudRemoteForExecution(root, parsed);
    const source = parseWorkbenchInstallSource(remote.url);
    if (!source) {
        throw new WorkbenchCodedError("remote_invalid_url", `Workbench remote is not a Cloud skill URL: ${remote.url}`, {
            remediation: "Run workbench publish to recreate the Workbench Cloud link.",
            subject: { remote: remote.name, url: remote.url },
            exitCode: 2,
        });
    }
    const token = await workbenchCloudToken({ baseUrl: source.baseUrl });
    if (!token) {
        throw new WorkbenchCodedError("auth_required", `workbench ${command} --cloud requires Workbench Cloud auth.`, {
            remediation: `Run workbench login --base-url ${source.baseUrl}.`,
            exitCode: 1,
        });
    }
    const core = { dir: root, authToken: token };
    const syncRemote = () => syncWorkbenchRemote({ ...core, remote: remote.name });
    const summarizeSync = (sync) => ({ pushed: sync.pushed, pulled: sync.pulled, upToDate: sync.upToDate });

    const syncBefore = await syncRemote();
    const startSnapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const skillId = await resolveCloudSkillId(source);

    const action = command === "improve" ? "/improve" : "/runs";
    const endpoint = `/api/workbench/skills/${encodeURIComponent(skillId)}${action}`;
    const request = { method: "POST", body: cloudExecutionRequestBody(command, parsed) };
    const response = await apiRequest(endpoint, request, source.baseUrl);
    const runs = response.runs ?? [];
    if (runs.length === 0) {
        throw new WorkbenchCodedError("cloud_run_missing", `Workbench Cloud did not return a run for ${command}.`, {
            retryable: true,
            remediation: "Run workbench log --runs.",
            subject: { remote: remote.name, skillId },
            exitCode: 1,
        });
    }

    const initialSync = await syncRemote();
    const completed = await waitForCloudRuns({ core, remote, runs, initialSync });
    return {
        core,
        remote,
        skillId,
        runs: completed.runs,
        startVersionId: startSnapshot.status.currentVersionId ?? startSnapshot.refs.current,
        source,
        sync: {
            before: summarizeSync(syncBefore),
            after: summarizeSync(completed.sync),
        },
    };
}
|
|
1041
|
+
/**
 * Polls local inspection snapshots until every requested run is present and
 * terminal, re-syncing the remote between polls.
 *
 * Timeout and poll interval come from `WORKBENCH_CLOUD_RUN_TIMEOUT_MS` and
 * `WORKBENCH_CLOUD_RUN_POLL_INTERVAL_MS`, falling back to the module constants.
 *
 * @param input `{ core, remote, runs, initialSync }`.
 * @returns Promise of `{ runs, sync }` with the terminal runs and the latest sync.
 * @throws WorkbenchCodedError `cloud_run_missing` when a run has no usable id,
 *         `cloud_run_pending` when the deadline passes first.
 */
async function waitForCloudRuns(input) {
    const runIds = input.runs
        .map((run) => run.id)
        .filter((id) => typeof id === "string" && id.length > 0);
    const everyRunHasId = runIds.length > 0 && runIds.length === input.runs.length;
    if (!everyRunHasId) {
        throw new WorkbenchCodedError("cloud_run_missing", "Workbench Cloud did not return a run id.", {
            retryable: true,
            remediation: "Run workbench log --runs.",
            exitCode: 1,
        });
    }
    let latestSync = input.initialSync;
    const timeoutMs = positiveIntEnv("WORKBENCH_CLOUD_RUN_TIMEOUT_MS") ?? CLOUD_RUN_TIMEOUT_MS;
    const pollIntervalMs = positiveIntEnv("WORKBENCH_CLOUD_RUN_POLL_INTERVAL_MS") ?? CLOUD_RUN_POLL_INTERVAL_MS;
    const deadline = Date.now() + timeoutMs;
    for (;;) {
        const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(input.core);
        // Keep only the requested runs that the snapshot already knows about.
        const known = runIds.flatMap((id) => {
            const match = snapshot.runs.find((entry) => entry.id === id);
            return match ? [match] : [];
        });
        const finished = known.length === runIds.length && known.every(isTerminalRun);
        if (finished) {
            return { runs: known, sync: latestSync };
        }
        if (Date.now() >= deadline) {
            throw new WorkbenchCodedError("cloud_run_pending", "Hosted Workbench run is still running.", {
                retryable: true,
                remediation: runIds[0] ? `Run workbench show ${runIds[0]}.` : "Run workbench log --runs.",
                subject: {
                    runIds,
                    statuses: Object.fromEntries(known.map((run) => [run.id, run.status])),
                },
                exitCode: 1,
            });
        }
        await sleep(pollIntervalMs);
        latestSync = await syncWorkbenchRemote({ ...input.core, remote: input.remote.name });
    }
}
|
|
1079
|
+
/**
 * Tells whether a run has reached a final status.
 *
 * @param run Object with a `status` property.
 * @returns `true` for "succeeded", "failed" or "canceled"; `false` otherwise.
 */
function isTerminalRun(run) {
    switch (run.status) {
        case "succeeded":
        case "failed":
        case "canceled":
            return true;
        default:
            return false;
    }
}
|
|
1082
|
+
/**
 * Switches the local source to the version produced by a hosted improve, but
 * only when the cloud's `current` ref points at that version.
 *
 * @param started Execution context returned by `startCloudExecution`.
 * @returns Promise of the switched version id, or `undefined` when no
 *          succeeded run has an output version or the cloud ref differs.
 * @throws WorkbenchCodedError `worktree_changed` when the local current
 *         version no longer matches the one the execution started from.
 */
async function switchHostedImproveVersionIfPromoted(started) {
    const producingRun = started.runs.find((run) => run.status === "succeeded" && run.outputVersionId);
    const outputVersionId = producingRun?.outputVersionId;
    if (!outputVersionId) {
        return undefined;
    }
    const refs = await fetchCloudObjectRefs(started);
    if (refs.current !== outputVersionId) {
        return undefined;
    }
    // Result intentionally unused; the call is kept for whatever it does
    // before the snapshot below (NOTE(review): presumably refreshes local
    // version data — confirm).
    await listWorkbenchVersions(started.core);
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(started.core);
    const currentVersionId = snapshot.status.currentVersionId ?? snapshot.refs.current;
    const movedSinceStart = Boolean(started.startVersionId && currentVersionId && currentVersionId !== started.startVersionId);
    if (movedSinceStart) {
        throw new WorkbenchCodedError("worktree_changed", "Local source changed while hosted improve was running; refusing to overwrite it.", {
            remediation: `Review workbench diff, then run workbench switch ${outputVersionId} when ready.`,
            subject: {
                startedFrom: started.startVersionId,
                current: currentVersionId,
                hostedVersion: outputVersionId,
            },
            exitCode: 1,
        });
    }
    const { id } = await switchWorkbenchVersion(outputVersionId, started.core);
    return id;
}
|
|
1108
|
+
/**
 * Fetches the skill's object pack from the cloud and returns its refs.
 *
 * @param started Execution context (reads `skillId` and `source.baseUrl`).
 * @returns Promise of `objectPack.refs`, or `{}` when absent.
 */
async function fetchCloudObjectRefs(started) {
    const endpoint = `/api/workbench/skills/${encodeURIComponent(started.skillId)}/objects`;
    const response = await apiRequest(endpoint, {}, started.source.baseUrl);
    const refs = response.objectPack?.refs;
    return refs ?? {};
}
|
|
1112
|
+
/**
 * Returns the cloud remote to execute against, creating one when the project
 * is not linked yet.
 *
 * @param root Project directory.
 * @param parsed Parsed CLI invocation, used to derive the remote.
 * @returns Promise of the already-linked remote, or the remote returned by
 *          `addWorkbenchRemote`.
 * @throws WorkbenchCodedError `remote_invalid_url` or `auth_required`.
 */
async function ensureCloudRemoteForExecution(root, parsed) {
    const alreadyLinked = await linkedCloudRemote(root);
    if (alreadyLinked) {
        return alreadyLinked;
    }
    const link = await cloudRemoteLinkTarget(root);
    const remote = await derivePublishCloudRemote(parsed, "workbench --cloud", link.name);
    const source = parseWorkbenchInstallSource(remote.url);
    if (!source) {
        throw new WorkbenchCodedError("remote_invalid_url", `Workbench remote is not a Cloud skill URL: ${remote.url}`, {
            remediation: "Run workbench publish to recreate the Workbench Cloud link.",
            subject: { remote: remote.name, url: remote.url },
            exitCode: 2,
        });
    }
    const token = await workbenchCloudToken({ baseUrl: source.baseUrl });
    if (!token) {
        throw new WorkbenchCodedError("auth_required", "workbench --cloud requires Workbench Cloud auth.", {
            remediation: `Run workbench login --base-url ${source.baseUrl}.`,
            exitCode: 1,
        });
    }
    const addOptions = { dir: root, authToken: token, replace: link.replace };
    const added = await addWorkbenchRemote(remote.name, remote.url, addOptions);
    return added.remote;
}
|
|
1141
|
+
/**
 * Looks up the preferred cloud remote of the project at `root`.
 *
 * @param root Project directory.
 * @returns Promise of the remote, or `null` when there is none.
 */
async function linkedCloudRemote(root) {
    const remotes = await inspectionRemotes(root);
    const preferred = preferredCloudRemote(remotes);
    return preferred ?? null;
}
|
|
1144
|
+
/**
 * Lists the remotes recorded in the read-only inspection snapshot of `root`.
 *
 * A rejection with `WorkbenchCodedError` or `WorkbenchUserError` is treated as
 * "no snapshot" and yields an empty list; any other rejection propagates.
 *
 * @param root Project directory.
 * @returns Promise of the snapshot's `remotes`, or `[]`.
 */
async function inspectionRemotes(root) {
    const ignoreWorkbenchErrors = (error) => {
        const expected = error instanceof WorkbenchCodedError || error instanceof WorkbenchUserError;
        if (!expected) {
            throw error;
        }
        return null;
    };
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot({ dir: root }).catch(ignoreWorkbenchErrors);
    return snapshot?.remotes ?? [];
}
|
|
1153
|
+
/**
 * Decides which remote name a cloud link for `root` should use.
 *
 * @param root Project directory.
 * @returns Promise of the result of `cloudRemoteLinkTargetFromRemotes`.
 */
async function cloudRemoteLinkTarget(root) {
    const remotes = await inspectionRemotes(root);
    return cloudRemoteLinkTargetFromRemotes(remotes);
}
|
|
1156
|
+
/**
 * Chooses between reusing an existing cloud remote and creating a new one.
 *
 * @param remotes Known remotes.
 * @returns `{ name, replace: true, existing }` when a cloud remote exists,
 *          otherwise `{ name, replace: false }` with an unused name.
 */
function cloudRemoteLinkTargetFromRemotes(remotes) {
    const existing = preferredCloudRemote(remotes);
    return existing
        ? { name: existing.name, replace: true, existing }
        : { name: availableCloudRemoteName(remotes), replace: false };
}
|
|
1163
|
+
/**
 * Picks the cloud remote to use: the first "workbench-cloud" remote named
 * "cloud", else the first "workbench-cloud" remote.
 *
 * @param remotes Array of remotes with `kind` and `name`.
 * @returns The chosen remote, or `undefined` when there is no cloud remote.
 */
function preferredCloudRemote(remotes) {
    let firstCloudRemote;
    for (const remote of remotes) {
        if (remote.kind !== "workbench-cloud") {
            continue;
        }
        if (remote.name === "cloud") {
            return remote;
        }
        if (firstCloudRemote === undefined) {
            firstCloudRemote = remote;
        }
    }
    return firstCloudRemote;
}
|
|
1167
|
+
/**
 * Finds an unused remote name: "cloud" when free, otherwise the first free
 * "cloud-N" counting up from 1.
 *
 * @param remotes Array of remotes with `name`.
 * @returns The unused remote name.
 */
function availableCloudRemoteName(remotes) {
    const taken = new Set(remotes.map((remote) => remote.name));
    let candidate = "cloud";
    let suffix = 0;
    while (taken.has(candidate)) {
        suffix += 1;
        candidate = `cloud-${suffix}`;
    }
    return candidate;
}
|
|
1179
|
+
/**
 * Resolves the cloud skill id for a parsed skill URL by listing skills and
 * matching on owner slug and name.
 *
 * @param source Parsed install source (`baseUrl`, `owner`, `skill`).
 * @returns Promise of the matching skill's id.
 * @throws WorkbenchCodedError `remote_not_found` when no skill with an id matches.
 */
async function resolveCloudSkillId(source) {
    const listing = await apiRequest("/api/workbench/skills", {}, source.baseUrl);
    const isTarget = (entry) => entry.ownerSlug === source.owner && entry.name === source.skill;
    const match = listing.skills?.find(isTarget);
    if (match?.id) {
        return match.id;
    }
    throw new WorkbenchCodedError("remote_not_found", `Workbench Cloud skill not found: ${source.owner}/${source.skill}`, {
        remediation: "Run workbench publish.",
        subject: { owner: source.owner, skill: source.skill },
        exitCode: 1,
    });
}
|
|
1191
|
+
/**
 * Builds the JSON body for a hosted run request from the CLI arguments.
 *
 * @param command "improve" additionally includes the `budget` flag.
 * @param parsed Parsed CLI invocation.
 * @returns `{ version, skill, agent, samples }` plus `budget` for improve.
 */
function cloudExecutionRequestBody(command, parsed) {
    const body = {
        version: optionalPositional(parsed, 1),
        skill: stringFlag(parsed, "skills"),
        agent: stringFlag(parsed, "agents"),
        samples: intFlag(parsed, "samples"),
    };
    if (command === "improve") {
        body.budget = intFlag(parsed, "budget");
    }
    return body;
}
|
|
1200
|
+
/**
 * Suggested follow-up commands after a hosted eval; "workbench publish" is
 * the command passed for the success case.
 *
 * @param runs Runs of the execution.
 * @returns Array of suggested commands.
 */
function cloudEvalNextCommands(runs) {
    const afterSuccess = "workbench publish";
    return cloudExecutionNextCommands(runs, afterSuccess);
}
|
|
1203
|
+
/**
 * Suggested follow-up commands after a hosted improve; "workbench eval" is
 * the command passed for the success case.
 *
 * @param runs Runs of the execution.
 * @returns Array of suggested commands.
 */
function cloudImproveNextCommands(runs) {
    const afterSuccess = "workbench eval";
    return cloudExecutionNextCommands(runs, afterSuccess);
}
|
|
1206
|
+
/**
 * Suggests the next command based on the first run only.
 *
 * @param runs Runs of the execution.
 * @param successCommand Command to suggest when the first run is neither
 *        running, failed nor canceled.
 * @returns Single-element array with the suggested command.
 */
function cloudExecutionNextCommands(runs, successCommand) {
    const lead = runs[0];
    if (!lead) {
        return ["workbench log --runs"];
    }
    switch (lead.status) {
        case "running":
        case "failed":
        case "canceled":
            return [`workbench show ${lead.id}`];
        default:
            return [successCommand];
    }
}
|
|
1216
|
+
/**
 * Extracts the cloud-related fields reported in CLI results.
 *
 * @param started Execution context (`remote`, `skillId`, `sync`).
 * @returns `{ remote, url, skillId, sync }` where `remote` is the remote name.
 */
function cloudExecutionSummary(started) {
    const { name, url } = started.remote;
    const { skillId, sync } = started;
    return { remote: name, url, skillId, sync };
}
|
|
1224
|
+
/**
 * Describes where an installed snapshot came from.
 *
 * The URLs are built from the requested `source`; owner, skill and version
 * are taken from the returned `snapshot`.
 *
 * @param source Parsed install source (`baseUrl`, `owner`, `skill`).
 * @param snapshot Source snapshot (`owner`, `name`, `versionId`).
 * @returns Summary with `installUrl` and the version-pinned `pinnedInstallUrl`.
 */
function workbenchInstallSourceSummary(source, snapshot) {
    const encode = encodeURIComponent;
    const installUrl = `${source.baseUrl}/skills/${encode(source.owner)}/${encode(source.skill)}`;
    const pinnedInstallUrl = `${installUrl}/releases/${encode(snapshot.versionId)}`;
    return {
        kind: "workbench-cloud",
        owner: snapshot.owner,
        skill: snapshot.name,
        versionId: snapshot.versionId,
        installUrl,
        pinnedInstallUrl,
    };
}
|
|
1235
|
+
/**
 * Parses a Workbench skill URL of the form
 * `<origin>/skills/<owner>/<skill>[/releases/<version>]`.
 *
 * @param source Candidate URL string.
 * @returns `{ baseUrl, owner, skill }` (plus `version` for release URLs), or
 *          `undefined` when `source` is not an http(s) URL whose first path
 *          segment is "skills".
 * @throws WorkbenchUserError when the URL is under `/skills` but malformed,
 *         including path segments with invalid percent-escapes.
 */
function parseWorkbenchInstallSource(source) {
    let url;
    try {
        url = new URL(source);
    }
    catch {
        return undefined;
    }
    if (url.protocol !== "http:" && url.protocol !== "https:") {
        return undefined;
    }
    // The URL parser leaves malformed escapes such as "%zz" in the pathname,
    // and decodeURIComponent throws URIError on them. Decode defensively so a
    // bad URL is reported like every other invalid skill URL.
    const decodeSegment = (segment) => {
        try {
            return decodeURIComponent(segment);
        }
        catch {
            return undefined;
        }
    };
    const rawSegments = url.pathname.split("/").filter(Boolean);
    if (rawSegments.length === 0 || decodeSegment(rawSegments[0]) !== "skills") {
        return undefined;
    }
    const segments = rawSegments.map(decodeSegment);
    const invalidUrl = () => new WorkbenchUserError(`Invalid Workbench skill URL: ${source}`);
    if (segments.includes(undefined) || !segments[1] || !segments[2]) {
        throw invalidUrl();
    }
    const base = { baseUrl: url.origin, owner: segments[1], skill: segments[2] };
    if (segments.length === 3) {
        return base;
    }
    if (segments.length === 5 && segments[3] === "releases" && segments[4]) {
        return { ...base, version: segments[4] };
    }
    throw invalidUrl();
}
|
|
1273
|
+
/**
 * Downloads and validates the source snapshot for a parsed skill URL.
 *
 * The bearer token is attached only when one is available. Checks run in this
 * order: structured cloud error body, HTTP 401, any other non-OK status, JSON
 * parsing, snapshot validation, then the requested-version match.
 *
 * @param source Parsed install source (`baseUrl`, `owner`, `skill`, optional `version`).
 * @param displaySource Text used for the source in error messages.
 * @returns Promise of the parsed snapshot.
 * @throws WorkbenchCodedError the cloud-provided code, `auth_required`, or
 *         `install_failed`.
 */
async function fetchWorkbenchInstallSourceSnapshot(source, displaySource) {
    const token = await workbenchCloudToken({ baseUrl: source.baseUrl });
    const skillPath = `/api/workbench/source/skills/${encodeURIComponent(source.owner)}/${encodeURIComponent(source.skill)}`;
    const apiPath = source.version
        ? `${skillPath}/releases/${encodeURIComponent(source.version)}/source`
        : `${skillPath}/source`;
    const headers = {};
    if (token) {
        headers.authorization = `Bearer ${token}`;
    }
    const response = await fetch(`${source.baseUrl}${apiPath}`, { headers });
    const text = await response.text();

    const cloudError = parseWorkbenchCloudErrorBody(text);
    if (cloudError) {
        const details = { retryable: cloudError.retryable };
        if (cloudError.remediation) {
            details.remediation = cloudError.remediation;
        }
        if (cloudError.subject) {
            details.subject = cloudError.subject;
        }
        details.exitCode = response.status === 400 ? 2 : 1;
        throw new WorkbenchCodedError(cloudError.code, cloudError.message, details);
    }
    if (response.status === 401) {
        const message = token
            ? `Workbench Cloud rejected the provided token while installing ${displaySource}.`
            : `Authentication is required to install ${displaySource}.`;
        throw new WorkbenchCodedError("auth_required", message, {
            remediation: `Run workbench login --base-url ${source.baseUrl}.`,
            exitCode: 1,
        });
    }
    if (!response.ok) {
        throw new WorkbenchCodedError("install_failed", `Unable to download Workbench source ${displaySource}: ${response.status} ${readResponseError(text) ?? response.statusText}`, {
            subject: { source: displaySource, status: response.status },
            exitCode: 1,
        });
    }

    let parsed;
    try {
        // An empty body is passed on as null and rejected by the validator.
        parsed = text ? JSON.parse(text) : null;
    }
    catch {
        throw new WorkbenchCodedError("install_failed", `Workbench source ${displaySource} did not return JSON.`, {
            subject: { source: displaySource },
            exitCode: 1,
        });
    }
    const snapshot = parseWorkbenchInstallSourceSnapshot(parsed, displaySource);
    const wrongRelease = Boolean(source.version) && snapshot.versionId !== source.version;
    if (wrongRelease) {
        throw new WorkbenchCodedError("install_failed", `Workbench source ${displaySource} resolved ${snapshot.versionId} instead of requested release ${source.version}.`, {
            subject: { source: displaySource, resolvedVersionId: snapshot.versionId, requestedVersionId: source.version },
            exitCode: 1,
        });
    }
    return snapshot;
}
|
|
1326
|
+
/**
 * Validates a decoded `workbench.source.snapshot.v1` payload.
 *
 * @param value Decoded JSON value.
 * @param displaySource Text used for the source in error messages.
 * @returns `{ schema, owner, name, versionId, files }` with parsed file entries.
 * @throws WorkbenchCodedError `install_failed` when the schema tag is wrong,
 *         a file entry is invalid, or owner/name/versionId/files is missing.
 */
function parseWorkbenchInstallSourceSnapshot(value, displaySource) {
    const installFailed = (message) => new WorkbenchCodedError("install_failed", message, {
        subject: { source: displaySource },
        exitCode: 1,
    });
    const record = asRecord(value);
    if (record?.schema !== "workbench.source.snapshot.v1") {
        throw installFailed(`Workbench source ${displaySource} did not return a source snapshot.`);
    }
    // Non-string fields collapse to "" so the completeness check rejects them.
    const textField = (key) => (typeof record[key] === "string" ? record[key] : "");
    const owner = textField("owner");
    const name = textField("name");
    const versionId = textField("versionId");
    const files = Array.isArray(record.files)
        ? record.files.map((entry) => parseWorkbenchInstallSourceFile(entry, displaySource))
        : [];
    const complete = Boolean(owner && name && versionId) && files.length > 0;
    if (!complete) {
        throw installFailed(`Workbench source ${displaySource} returned an incomplete source snapshot.`);
    }
    return { schema: "workbench.source.snapshot.v1", owner, name, versionId, files };
}
|
|
1352
|
+
/**
 * Validates one file entry of a source snapshot.
 *
 * @param value Decoded file entry.
 * @param displaySource Text used for the source in error messages.
 * @returns `{ path, [kind], encoding, executable, content }`; `kind` is kept
 *          only for "text"/"binary", `encoding` is "base64" or "utf8".
 * @throws WorkbenchCodedError `install_failed` when the entry is not a record,
 *         has no string `path`, or has no string `content`.
 */
function parseWorkbenchInstallSourceFile(value, displaySource) {
    const invalidEntry = () => new WorkbenchCodedError("install_failed", `Workbench source ${displaySource} returned an invalid file entry.`, {
        subject: { source: displaySource },
        exitCode: 1,
    });
    const record = asRecord(value);
    if (!record) {
        throw invalidEntry();
    }
    const filePath = typeof record.path === "string" ? record.path : "";
    const hasContent = typeof record.content === "string";
    if (!filePath || !hasContent) {
        throw invalidEntry();
    }
    const file = { path: normalizeInstallSnapshotPath(filePath) };
    if (record.kind === "text" || record.kind === "binary") {
        file.kind = record.kind;
    }
    file.encoding = record.encoding === "base64" ? "base64" : "utf8";
    file.executable = record.executable === true;
    file.content = record.content;
    return file;
}
|
|
696
1376
|
async function loadConfig() {
|
|
697
1377
|
const parsed = await readConfigJson(configPath()) ?? {};
|
|
@@ -699,11 +1379,24 @@ async function loadConfig() {
|
|
|
699
1379
|
schema: CONFIG_SCHEMA,
|
|
700
1380
|
...(typeof parsed.baseUrl === "string" ? { baseUrl: normalizeBaseUrl(parsed.baseUrl) } : {}),
|
|
701
1381
|
...(typeof parsed.accessToken === "string" ? { accessToken: parsed.accessToken } : {}),
|
|
1382
|
+
...(typeof parsed.username === "string" ? { username: parsed.username } : {}),
|
|
1383
|
+
...(Array.isArray(parsed.installTargets) ? { installTargets: normalizeInstallTargetNames(parsed.installTargets.flatMap((entry) => typeof entry === "string" ? [entry] : [])) } : {}),
|
|
702
1384
|
};
|
|
703
1385
|
}
|
|
704
|
-
|
|
1386
|
+
/**
 * Single resolver for the Workbench Cloud token used by every authenticated
 * path. Precedence: config `accessToken`, then the environment tokens from
 * `workbenchCloudEnvToken`.
 *
 * When both a target base URL and a config base URL are known, the config
 * token is used only if the two match after normalization.
 *
 * @param options Optional `{ baseUrl }` naming the server the token is for.
 * @returns Promise of the token, or `undefined` when none is available.
 */
async function workbenchCloudToken(options = {}) {
    const config = await loadConfig();
    if (!config.accessToken) {
        return workbenchCloudEnvToken();
    }
    const appliesToTarget = !options.baseUrl
        || !config.baseUrl
        || normalizeBaseUrl(config.baseUrl) === normalizeBaseUrl(options.baseUrl);
    return appliesToTarget ? config.accessToken : workbenchCloudEnvToken();
}
|
|
1398
|
+
/**
 * Reads the cloud token from the environment: `WORKBENCH_API_TOKEN` first,
 * then `WORKBENCH_SMOKE_BEARER_TOKEN`. Values are trimmed and blank ones skipped.
 *
 * @returns The first non-empty token, or `undefined`.
 */
function workbenchCloudEnvToken() {
    for (const variable of ["WORKBENCH_API_TOKEN", "WORKBENCH_SMOKE_BEARER_TOKEN"]) {
        const token = process.env[variable]?.trim();
        if (token) {
            return token;
        }
    }
    return undefined;
}
|
|
708
1401
|
async function readConfigJson(filePath) {
|
|
709
1402
|
try {
|
|
@@ -723,6 +1416,9 @@ async function writeConfig(config) {
|
|
|
723
1416
|
/**
 * Location of the CLI config file: the trimmed `WORKBENCH_CONFIG` value when
 * non-empty, otherwise `<home>/.workbench/config.json`.
 *
 * @returns Config file path.
 */
function configPath() {
    const override = process.env.WORKBENCH_CONFIG?.trim();
    if (override) {
        return override;
    }
    return path.join(os.homedir(), ".workbench", "config.json");
}
|
|
1419
|
+
/**
 * Location of the pending device-authorization file: the trimmed
 * `WORKBENCH_DEVICE_AUTH` value when non-empty, otherwise `device-auth.json`
 * next to the config file.
 *
 * @returns Device-authorization file path.
 */
function deviceAuthPath() {
    const override = process.env.WORKBENCH_DEVICE_AUTH?.trim();
    if (override) {
        return override;
    }
    const configDirectory = path.dirname(configPath());
    return path.join(configDirectory, "device-auth.json");
}
|
|
726
1422
|
function selectWorkbenchBaseUrl(input = {}) {
|
|
727
1423
|
const baseUrl = optionalWorkbenchBaseUrl(input);
|
|
728
1424
|
if (!baseUrl) {
|
|
@@ -742,13 +1438,41 @@ function normalizeBaseUrl(value) {
|
|
|
742
1438
|
}
|
|
743
1439
|
/**
 * Requests a device code from `<baseUrl>/api/oauth/device/code`.
 *
 * @param baseUrl Server base URL.
 * @returns Promise of the JSON-decoded response body.
 * @throws WorkbenchCodedError the cloud-provided code when the body is a
 *         structured cloud error, or `login_denied` on a non-OK status.
 */
async function requestDeviceAuthorization(baseUrl) {
    const endpoint = `${baseUrl}/api/oauth/device/code`;
    const response = await fetch(endpoint, { method: "POST" });
    const text = await response.text();
    const cloudError = parseWorkbenchCloudErrorBody(text);
    if (cloudError) {
        const details = { retryable: cloudError.retryable };
        if (cloudError.remediation) {
            details.remediation = cloudError.remediation;
        }
        if (cloudError.subject) {
            details.subject = cloudError.subject;
        }
        details.exitCode = 1;
        throw new WorkbenchCodedError(cloudError.code, cloudError.message, details);
    }
    if (!response.ok) {
        const reason = readResponseError(text) ?? response.statusText;
        throw new WorkbenchCodedError("login_denied", `Device login failed: ${reason}`, {
            exitCode: 1,
        });
    }
    return JSON.parse(text);
}
|
|
1458
|
+
/**
 * Requests a device authorization and converts it into the persisted
 * `workbench.cli.device-auth.v1` record with an absolute `expiresAt`.
 *
 * `expires_in` is clamped to at least one second. When it is missing,
 * non-numeric, or so large that no valid date results, a 15-minute lifetime
 * is used instead of letting `toISOString()` throw `RangeError`.
 *
 * @param baseUrl Server base URL, stored in the record.
 * @returns Promise of the record; `interval` is included only when the
 *          server sent one.
 */
async function startDeviceAuthorization(baseUrl) {
    const authorization = await requestDeviceAuthorization(baseUrl);
    const fallbackLifetimeMs = 15 * 60 * 1000;
    const issuedAt = Date.now();
    const lifetimeSeconds = Math.max(1, Number(authorization.expires_in));
    let expiresAt = new Date(issuedAt + lifetimeSeconds * 1000);
    if (Number.isNaN(expiresAt.getTime())) {
        // NaN or out-of-range lifetime: fall back rather than crash.
        expiresAt = new Date(issuedAt + fallbackLifetimeMs);
    }
    return {
        schema: "workbench.cli.device-auth.v1",
        baseUrl,
        device_code: authorization.device_code,
        user_code: authorization.user_code,
        verification_uri: authorization.verification_uri,
        verification_uri_complete: authorization.verification_uri_complete,
        expiresAt: expiresAt.toISOString(),
        ...(authorization.interval !== undefined ? { interval: authorization.interval } : {}),
    };
}
|
|
750
|
-
async function pollDeviceToken(baseUrl, authorization) {
|
|
751
|
-
const
|
|
1471
|
+
async function pollDeviceToken(baseUrl, authorization, timeoutSeconds) {
|
|
1472
|
+
const expiresAtMs = Date.parse(authorization.expiresAt);
|
|
1473
|
+
const expiryDeadline = Number.isFinite(expiresAtMs) ? expiresAtMs : Date.now() + 15 * 60 * 1000;
|
|
1474
|
+
const timeoutDeadline = timeoutSeconds ? Date.now() + timeoutSeconds * 1000 : Number.POSITIVE_INFINITY;
|
|
1475
|
+
const deadline = Math.min(expiryDeadline, timeoutDeadline);
|
|
752
1476
|
let intervalMs = Math.max(1, authorization.interval ?? 5) * 1000;
|
|
753
1477
|
while (Date.now() < deadline) {
|
|
754
1478
|
const response = await fetch(`${baseUrl}/api/oauth/token`, {
|
|
@@ -768,17 +1492,87 @@ async function pollDeviceToken(baseUrl, authorization) {
|
|
|
768
1492
|
intervalMs += 5000;
|
|
769
1493
|
}
|
|
770
1494
|
else if (error !== "authorization_pending") {
|
|
771
|
-
throw new
|
|
1495
|
+
throw new WorkbenchCodedError("login_denied", `Device login failed: ${error}`, {
|
|
1496
|
+
exitCode: 1,
|
|
1497
|
+
});
|
|
772
1498
|
}
|
|
773
1499
|
await sleep(intervalMs);
|
|
774
1500
|
}
|
|
775
|
-
throw new
|
|
1501
|
+
throw new WorkbenchCodedError("login_pending", "Device login is still waiting for browser authorization.", {
|
|
1502
|
+
retryable: true,
|
|
1503
|
+
remediation: "Authorize the device in the browser, then run workbench login --wait --timeout 120.",
|
|
1504
|
+
subject: {
|
|
1505
|
+
retryAfterSeconds: Math.max(1, Math.ceil(intervalMs / 1000)),
|
|
1506
|
+
verificationUri: authorization.verification_uri,
|
|
1507
|
+
verificationUriComplete: authorization.verification_uri_complete,
|
|
1508
|
+
userCode: authorization.user_code,
|
|
1509
|
+
expiresAt: authorization.expiresAt,
|
|
1510
|
+
},
|
|
1511
|
+
exitCode: 1,
|
|
1512
|
+
});
|
|
1513
|
+
}
|
|
1514
|
+
/**
 * Looks up the username of the account behind `accessToken` via
 * `<baseUrl>/api/workbench/profile`.
 *
 * @param baseUrl Server base URL.
 * @param accessToken Bearer token sent in the `authorization` header.
 * @returns Promise of `profile.username` when it is a string; `undefined`
 *          on a non-OK response or when the field is absent.
 */
async function fetchWorkbenchUsername(baseUrl, accessToken) {
    const headers = { authorization: `Bearer ${accessToken}` };
    const response = await fetch(`${baseUrl}/api/workbench/profile`, { headers });
    if (!response.ok) {
        return undefined;
    }
    const payload = asRecord(await response.json());
    const profile = asRecord(payload?.profile);
    if (typeof profile?.username !== "string") {
        return undefined;
    }
    return profile.username;
}
|
|
1525
|
+
/**
 * Loads the saved device authorization if it can still be used for `baseUrl`.
 *
 * @param baseUrl Server base URL the login targets.
 * @returns Promise of the saved record, or `null` when there is none, it was
 *          issued for another base URL, or it has expired.
 */
async function readPendingDeviceAuthorization(baseUrl) {
    const pending = await readDeviceAuthorizationJson(deviceAuthPath());
    if (!pending) {
        return null;
    }
    if (pending.baseUrl !== baseUrl) {
        return null;
    }
    const expired = Date.parse(pending.expiresAt) <= Date.now();
    return expired ? null : pending;
}
|
|
1532
|
+
/**
 * Persists a pending device authorization as pretty-printed JSON at
 * `deviceAuthPath()`, creating the parent directory when needed.
 *
 * The record contains the device code, so the file is created readable and
 * writable by the owner only. The mode applies when the file is created; an
 * existing file keeps its current permissions.
 *
 * @param record Device-authorization record to store.
 * @returns Promise that resolves once the file is written.
 */
async function writePendingDeviceAuthorization(record) {
    const target = deviceAuthPath();
    await fs.mkdir(path.dirname(target), { recursive: true });
    await fs.writeFile(target, `${JSON.stringify(record, null, 2)}\n`, { mode: 0o600 });
}
|
|
1536
|
+
/**
 * Deletes the pending device-authorization file. `force: true` makes a
 * missing file a no-op.
 *
 * @returns Promise that resolves once the removal has completed.
 */
async function clearPendingDeviceAuthorization() {
    const target = deviceAuthPath();
    await fs.rm(target, { force: true });
}
|
|
1539
|
+
/**
 * Reads and validates a saved `workbench.cli.device-auth.v1` record.
 *
 * A missing file, unparsable JSON (for example a truncated write) and a
 * record failing validation all mean "no pending authorization" and yield
 * `null`, so a damaged cache file does not block login.
 *
 * @param filePath Path of the JSON file.
 * @returns Promise of the sanitized record (known fields only; `interval`
 *          only when numeric), or `null`.
 * @throws Any read error other than `ENOENT`.
 */
async function readDeviceAuthorizationJson(filePath) {
    let raw;
    try {
        raw = await fs.readFile(filePath, "utf8");
    }
    catch (error) {
        if (error?.code === "ENOENT") {
            return null;
        }
        throw error;
    }
    let record;
    try {
        record = asRecord(JSON.parse(raw));
    }
    catch (error) {
        // Corrupt content is treated like an absent file.
        if (error instanceof SyntaxError) {
            return null;
        }
        throw error;
    }
    const requiredStrings = [
        "baseUrl",
        "device_code",
        "user_code",
        "verification_uri",
        "verification_uri_complete",
        "expiresAt",
    ];
    const valid = record?.schema === "workbench.cli.device-auth.v1"
        && requiredStrings.every((key) => typeof record[key] === "string")
        && Number.isFinite(Date.parse(record.expiresAt));
    if (!valid) {
        return null;
    }
    return {
        schema: "workbench.cli.device-auth.v1",
        baseUrl: record.baseUrl,
        device_code: record.device_code,
        user_code: record.user_code,
        verification_uri: record.verification_uri,
        verification_uri_complete: record.verification_uri_complete,
        expiresAt: record.expiresAt,
        ...(typeof record.interval === "number" ? { interval: record.interval } : {}),
    };
}
|
|
777
1570
|
async function apiRequest(apiPath, options = {}, baseUrlOverride) {
|
|
778
1571
|
const config = await loadConfig();
|
|
779
1572
|
const baseUrl = baseUrlOverride !== undefined
|
|
780
1573
|
? normalizeBaseUrl(baseUrlOverride)
|
|
781
1574
|
: selectWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
|
|
1575
|
+
const token = await workbenchCloudToken({ baseUrl });
|
|
782
1576
|
const method = options.method ?? "GET";
|
|
783
1577
|
const canRetry = method === "GET";
|
|
784
1578
|
const requestBody = encodeJsonRequestBody(options.body);
|
|
@@ -790,7 +1584,7 @@ async function apiRequest(apiPath, options = {}, baseUrlOverride) {
|
|
|
790
1584
|
method,
|
|
791
1585
|
headers: {
|
|
792
1586
|
...requestBody.headers,
|
|
793
|
-
...(
|
|
1587
|
+
...(token ? { authorization: `Bearer ${token}` } : {}),
|
|
794
1588
|
},
|
|
795
1589
|
body: requestBody.body,
|
|
796
1590
|
});
|
|
@@ -805,6 +1599,21 @@ async function apiRequest(apiPath, options = {}, baseUrlOverride) {
|
|
|
805
1599
|
}
|
|
806
1600
|
if (!response.ok) {
|
|
807
1601
|
const text = await response.text();
|
|
1602
|
+
const cloudError = parseWorkbenchCloudErrorBody(text);
|
|
1603
|
+
if (cloudError) {
|
|
1604
|
+
const requestError = new WorkbenchCodedError(cloudError.code, cloudError.message, {
|
|
1605
|
+
retryable: cloudError.retryable,
|
|
1606
|
+
...(cloudError.remediation ? { remediation: cloudError.remediation } : {}),
|
|
1607
|
+
...(cloudError.subject ? { subject: cloudError.subject } : {}),
|
|
1608
|
+
exitCode: response.status === 400 ? 2 : 1,
|
|
1609
|
+
});
|
|
1610
|
+
lastError = requestError;
|
|
1611
|
+
if (canRetry && attempt < API_REQUEST_MAX_ATTEMPTS && cloudError.retryable) {
|
|
1612
|
+
await sleep(250 * attempt);
|
|
1613
|
+
continue;
|
|
1614
|
+
}
|
|
1615
|
+
throw requestError;
|
|
1616
|
+
}
|
|
808
1617
|
const requestError = new WorkbenchApiRequestError(response.status, readResponseError(text) ?? `Request failed with status ${response.status}${response.statusText ? ` ${response.statusText}` : ""}.`, text);
|
|
809
1618
|
lastError = requestError;
|
|
810
1619
|
if (canRetry && attempt < API_REQUEST_MAX_ATTEMPTS && isTransientApiRequestError(requestError)) {
|
|
@@ -825,8 +1634,11 @@ function encodeJsonRequestBody(body) {
|
|
|
825
1634
|
if (Buffer.byteLength(text) < API_REQUEST_GZIP_THRESHOLD_BYTES) {
|
|
826
1635
|
return { body: text, headers: { "content-type": "application/json" } };
|
|
827
1636
|
}
|
|
1637
|
+
const compressed = gzipSync(text);
|
|
1638
|
+
const compressedBody = new ArrayBuffer(compressed.byteLength);
|
|
1639
|
+
new Uint8Array(compressedBody).set(compressed);
|
|
828
1640
|
return {
|
|
829
|
-
body:
|
|
1641
|
+
body: compressedBody,
|
|
830
1642
|
headers: {
|
|
831
1643
|
"content-encoding": "gzip",
|
|
832
1644
|
"content-type": "application/json",
|
|
@@ -834,26 +1646,44 @@ function encodeJsonRequestBody(body) {
|
|
|
834
1646
|
};
|
|
835
1647
|
}
|
|
836
1648
|
async function uploadAdapterConnection(bundle, parsed) {
|
|
1649
|
+
const token = await workbenchCloudToken();
|
|
837
1650
|
if (parsed.flags["local-only"] === true) {
|
|
838
|
-
return {
|
|
1651
|
+
return {
|
|
1652
|
+
status: token ? "authenticated" : "not_authenticated",
|
|
1653
|
+
sync: "skipped",
|
|
1654
|
+
reason: "local_only",
|
|
1655
|
+
};
|
|
839
1656
|
}
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
1657
|
+
if (!token) {
|
|
1658
|
+
return {
|
|
1659
|
+
status: "not_authenticated",
|
|
1660
|
+
sync: "skipped",
|
|
1661
|
+
reason: "not_authenticated",
|
|
1662
|
+
remediation: "Run workbench login.",
|
|
1663
|
+
};
|
|
843
1664
|
}
|
|
844
1665
|
await apiRequest(adapterConnectionApiPath(bundle), { method: "PUT", body: { bundle } });
|
|
845
|
-
return { status: "
|
|
1666
|
+
return { status: "authenticated", sync: "uploaded" };
|
|
846
1667
|
}
|
|
847
1668
|
async function deleteAdapterConnectionRemote(target, parsed) {
|
|
1669
|
+
const token = await workbenchCloudToken();
|
|
848
1670
|
if (parsed.flags["local-only"] === true) {
|
|
849
|
-
return {
|
|
1671
|
+
return {
|
|
1672
|
+
status: token ? "authenticated" : "not_authenticated",
|
|
1673
|
+
sync: "skipped",
|
|
1674
|
+
reason: "local_only",
|
|
1675
|
+
};
|
|
850
1676
|
}
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
1677
|
+
if (!token) {
|
|
1678
|
+
return {
|
|
1679
|
+
status: "not_authenticated",
|
|
1680
|
+
sync: "skipped",
|
|
1681
|
+
reason: "not_authenticated",
|
|
1682
|
+
remediation: "Run workbench login.",
|
|
1683
|
+
};
|
|
854
1684
|
}
|
|
855
1685
|
await apiRequest(adapterConnectionApiPath(target), { method: "DELETE" });
|
|
856
|
-
return { status: "
|
|
1686
|
+
return { status: "authenticated", sync: "deleted" };
|
|
857
1687
|
}
|
|
858
1688
|
function adapterConnectionApiPath(target) {
|
|
859
1689
|
const params = new URLSearchParams({ profile: target.profile });
|
|
@@ -883,6 +1713,25 @@ function readResponseError(text) {
|
|
|
883
1713
|
return text.trim() || null;
|
|
884
1714
|
}
|
|
885
1715
|
}
|
|
1716
|
+
function parseWorkbenchCloudErrorBody(text) {
|
|
1717
|
+
try {
|
|
1718
|
+
const record = asRecord(JSON.parse(text));
|
|
1719
|
+
if (record?.schema !== "workbench.cloud.error.v1" || typeof record.code !== "string" || typeof record.message !== "string") {
|
|
1720
|
+
return null;
|
|
1721
|
+
}
|
|
1722
|
+
const subject = asRecord(record.subject);
|
|
1723
|
+
return {
|
|
1724
|
+
code: record.code,
|
|
1725
|
+
message: record.message,
|
|
1726
|
+
retryable: record.retryable === true,
|
|
1727
|
+
...(typeof record.remediation === "string" ? { remediation: record.remediation } : {}),
|
|
1728
|
+
...(subject ? { subject: subject } : {}),
|
|
1729
|
+
};
|
|
1730
|
+
}
|
|
1731
|
+
catch {
|
|
1732
|
+
return null;
|
|
1733
|
+
}
|
|
1734
|
+
}
|
|
886
1735
|
function isTransientFetchError(error) {
|
|
887
1736
|
return /(?:fetch failed|socket hang up|ECONNRESET|EPIPE|UND_ERR_SOCKET|terminated)/iu.test(errorMessage(error));
|
|
888
1737
|
}
|
|
@@ -895,6 +1744,14 @@ function errorMessage(error) {
|
|
|
895
1744
|
function sleep(ms) {
|
|
896
1745
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
897
1746
|
}
|
|
1747
|
+
function positiveIntEnv(name) {
|
|
1748
|
+
const raw = process.env[name]?.trim();
|
|
1749
|
+
if (!raw) {
|
|
1750
|
+
return undefined;
|
|
1751
|
+
}
|
|
1752
|
+
const value = Number(raw);
|
|
1753
|
+
return Number.isSafeInteger(value) && value > 0 ? value : undefined;
|
|
1754
|
+
}
|
|
898
1755
|
async function openBrowser(url) {
|
|
899
1756
|
const command = process.platform === "darwin"
|
|
900
1757
|
? "open"
|
|
@@ -911,26 +1768,6 @@ async function openBrowser(url) {
|
|
|
911
1768
|
});
|
|
912
1769
|
});
|
|
913
1770
|
}
|
|
914
|
-
function retrySamplesForFailedJobs(jobs, run) {
|
|
915
|
-
if (run.status === "running") {
|
|
916
|
-
throw new WorkbenchUserError(`Run ${run.id} is still running; wait for it to finish before retrying.`);
|
|
917
|
-
}
|
|
918
|
-
const failed = jobs
|
|
919
|
-
.filter((job) => job.runId === run.id && job.status !== "succeeded")
|
|
920
|
-
.map((job) => ({ caseId: job.caseId, sample: job.sample }));
|
|
921
|
-
if (failed.length === 0) {
|
|
922
|
-
throw new WorkbenchUserError(`Run ${run.id} has no failed jobs to retry; use workbench eval to intentionally run it again.`);
|
|
923
|
-
}
|
|
924
|
-
const byKey = new Map();
|
|
925
|
-
for (const sample of failed) {
|
|
926
|
-
byKey.set(`${sample.caseId}:${sample.sample}`, sample);
|
|
927
|
-
}
|
|
928
|
-
const selectedSamples = [...byKey.values()].sort((left, right) => left.caseId.localeCompare(right.caseId) || left.sample - right.sample);
|
|
929
|
-
return {
|
|
930
|
-
samples: Math.max(1, ...selectedSamples.map((entry) => entry.sample + 1)),
|
|
931
|
-
selectedSamples,
|
|
932
|
-
};
|
|
933
|
-
}
|
|
934
1771
|
function adapterAuthStoreRoot() {
|
|
935
1772
|
return process.env.WORKBENCH_ADAPTER_AUTH_STORE?.trim() || undefined;
|
|
936
1773
|
}
|
|
@@ -1062,24 +1899,40 @@ async function requiredAgentAuthStatuses(parsed, statuses) {
|
|
|
1062
1899
|
.filter((agent) => ["codex", "claude"].includes(agent.adapter.trim().toLowerCase()))
|
|
1063
1900
|
.map(async (agent) => {
|
|
1064
1901
|
const target = parseAuthTarget(agent.adapter.trim().toLowerCase(), "default");
|
|
1902
|
+
const local = statusMap.get(`${target.adapterId}/${target.slot ?? "_"}/${target.profile}`) ??
|
|
1903
|
+
await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).status(target);
|
|
1065
1904
|
return {
|
|
1066
1905
|
agent: agent.name,
|
|
1067
1906
|
adapter: agent.adapter,
|
|
1068
|
-
local:
|
|
1069
|
-
await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).status(target),
|
|
1907
|
+
local: local.status === "connected" ? "connected" : "missing",
|
|
1070
1908
|
};
|
|
1071
1909
|
}));
|
|
1072
1910
|
}
|
|
1073
1911
|
function formatAuthStatusRecord(status) {
|
|
1074
1912
|
return `${formatAuthTarget(status)}\t${status.status}${status.method ? `\t${status.method}` : ""}${status.reason ? `\t${status.reason}` : ""}`;
|
|
1075
1913
|
}
|
|
1076
|
-
function
|
|
1914
|
+
function authStatusRecordToJson(status) {
|
|
1915
|
+
return {
|
|
1916
|
+
adapter: status.adapterId,
|
|
1917
|
+
...(status.slot ? { slot: status.slot } : {}),
|
|
1918
|
+
profile: status.profile,
|
|
1919
|
+
status: status.status,
|
|
1920
|
+
...(status.method ? { method: status.method } : {}),
|
|
1921
|
+
...(status.updatedAt ? { updatedAt: status.updatedAt } : {}),
|
|
1922
|
+
};
|
|
1923
|
+
}
|
|
1924
|
+
function formatWorkbenchCloudAuthStatus(status) {
|
|
1925
|
+
return `Workbench Cloud: ${status.status}${status.baseUrl ? `\tbaseUrl=${status.baseUrl}` : ""}${status.username ? `\tuser=${status.username}` : ""}`;
|
|
1926
|
+
}
|
|
1927
|
+
function formatAuthStatusList(workbenchCloud, statuses, required) {
|
|
1077
1928
|
const lines = [
|
|
1929
|
+
formatWorkbenchCloudAuthStatus(workbenchCloud),
|
|
1930
|
+
"",
|
|
1078
1931
|
...(statuses.length > 0
|
|
1079
1932
|
? ["Adapter auth:", ...statuses.map(formatAuthStatusRecord)]
|
|
1080
1933
|
: ["No local adapter auth records."]),
|
|
1081
1934
|
...(required.length > 0
|
|
1082
|
-
? ["", "Required by agents:", ...required.map((entry) => `${entry.agent}\t${entry.adapter}\t${entry.local
|
|
1935
|
+
? ["", "Required by agents:", ...required.map((entry) => `${entry.agent}\t${entry.adapter}\t${entry.local}`)]
|
|
1083
1936
|
: []),
|
|
1084
1937
|
];
|
|
1085
1938
|
return lines.join("\n");
|
|
@@ -1108,7 +1961,7 @@ async function showLocalAgentSession(ref) {
|
|
|
1108
1961
|
const sessions = await listLocalAgentSessions();
|
|
1109
1962
|
const session = sessions.find((entry) => entry.id === ref);
|
|
1110
1963
|
if (!session) {
|
|
1111
|
-
throw new
|
|
1964
|
+
throw new WorkbenchCodedError("ref_not_found", `Session not found: ${ref}`, { exitCode: 1 });
|
|
1112
1965
|
}
|
|
1113
1966
|
return {
|
|
1114
1967
|
...session,
|
|
@@ -1234,6 +2087,17 @@ function parseArgs(argv) {
|
|
|
1234
2087
|
addFlag(flags, "version", true);
|
|
1235
2088
|
continue;
|
|
1236
2089
|
}
|
|
2090
|
+
if (arg === "-n") {
|
|
2091
|
+
const value = argv[index + 1];
|
|
2092
|
+
if (value && !value.startsWith("-")) {
|
|
2093
|
+
index += 1;
|
|
2094
|
+
addFlag(flags, "samples", value);
|
|
2095
|
+
}
|
|
2096
|
+
else {
|
|
2097
|
+
addFlag(flags, "samples", true);
|
|
2098
|
+
}
|
|
2099
|
+
continue;
|
|
2100
|
+
}
|
|
1237
2101
|
if (!arg.startsWith("--") || arg === "--") {
|
|
1238
2102
|
positionals.push(arg);
|
|
1239
2103
|
continue;
|
|
@@ -1241,7 +2105,9 @@ function parseArgs(argv) {
|
|
|
1241
2105
|
const eq = arg.indexOf("=");
|
|
1242
2106
|
const name = eq === -1 ? arg.slice(2) : arg.slice(2, eq);
|
|
1243
2107
|
const value = eq === -1 ? argv[index + 1] : arg.slice(eq + 1);
|
|
1244
|
-
|
|
2108
|
+
const flagSpec = flagSpecForParsedPrefix(positionals, flags);
|
|
2109
|
+
const kind = flagSpec?.[name];
|
|
2110
|
+
if (eq === -1 && kind === "boolean") {
|
|
1245
2111
|
addFlag(flags, name, true);
|
|
1246
2112
|
}
|
|
1247
2113
|
else if (eq === -1 && value && !value.startsWith("-")) {
|
|
@@ -1254,8 +2120,12 @@ function parseArgs(argv) {
|
|
|
1254
2120
|
}
|
|
1255
2121
|
return { positionals, flags };
|
|
1256
2122
|
}
|
|
2123
|
+
function flagSpecForParsedPrefix(positionals, flags) {
|
|
2124
|
+
const command = positionals[0] ?? (flags.version === true ? "version" : "status");
|
|
2125
|
+
return allowedFlagsForCommand({ positionals: [...positionals], flags: {} }, command);
|
|
2126
|
+
}
|
|
1257
2127
|
function addFlag(flags, name, value) {
|
|
1258
|
-
if (name === "with") {
|
|
2128
|
+
if (name === "with" || name === "to") {
|
|
1259
2129
|
const existing = flags[name];
|
|
1260
2130
|
flags[name] = Array.isArray(existing)
|
|
1261
2131
|
? [...existing, String(value)]
|
|
@@ -1272,13 +2142,21 @@ function dirFlag(parsed) {
|
|
|
1272
2142
|
async function coreOptions(parsed) {
|
|
1273
2143
|
return {
|
|
1274
2144
|
dir: dirFlag(parsed),
|
|
1275
|
-
authToken: await
|
|
2145
|
+
authToken: await workbenchCloudToken(),
|
|
1276
2146
|
};
|
|
1277
2147
|
}
|
|
1278
2148
|
function stringFlag(parsed, name) {
|
|
1279
2149
|
const value = parsed.flags[name];
|
|
1280
2150
|
return typeof value === "string" ? value : undefined;
|
|
1281
2151
|
}
|
|
2152
|
+
function stringsFlag(parsed, name) {
|
|
2153
|
+
const value = parsed.flags[name];
|
|
2154
|
+
return Array.isArray(value)
|
|
2155
|
+
? value
|
|
2156
|
+
: typeof value === "string"
|
|
2157
|
+
? [value]
|
|
2158
|
+
: [];
|
|
2159
|
+
}
|
|
1282
2160
|
function intFlag(parsed, name) {
|
|
1283
2161
|
const value = stringFlag(parsed, name);
|
|
1284
2162
|
if (!value) {
|
|
@@ -1300,14 +2178,177 @@ function requiredPositional(parsed, index, message) {
|
|
|
1300
2178
|
}
|
|
1301
2179
|
return value;
|
|
1302
2180
|
}
|
|
1303
|
-
function
|
|
1304
|
-
|
|
2181
|
+
function requiredFlag(parsed, input) {
|
|
2182
|
+
const flagValue = stringFlag(parsed, input.flag);
|
|
2183
|
+
if (!flagValue) {
|
|
2184
|
+
throw new WorkbenchCodedError("usage", input.usage, {
|
|
2185
|
+
remediation: input.remediation,
|
|
2186
|
+
exitCode: 2,
|
|
2187
|
+
});
|
|
2188
|
+
}
|
|
2189
|
+
return flagValue;
|
|
2190
|
+
}
|
|
2191
|
+
function rejectExtraInput(parsed, input) {
|
|
2192
|
+
if (parsed.positionals.length <= input.maxPositionals) {
|
|
2193
|
+
return;
|
|
2194
|
+
}
|
|
2195
|
+
throw new WorkbenchCodedError("usage", input.message, {
|
|
2196
|
+
remediation: input.remediation,
|
|
2197
|
+
exitCode: 2,
|
|
2198
|
+
});
|
|
2199
|
+
}
|
|
2200
|
+
async function defaultDiffRange(core) {
|
|
2201
|
+
await listWorkbenchVersions(core);
|
|
2202
|
+
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
2203
|
+
const currentId = snapshot.status.currentVersionId ?? snapshot.refs.current;
|
|
2204
|
+
const current = snapshot.versions.find((version) => version.id === currentId);
|
|
2205
|
+
if (!current) {
|
|
2206
|
+
throw new WorkbenchCodedError("version_not_found", "Current Workbench version was not found.", {
|
|
2207
|
+
remediation: "Run workbench log --versions.",
|
|
2208
|
+
exitCode: 1,
|
|
2209
|
+
});
|
|
2210
|
+
}
|
|
2211
|
+
const parent = current.parentIds[0];
|
|
2212
|
+
return parent ? `${parent}..${current.id}` : `${current.id}..${current.id}`;
|
|
2213
|
+
}
|
|
2214
|
+
function parsePublishVisibilityFlags(parsed) {
|
|
2215
|
+
const selected = [
|
|
2216
|
+
parsed.flags.private === true ? "private" : undefined,
|
|
2217
|
+
parsed.flags.team === true ? "internal" : undefined,
|
|
2218
|
+
parsed.flags.public === true ? "public" : undefined,
|
|
2219
|
+
].filter((value) => Boolean(value));
|
|
2220
|
+
if (selected.length > 1) {
|
|
2221
|
+
throw new WorkbenchCodedError("usage", "workbench publish accepts only one visibility flag.", {
|
|
2222
|
+
remediation: "Run workbench publish --private, workbench publish --team, or workbench publish --public.",
|
|
2223
|
+
exitCode: 2,
|
|
2224
|
+
});
|
|
2225
|
+
}
|
|
2226
|
+
return selected[0];
|
|
2227
|
+
}
|
|
2228
|
+
async function previewPublishWithDerivedRemote(parsed) {
|
|
2229
|
+
const root = path.resolve(dirFlag(parsed) ?? process.cwd());
|
|
2230
|
+
const core = await coreOptions(parsed);
|
|
2231
|
+
await listWorkbenchVersions(core);
|
|
2232
|
+
const reconciledSnapshot = await createWorkbenchReadOnlyInspectionSnapshot({ dir: root });
|
|
2233
|
+
const link = cloudRemoteLinkTargetFromRemotes(reconciledSnapshot.remotes);
|
|
2234
|
+
if (link.existing) {
|
|
1305
2235
|
return undefined;
|
|
1306
2236
|
}
|
|
1307
|
-
|
|
1308
|
-
|
|
2237
|
+
const remote = await derivePublishCloudRemote(parsed, "workbench publish", link.name);
|
|
2238
|
+
const requestedVersion = optionalPositional(parsed, 1);
|
|
2239
|
+
const versionId = requestedVersion && requestedVersion !== "current"
|
|
2240
|
+
? requestedVersion
|
|
2241
|
+
: reconciledSnapshot.status.currentVersionId ?? reconciledSnapshot.refs.current;
|
|
2242
|
+
const version = reconciledSnapshot.versions.find((entry) => entry.id === versionId);
|
|
2243
|
+
if (!version) {
|
|
2244
|
+
throw new WorkbenchCodedError("version_not_found", `Version not found: ${requestedVersion ?? "current"}`, {
|
|
2245
|
+
remediation: "Run workbench log --versions.",
|
|
2246
|
+
subject: { version: requestedVersion ?? "current" },
|
|
2247
|
+
exitCode: 1,
|
|
2248
|
+
});
|
|
2249
|
+
}
|
|
2250
|
+
return {
|
|
2251
|
+
remote,
|
|
2252
|
+
version,
|
|
2253
|
+
visibility: parsePublishVisibilityFlags(parsed) ?? "private",
|
|
2254
|
+
installHandle: installHandleFromCloudRemote(remote),
|
|
2255
|
+
installUrl: remote.url,
|
|
2256
|
+
pinnedInstallUrl: `${remote.url}/releases/${encodeURIComponent(version.id)}`,
|
|
2257
|
+
};
|
|
2258
|
+
}
|
|
2259
|
+
async function ensurePublishRemote(parsed) {
|
|
2260
|
+
const core = await coreOptions(parsed);
|
|
2261
|
+
const root = path.resolve(dirFlag(parsed) ?? process.cwd());
|
|
2262
|
+
const link = await cloudRemoteLinkTarget(root);
|
|
2263
|
+
const override = stringFlag(parsed, "as");
|
|
2264
|
+
if (override) {
|
|
2265
|
+
const remote = await derivePublishCloudRemote(parsed, "workbench publish", link.name);
|
|
2266
|
+
const result = await addWorkbenchRemote(remote.name, remote.url, { ...core, replace: link.replace });
|
|
2267
|
+
return result.remote.name;
|
|
2268
|
+
}
|
|
2269
|
+
if (link.existing) {
|
|
2270
|
+
return link.existing.name;
|
|
2271
|
+
}
|
|
2272
|
+
const remote = await derivePublishCloudRemote(parsed, "workbench publish", link.name);
|
|
2273
|
+
const result = await addWorkbenchRemote(remote.name, remote.url, core);
|
|
2274
|
+
return result.remote.name;
|
|
2275
|
+
}
|
|
2276
|
+
async function derivePublishCloudRemote(parsed, action = "workbench publish", name = "cloud") {
|
|
2277
|
+
const config = await loadConfig();
|
|
2278
|
+
const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl }) ?? DEFAULT_WORKBENCH_CLOUD_BASE_URL;
|
|
2279
|
+
const override = stringFlag(parsed, "as");
|
|
2280
|
+
const handle = override ? parseOwnerSkillHandle(override) : derivedOwnerSkillHandle(parsed, config, action);
|
|
2281
|
+
const url = `${baseUrl}/skills/${encodeURIComponent(handle.owner)}/${encodeURIComponent(handle.skill)}`;
|
|
2282
|
+
return { name, kind: "workbench-cloud", url };
|
|
2283
|
+
}
|
|
2284
|
+
function installHandleFromCloudRemote(remote) {
|
|
2285
|
+
const source = parseWorkbenchInstallSource(remote.url);
|
|
2286
|
+
if (!source) {
|
|
2287
|
+
throw new WorkbenchCodedError("remote_invalid_url", `Workbench remote is not a Cloud skill URL: ${remote.url}`, {
|
|
2288
|
+
remediation: "Run workbench publish to recreate the Workbench Cloud link.",
|
|
2289
|
+
subject: { remote: remote.name, url: remote.url },
|
|
2290
|
+
exitCode: 2,
|
|
2291
|
+
});
|
|
1309
2292
|
}
|
|
1310
|
-
|
|
2293
|
+
return `${source.owner}/${source.skill}`;
|
|
2294
|
+
}
|
|
2295
|
+
function parseOwnerSkillHandle(input) {
|
|
2296
|
+
const handle = normalizedOwnerSkillHandle(input);
|
|
2297
|
+
if (!handle) {
|
|
2298
|
+
throw new WorkbenchCodedError("usage", "workbench publish --as expects OWNER/SKILL.", {
|
|
2299
|
+
remediation: "Run workbench publish --as OWNER/SKILL.",
|
|
2300
|
+
exitCode: 2,
|
|
2301
|
+
});
|
|
2302
|
+
}
|
|
2303
|
+
return handle;
|
|
2304
|
+
}
|
|
2305
|
+
function derivedOwnerSkillHandle(parsed, config, action) {
|
|
2306
|
+
const owner = config.username?.trim();
|
|
2307
|
+
if (!owner) {
|
|
2308
|
+
throw new WorkbenchCodedError("auth_required", `${action} needs a logged-in Workbench Cloud username before it can derive OWNER/SKILL.`, {
|
|
2309
|
+
remediation: "Run workbench login.",
|
|
2310
|
+
exitCode: 1,
|
|
2311
|
+
});
|
|
2312
|
+
}
|
|
2313
|
+
const root = path.resolve(dirFlag(parsed) ?? process.cwd());
|
|
2314
|
+
const handle = normalizeOwnerSkillHandle(owner, path.basename(root));
|
|
2315
|
+
if (!handle.owner || !handle.skill) {
|
|
2316
|
+
throw new WorkbenchCodedError("usage", `${action} could not derive a valid OWNER/SKILL handle.`, {
|
|
2317
|
+
remediation: `Run ${action} --as OWNER/SKILL.`,
|
|
2318
|
+
subject: { owner, skill: path.basename(root) },
|
|
2319
|
+
exitCode: 2,
|
|
2320
|
+
});
|
|
2321
|
+
}
|
|
2322
|
+
return handle;
|
|
2323
|
+
}
|
|
2324
|
+
async function resolveWorkbenchInstallSourceInput(input) {
|
|
2325
|
+
if (/^https?:\/\//u.test(input)) {
|
|
2326
|
+
return input;
|
|
2327
|
+
}
|
|
2328
|
+
const handle = normalizedOwnerSkillHandle(input);
|
|
2329
|
+
if (!handle) {
|
|
2330
|
+
throw new WorkbenchCodedError("usage", "workbench install expects OWNER/SKILL or a Workbench Cloud skill URL.", {
|
|
2331
|
+
remediation: "Run workbench install OWNER/SKILL --to codex.",
|
|
2332
|
+
exitCode: 2,
|
|
2333
|
+
});
|
|
2334
|
+
}
|
|
2335
|
+
const config = await loadConfig();
|
|
2336
|
+
const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl }) ?? DEFAULT_WORKBENCH_CLOUD_BASE_URL;
|
|
2337
|
+
return `${baseUrl}/skills/${encodeURIComponent(handle.owner)}/${encodeURIComponent(handle.skill)}`;
|
|
2338
|
+
}
|
|
2339
|
+
function normalizedOwnerSkillHandle(value) {
|
|
2340
|
+
const parts = value.trim().split("/");
|
|
2341
|
+
if (parts.length !== 2) {
|
|
2342
|
+
return null;
|
|
2343
|
+
}
|
|
2344
|
+
const handle = normalizeOwnerSkillHandle(parts[0] ?? "", parts[1] ?? "");
|
|
2345
|
+
return handle.owner && handle.skill ? handle : null;
|
|
2346
|
+
}
|
|
2347
|
+
function normalizeOwnerSkillHandle(owner, skill) {
|
|
2348
|
+
return {
|
|
2349
|
+
owner: normalizeWorkbenchSkillName(owner),
|
|
2350
|
+
skill: normalizeWorkbenchSkillName(skill),
|
|
2351
|
+
};
|
|
1311
2352
|
}
|
|
1312
2353
|
function parseWithFlags(parsed) {
|
|
1313
2354
|
const raw = parsed.flags.with;
|
|
@@ -1332,71 +2373,284 @@ function parseScalar(value) {
|
|
|
1332
2373
|
}
|
|
1333
2374
|
return value;
|
|
1334
2375
|
}
|
|
1335
|
-
function
|
|
1336
|
-
|
|
1337
|
-
|
|
2376
|
+
async function artifactIdsByRunId(core, runs) {
|
|
2377
|
+
const runIds = new Set(runs.map((run) => run.id));
|
|
2378
|
+
const byRun = new Map([...runIds].map((runId) => [runId, []]));
|
|
2379
|
+
if (runIds.size === 0) {
|
|
2380
|
+
return byRun;
|
|
1338
2381
|
}
|
|
1339
|
-
|
|
1340
|
-
|
|
2382
|
+
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
2383
|
+
for (const job of snapshot.jobs) {
|
|
2384
|
+
if (!runIds.has(job.runId)) {
|
|
2385
|
+
continue;
|
|
2386
|
+
}
|
|
2387
|
+
const current = byRun.get(job.runId) ?? [];
|
|
2388
|
+
byRun.set(job.runId, [...new Set([...current, ...job.artifactIds])]);
|
|
1341
2389
|
}
|
|
1342
|
-
return
|
|
2390
|
+
return byRun;
|
|
2391
|
+
}
|
|
2392
|
+
function emitEvalFailure(runs, failedRuns, artifactIds, parsed, io) {
|
|
2393
|
+
const nextCommands = evalFailureNextCommands(failedRuns);
|
|
2394
|
+
if (parsed.flags.json === true) {
|
|
2395
|
+
io.stdout.write(`${JSON.stringify({
|
|
2396
|
+
schema: "workbench.cli.eval.v1",
|
|
2397
|
+
ok: false,
|
|
2398
|
+
code: "eval_runs_failed",
|
|
2399
|
+
message: "Eval failed; evidence was saved.",
|
|
2400
|
+
retryable: false,
|
|
2401
|
+
evidenceSaved: true,
|
|
2402
|
+
runs: runs.map((run) => runFailureSummary(run, artifactIds.get(run.id) ?? [])),
|
|
2403
|
+
failedRuns: failedRuns.map((run) => runFailureSummary(run, artifactIds.get(run.id) ?? [])),
|
|
2404
|
+
nextCommands,
|
|
2405
|
+
}, null, 2)}\n`);
|
|
2406
|
+
return 1;
|
|
2407
|
+
}
|
|
2408
|
+
io.stdout.write([
|
|
2409
|
+
"Eval failed; evidence was saved.",
|
|
2410
|
+
...failedRuns.map(formatRun),
|
|
2411
|
+
...(nextCommands[0] ? [`next: ${nextCommands[0]}`] : []),
|
|
2412
|
+
].join("\n") + "\n");
|
|
2413
|
+
return 1;
|
|
2414
|
+
}
|
|
2415
|
+
function runSummary(run, artifactIds) {
|
|
2416
|
+
return {
|
|
2417
|
+
id: run.id,
|
|
2418
|
+
kind: run.kind,
|
|
2419
|
+
status: run.status,
|
|
2420
|
+
versionId: run.versionId,
|
|
2421
|
+
skillName: run.skillName,
|
|
2422
|
+
agentName: run.agentName,
|
|
2423
|
+
...(run.score !== undefined ? { score: run.score } : {}),
|
|
2424
|
+
...(run.latencyMs !== undefined ? { latencyMs: run.latencyMs } : {}),
|
|
2425
|
+
...(run.error ? { error: run.error } : {}),
|
|
2426
|
+
...(run.jobIds ? { jobIds: run.jobIds } : {}),
|
|
2427
|
+
traceIds: run.traceIds,
|
|
2428
|
+
artifactIds: [...artifactIds],
|
|
2429
|
+
};
|
|
1343
2430
|
}
|
|
1344
|
-
function
|
|
1345
|
-
|
|
1346
|
-
|
|
2431
|
+
function runFailureSummary(run, artifactIds) {
|
|
2432
|
+
return {
|
|
2433
|
+
runId: run.id,
|
|
2434
|
+
agent: run.agentName,
|
|
2435
|
+
skill: run.skillName,
|
|
2436
|
+
status: run.status,
|
|
2437
|
+
versionId: run.versionId,
|
|
2438
|
+
...(run.score !== undefined ? { score: run.score } : {}),
|
|
2439
|
+
...(run.error ? { error: run.error } : {}),
|
|
2440
|
+
traceIds: run.traceIds,
|
|
2441
|
+
artifactIds: [...artifactIds],
|
|
2442
|
+
};
|
|
2443
|
+
}
|
|
2444
|
+
function evalFailureNextCommands(failedRuns) {
|
|
2445
|
+
const first = failedRuns[0];
|
|
2446
|
+
if (!first) {
|
|
2447
|
+
return ["workbench log --runs"];
|
|
1347
2448
|
}
|
|
1348
2449
|
return [
|
|
1349
|
-
`
|
|
1350
|
-
`
|
|
1351
|
-
`
|
|
1352
|
-
`
|
|
1353
|
-
|
|
1354
|
-
`Versions: ${status.versionCount}`,
|
|
1355
|
-
`Skills: ${status.skillCount}`,
|
|
1356
|
-
`Agents: ${status.agentCount}`,
|
|
1357
|
-
`Runs: ${status.runCount}`,
|
|
1358
|
-
`Remotes: ${status.remoteCount}`,
|
|
1359
|
-
...(status.pendingSyncCount ? [`Pending sync: ${status.pendingSyncCount}`] : []),
|
|
1360
|
-
...(status.lastScore !== undefined ? [`Last score: ${status.lastScore}`] : []),
|
|
1361
|
-
...(status.automationReadiness ? [`Automation readiness: ${status.automationReadiness.label} - ${status.automationReadiness.reason}`] : []),
|
|
1362
|
-
].join("\n");
|
|
2450
|
+
`workbench show ${first.id}`,
|
|
2451
|
+
`workbench show ${first.id}:stderr.log`,
|
|
2452
|
+
`workbench case add ${first.id}`,
|
|
2453
|
+
`workbench improve --agents ${first.agentName} --budget 1 -n 1`,
|
|
2454
|
+
];
|
|
1363
2455
|
}
|
|
1364
|
-
function
|
|
1365
|
-
return
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
2456
|
+
function output(value, parsed, io, text) {
|
|
2457
|
+
return emitResult(commandSchema(parsed), { result: value }, parsed, io, text);
|
|
2458
|
+
}
|
|
2459
|
+
function commandSchema(parsed) {
|
|
2460
|
+
const command = parsed.positionals[0] ?? "result";
|
|
2461
|
+
const subcommand = parsed.positionals[1];
|
|
2462
|
+
const suffix = ["agent", "case"].includes(command) && subcommand
|
|
2463
|
+
? `${command}-${subcommand}`
|
|
2464
|
+
: command;
|
|
2465
|
+
return `workbench.cli.${suffix}.v1`;
|
|
2466
|
+
}
|
|
2467
|
+
async function workbenchCliAuthStatus() {
|
|
2468
|
+
const config = await loadConfig();
|
|
2469
|
+
const adapterStatuses = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).listStatus().catch(() => []);
|
|
2470
|
+
const baseUrl = optionalWorkbenchBaseUrl({ configBaseUrl: config.baseUrl });
|
|
2471
|
+
return {
|
|
2472
|
+
workbenchCloud: {
|
|
2473
|
+
status: config.accessToken || workbenchCloudEnvToken() ? "authenticated" : "not_authenticated",
|
|
2474
|
+
...(baseUrl ? { baseUrl } : {}),
|
|
2475
|
+
...(config.accessToken && config.username ? { username: config.username } : {}),
|
|
2476
|
+
},
|
|
2477
|
+
adapters: adapterStatuses.map((status) => ({
|
|
2478
|
+
adapter: status.adapterId,
|
|
2479
|
+
...(status.slot ? { slot: status.slot } : {}),
|
|
2480
|
+
profile: status.profile,
|
|
2481
|
+
status: status.status,
|
|
2482
|
+
...(status.method ? { method: status.method } : {}),
|
|
2483
|
+
...(status.updatedAt ? { updatedAt: status.updatedAt } : {}),
|
|
2484
|
+
})),
|
|
2485
|
+
};
|
|
2486
|
+
}
|
|
2487
|
+
function formatLogEntry(entry) {
|
|
2488
|
+
if (entry.kind === "version") {
|
|
2489
|
+
return `${entry.createdAt}\tversion\t${entry.id}\tfiles=${entry.fileCount}\t${entry.message}`;
|
|
2490
|
+
}
|
|
2491
|
+
const score = entry.score === undefined ? "n/a" : entry.score.toFixed(3);
|
|
2492
|
+
return `${entry.createdAt}\trun\t${entry.id}\t${entry.status}\tversion=${entry.versionId}\tskill=${entry.skillName}\tagent=${entry.agentName}\tscore=${score}`;
|
|
2493
|
+
}
|
|
2494
|
+
function splitShowRef(ref) {
|
|
2495
|
+
const index = ref.indexOf(":");
|
|
2496
|
+
if (index === -1) {
|
|
2497
|
+
return [ref, null];
|
|
2498
|
+
}
|
|
2499
|
+
return [ref.slice(0, index), ref.slice(index + 1)];
|
|
2500
|
+
}
|
|
2501
|
+
async function fileForRunOrJobRef(core, objectRef, requestedPath) {
|
|
2502
|
+
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
2503
|
+
const run = snapshot.runs.find((entry) => entry.id === objectRef);
|
|
2504
|
+
const job = snapshot.jobs.find((entry) => entry.id === objectRef);
|
|
2505
|
+
if (!run && !job) {
|
|
2506
|
+
return null;
|
|
2507
|
+
}
|
|
2508
|
+
const traceIds = run?.traceIds ?? job?.traceIds ?? [];
|
|
2509
|
+
const traces = snapshot.traces.filter((trace) => traceIds.includes(trace.id));
|
|
2510
|
+
for (const trace of traces) {
|
|
2511
|
+
const file = findShowFile(trace.files, requestedPath);
|
|
2512
|
+
if (file) {
|
|
2513
|
+
return file;
|
|
2514
|
+
}
|
|
2515
|
+
}
|
|
2516
|
+
throw new WorkbenchCodedError("ref_not_found", `File not found in ${objectRef}: ${requestedPath}`, {
|
|
2517
|
+
remediation: `Run workbench show ${objectRef}.`,
|
|
2518
|
+
subject: { ref: objectRef, path: requestedPath },
|
|
2519
|
+
exitCode: 1,
|
|
2520
|
+
});
|
|
2521
|
+
}
|
|
2522
|
+
function evidenceDetailsForRunOrJob(snapshot, ref) {
|
|
2523
|
+
const run = snapshot.runs.find((entry) => entry.id === ref);
|
|
2524
|
+
const job = snapshot.jobs.find((entry) => entry.id === ref);
|
|
2525
|
+
const jobs = run
|
|
2526
|
+
? snapshot.jobs.filter((entry) => entry.runId === run.id)
|
|
2527
|
+
: job ? [job] : [];
|
|
2528
|
+
return jobs.flatMap((entry) => {
|
|
2529
|
+
const detail = workbenchJobEvidenceForSnapshot(snapshot, {
|
|
2530
|
+
runId: entry.runId,
|
|
2531
|
+
jobId: entry.id,
|
|
2532
|
+
});
|
|
2533
|
+
return detail ? [detail] : [];
|
|
2534
|
+
}).filter((detail) => detail.executions.some((execution) => execution.sessions.length > 0 ||
|
|
2535
|
+
execution.trace.spans.length > 0 ||
|
|
2536
|
+
execution.trace.events.length > 0 ||
|
|
2537
|
+
execution.trace.summaries.length > 0));
|
|
2538
|
+
}
|
|
2539
|
+
function findShowFile(files, requestedPath) {
|
|
2540
|
+
const normalized = requestedPath.replace(/\\/gu, "/");
|
|
2541
|
+
return files.find((file) => file.path === normalized) ??
|
|
2542
|
+
files.find((file) => file.path.endsWith(`/${normalized}`)) ??
|
|
2543
|
+
files.find((file) => path.basename(file.path) === normalized) ??
|
|
2544
|
+
null;
|
|
2545
|
+
}
|
|
2546
|
+
function fileListing(kind, id, files) {
|
|
2547
|
+
return {
|
|
2548
|
+
kind,
|
|
2549
|
+
id,
|
|
2550
|
+
fileCount: files.length,
|
|
2551
|
+
files: files.map(fileSummary),
|
|
2552
|
+
};
|
|
2553
|
+
}
|
|
2554
|
+
function formatFileListing(kind, id, files) {
|
|
2555
|
+
return [`${kind}\t${id}\tfiles=${files.length}`, ...files.map((file) => file.path)].join("\n");
|
|
2556
|
+
}
|
|
2557
|
+
/**
 * Resolves a reference (trace id, run id, or job id) to a trace id.
 *
 * A trace whose id equals `ref` takes priority. Otherwise the first trace id of
 * the run with that id is used, falling back to the first trace id of the job
 * with that id.
 *
 * @param {*} core - Passed through to `createWorkbenchReadOnlyInspectionSnapshot`.
 * @param {string} ref - Identifier to resolve.
 * @returns {Promise<string>} The resolved trace id.
 * @throws {WorkbenchCodedError} With code `ref_not_found` when nothing resolves.
 */
async function traceIdForCaseSource(core, ref) {
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const hasRefId = (entry) => entry.id === ref;

    const directTrace = snapshot.traces.find(hasRefId);
    if (directTrace) {
        return directTrace.id;
    }

    const matchingRun = snapshot.runs.find(hasRefId);
    const matchingJob = snapshot.jobs.find(hasRefId);
    // The run's first trace wins over the job's first trace.
    const firstTraceId = matchingRun?.traceIds[0] ?? matchingJob?.traceIds[0];
    if (!firstTraceId) {
        throw new WorkbenchCodedError("ref_not_found", `Run, job, or trace not found: ${ref}`, {
            remediation: "Run workbench log, then workbench case add RUN_ID.",
            subject: { ref },
            exitCode: 1,
        });
    }
    return firstTraceId;
}
|
|
2575
|
+
/**
 * Computes, for each run, its score change relative to the most recent earlier
 * scored run of the same skill and agent found in the inspection snapshot.
 *
 * `score`, `previousScore` and `delta` are only present on an entry when the
 * values they are derived from are defined.
 *
 * @param {*} core - Passed through to `createWorkbenchReadOnlyInspectionSnapshot`.
 * @param {Array<object>} runs - Runs to describe.
 * @returns {Promise<Array<object>>} One delta record per input run, in order.
 */
async function evalDeltas(core, runs) {
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
    const newestFirst = (left, right) => right.createdAt.localeCompare(left.createdAt);

    return runs.map((run) => {
        // A comparable baseline is a different, numerically scored run of the
        // same skill/agent pair that was created strictly before this run.
        const isEarlierScoredPeer = (candidate) => candidate.id !== run.id &&
            candidate.skillName === run.skillName &&
            candidate.agentName === run.agentName &&
            typeof candidate.score === "number" &&
            candidate.createdAt < run.createdAt;
        const previous = snapshot.runs.filter(isEarlierScoredPeer).sort(newestFirst)[0];

        const entry = {
            runId: run.id,
            versionId: run.versionId,
            skillName: run.skillName,
            agentName: run.agentName,
        };
        if (run.score !== undefined) {
            entry.score = run.score;
        }
        if (previous?.score !== undefined) {
            entry.previousScore = previous.score;
        }
        if (run.score !== undefined && previous?.score !== undefined) {
            entry.delta = run.score - previous.score;
        }
        return entry;
    });
}
|
|
2596
|
+
/**
 * Formats one eval delta record as a single human-readable line.
 *
 * Scores are printed with three decimals, or `n/a` when the score is
 * undefined. When there is no previous score or no delta, the suffix is
 * `(was n/a)`; otherwise it shows the previous score and the signed change,
 * with non-negative changes prefixed by `+`.
 *
 * @param {{skillName: *, versionId: *, score?: number, previousScore?: number, delta?: number}} delta
 * @returns {string} The formatted line.
 */
function formatEvalDelta(delta) {
    const scoreText = delta.score === undefined ? "n/a" : delta.score.toFixed(3);
    const prefix = `${delta.skillName} ${delta.versionId} ${scoreText}`;

    const hasBaseline = delta.previousScore !== undefined && delta.delta !== undefined;
    if (!hasBaseline) {
        return `${prefix} (was n/a)`;
    }

    const signedChange = `${delta.delta >= 0 ? "+" : ""}${delta.delta.toFixed(3)}`;
    return `${prefix} (was ${delta.previousScore.toFixed(3)}, ${signedChange})`;
}
|
|
2604
|
+
/**
 * Suggests the follow-up command after an eval.
 *
 * @param {Array<*>} runs - Runs produced by the eval.
 * @returns {string[]} `["workbench publish"]` when at least one run exists,
 *   otherwise `["workbench eval"]`.
 */
function evalSuccessNextCommands(runs) {
    if (runs.length > 0) {
        return ["workbench publish"];
    }
    return ["workbench eval"];
}
|
|
2607
|
+
/**
 * Renders a status snapshot as multi-line text.
 *
 * Output order: project root, initialized flag, optional current version /
 * default skill / default agent, run count (with last status when set),
 * Workbench Cloud auth state, the remotes section, and finally the first
 * suggested next command when one exists.
 *
 * Each remote yields one tab-separated line; a remote whose sync status is
 * `error` and that has a `lastError` additionally yields the error line and,
 * when available, the last attempt time and the suggested next command.
 *
 * @param {object} status - Snapshot with `project`, `runs`, optional `auth`,
 *   `remotes` and `next` as read by this function.
 * @returns {string} Newline-joined text with no trailing newline.
 */
function formatStatusSnapshot(status) {
    // Tab-separated publication column for a remote.
    const publicationColumn = (publication) => {
        if (publication.status !== "published") {
            return "publication=unpublished";
        }
        const parts = ["publication=published"];
        if (publication.visibility) {
            parts.push(`visibility=${publication.visibility}`);
        }
        if (publication.versionId) {
            parts.push(`version=${publication.versionId}`);
        }
        if (publication.installUrl) {
            parts.push(`install=${publication.installUrl}`);
        }
        if (publication.pinnedInstallUrl) {
            parts.push(`pinned=${publication.pinnedInstallUrl}`);
        }
        return parts.join("\t");
    };

    // Extra detail lines shown only for a remote whose last sync failed.
    const syncErrorLines = (sync) => {
        if (!(sync.status === "error" && sync.lastError)) {
            return [];
        }
        const detail = [` error[${sync.lastError.code}]: ${sync.lastError.message}`];
        if (sync.lastAttemptAt) {
            detail.push(` last attempt: ${sync.lastAttemptAt}`);
        }
        if (sync.nextCommand) {
            detail.push(` next: ${sync.nextCommand}`);
        }
        return detail;
    };

    const lines = [];
    lines.push(`Root: ${status.project.root}`);
    lines.push(`Initialized: ${status.project.initialized ? "yes" : "no"}`);
    if (status.project.currentVersionId) {
        lines.push(`Current version: ${status.project.currentVersionId}`);
    }
    if (status.project.defaultSkill) {
        lines.push(`Default skill: ${status.project.defaultSkill}`);
    }
    if (status.project.defaultAgent) {
        lines.push(`Default agent: ${status.project.defaultAgent}`);
    }

    const lastRunNote = status.runs.lastStatus ? ` (last ${status.runs.lastStatus})` : "";
    lines.push(`Runs: ${status.runs.total}${lastRunNote}`);

    const cloudState = status.auth?.workbenchCloud.status ?? "not_authenticated";
    const cloudUrl = status.auth?.workbenchCloud.baseUrl ? ` ${status.auth.workbenchCloud.baseUrl}` : "";
    lines.push(`Workbench Cloud: ${cloudState}${cloudUrl}`);

    if (status.remotes.length > 0) {
        lines.push("Remotes:");
        status.remotes.forEach((remote) => {
            const publication = publicationColumn(remote.publication);
            lines.push(` ${remote.name}\tkind=${remote.kind}\tsync=${remote.sync.status}\turl=${remote.url}\t${publication}`);
            lines.push(...syncErrorLines(remote.sync));
        });
    }
    else {
        lines.push("Remotes: none");
    }

    if (status.next[0]) {
        lines.push(`next: ${status.next[0]}`);
    }
    return lines.join("\n");
}
|
|
1397
2641
|
/**
 * Formats a version as `id<TAB>first 12 hash characters<TAB>message`.
 *
 * @param {{id: *, hash: string, message: *}} version
 * @returns {string} The tab-separated line.
 */
function formatVersion(version) {
    const { id, hash, message } = version;
    const shortHash = hash.slice(0, 12);
    return `${id}\t${shortHash}\t${message}`;
}
|
|
2644
|
+
/**
 * Produces a compact summary of a version: its identifying fields plus the
 * number of files, without the file contents themselves.
 *
 * @param {{id: *, hash: *, message: *, parentIds: *, createdAt: *, files: Array<*>}} version
 * @returns {{id: *, hash: *, message: *, parentIds: *, createdAt: *, fileCount: number}}
 */
function versionSummary(version) {
    const { id, hash, message, parentIds, createdAt, files } = version;
    return { id, hash, message, parentIds, createdAt, fileCount: files.length };
}
|
|
1400
2654
|
/**
 * Formats an agent as `name<TAB>adapter`, appending `<TAB>model` only when the
 * agent has a truthy `model`.
 *
 * @param {{name: *, adapter: *, model?: *}} agent
 * @returns {string} The tab-separated line.
 */
function formatAgent(agent) {
    const modelColumn = agent.model ? `\t${agent.model}` : "";
    return `${agent.name}\t${agent.adapter}${modelColumn}`;
}
|
|
@@ -1419,14 +2673,14 @@ function formatJob(job) {
|
|
|
1419
2673
|
return `${job.id}\trun=${job.runId}\tcase=${job.caseId}\tsample=${job.sample}\t${job.status}\tscore=${score}\tduration=${duration}`;
|
|
1420
2674
|
}
|
|
1421
2675
|
function formatComparison(comparison) {
|
|
1422
|
-
const lines = ["version\tskill\tagent\tscore\
|
|
2676
|
+
const lines = ["version\tskill\tagent\tstatus\tscore\tcost\tlatency\trun"];
|
|
1423
2677
|
for (const cell of comparison.cells) {
|
|
1424
2678
|
lines.push([
|
|
1425
2679
|
cell.versionId,
|
|
1426
2680
|
cell.skillName,
|
|
1427
|
-
cell.agentName
|
|
2681
|
+
`${cell.agentName}@${shortObjectId(cell.agentHash)}`,
|
|
2682
|
+
cell.status ?? "not-run",
|
|
1428
2683
|
cell.score === undefined ? "n/a" : cell.score.toFixed(3),
|
|
1429
|
-
cell.automationReadiness?.label ?? "n/a",
|
|
1430
2684
|
cell.costUsd === undefined ? "n/a" : `$${cell.costUsd.toFixed(4)}`,
|
|
1431
2685
|
cell.latencyMs === undefined ? "n/a" : `${cell.latencyMs}ms`,
|
|
1432
2686
|
cell.runId ?? "n/a",
|
|
@@ -1434,6 +2688,9 @@ function formatComparison(comparison) {
|
|
|
1434
2688
|
}
|
|
1435
2689
|
return lines.join("\n");
|
|
1436
2690
|
}
|
|
2691
|
+
/**
 * Shortens an identifier to its first 12 characters; identifiers that are
 * already 12 characters or fewer are returned unchanged.
 *
 * @param {string} id - Identifier to shorten.
 * @returns {string} The possibly truncated identifier.
 */
function shortObjectId(id) {
    if (id.length > 12) {
        return id.slice(0, 12);
    }
    return id;
}
|
|
1437
2694
|
function formatTrace(trace) {
|
|
1438
2695
|
const result = asRecord(trace.result);
|
|
1439
2696
|
const status = typeof result?.status === "string" ? result.status : undefined;
|
|
@@ -1448,9 +2705,62 @@ function formatTrace(trace) {
|
|
|
1448
2705
|
`files=${trace.files.length}${files ? ` (${files}${trace.files.length > 5 ? ",..." : ""})` : ""}`,
|
|
1449
2706
|
].filter(Boolean).join("\t");
|
|
1450
2707
|
}
|
|
2708
|
+
/**
 * Produces a structured summary of a trace.
 *
 * Always includes the trace's identifiers, timestamps, file count and file
 * summaries. `jobId` is included only when truthy. `status`, `score` and
 * `error` are copied from the trace result only when they have the expected
 * primitive type (string, number, string); `error` is passed through
 * `singleLine` first.
 *
 * @param {object} trace - Trace to summarize.
 * @returns {object} The summary object.
 */
function traceSummary(trace) {
    const result = asRecord(trace.result);

    // Properties are assigned in output order so key order matches the
    // serialized shape callers see.
    const summary = { id: trace.id, runId: trace.runId };
    if (trace.jobId) {
        summary.jobId = trace.jobId;
    }
    summary.versionId = trace.versionId;
    summary.skillName = trace.skillName;
    summary.agentName = trace.agentName;
    summary.createdAt = trace.createdAt;
    if (typeof result?.status === "string") {
        summary.status = result.status;
    }
    if (typeof result?.score === "number") {
        summary.score = result.score;
    }
    if (typeof result?.error === "string") {
        summary.error = singleLine(result.error);
    }
    summary.fileCount = trace.files.length;
    summary.files = trace.files.map(fileSummary);
    return summary;
}
|
|
2725
|
+
/**
 * Renders trace detail as text, one tab-separated line per execution.
 *
 * Each line holds the execution id, the run id, the comma-joined job ids, the
 * status, and the counts of trace events, spans and summaries. A
 * `sessions=<labels>` column is appended only when the comma-joined session
 * labels are non-empty.
 *
 * @param {{runId: *, executions: Array<object>}} detail
 * @returns {string} Newline-joined lines with no trailing newline.
 */
function formatTraceDetail(detail) {
    const describeExecution = (execution) => {
        const columns = [
            `${execution.id}\trun=${detail.runId}\tjobs=${execution.jobIds.join(",")}\tstatus=${execution.status}`,
            `events=${execution.trace.events.length}`,
            `spans=${execution.trace.spans.length}`,
            `summaries=${execution.trace.summaries.length}`,
        ];
        const labels = execution.sessions.map((session) => session.label).join(",");
        if (labels) {
            columns.push(`sessions=${labels}`);
        }
        return columns.join("\t");
    };
    return detail.executions.map((execution) => describeExecution(execution)).join("\n");
}
|
|
1451
2737
|
/**
 * Formats an artifact as a tab-separated line: id, `run=<runId>`,
 * `job=<jobId>`, kind, and `files=<count>`.
 *
 * @param {{id: *, runId: *, jobId: *, kind: *, files: Array<*>}} artifact
 * @returns {string} The tab-separated line.
 */
function formatArtifact(artifact) {
    const { id, runId, jobId, kind, files } = artifact;
    return `${id}\trun=${runId}\tjob=${jobId}\t${kind}\tfiles=${files.length}`;
}
|
|
2740
|
+
/**
 * Produces a structured summary of an artifact: its identifiers, kind, file
 * count, and one `fileSummary` per file.
 *
 * @param {{id: *, runId: *, jobId: *, kind: *, files: Array<object>}} artifact
 * @returns {{id: *, runId: *, jobId: *, kind: *, fileCount: number, files: Array<object>}}
 */
function artifactSummary(artifact) {
    const { id, runId, jobId, kind } = artifact;
    const fileCount = artifact.files.length;
    const files = artifact.files.map(fileSummary);
    return { id, runId, jobId, kind, fileCount, files };
}
|
|
2750
|
+
/**
 * Summarizes a file without its content.
 *
 * `path` and `bytes` are always present. `kind` and `encoding` are included
 * only when truthy; `executable` is included whenever it is not `undefined`
 * (so an explicit `false` is kept).
 *
 * @param {object} file - File with `path`, `content` and optional metadata.
 * @returns {object} The summary; `bytes` comes from `surfaceFileByteLength`.
 */
function fileSummary(file) {
    // Properties are assigned in output order to keep key order stable.
    const summary = { path: file.path };
    if (file.kind) {
        summary.kind = file.kind;
    }
    if (file.encoding) {
        summary.encoding = file.encoding;
    }
    if (file.executable !== undefined) {
        summary.executable = file.executable;
    }
    summary.bytes = surfaceFileByteLength(file);
    return summary;
}
|
|
2759
|
+
/**
 * Returns the size in bytes of a file's decoded content.
 *
 * For `encoding === "base64"` the content is decoded and the decoded length is
 * returned. `Buffer.byteLength(str, "base64")` is deliberately not used here:
 * it estimates from the string length and assumes whitespace-free input, so it
 * over-reports for line-wrapped base64. Any other encoding is measured as
 * UTF-8 text.
 *
 * @param {{encoding?: string, content: string}} file - File to measure.
 * @returns {number} Number of bytes the content represents.
 */
function surfaceFileByteLength(file) {
    if (file.encoding === "base64") {
        // Decoding ignores embedded whitespace, giving the true payload size.
        return Buffer.from(file.content, "base64").length;
    }
    return Buffer.byteLength(file.content, "utf8");
}
|
|
1454
2764
|
/**
 * Formats a session as a tab-separated line: id, source, updatedAt,
 * `<bytes>b`, and path, with the title appended as a final column only when
 * it is truthy.
 *
 * @param {{id: *, source: *, updatedAt: *, bytes: *, path: *, title?: *}} session
 * @returns {string} The tab-separated line.
 */
function formatSession(session) {
    const titleColumn = session.title ? `\t${session.title}` : "";
    return `${session.id}\t${session.source}\t${session.updatedAt}\t${session.bytes}b\t${session.path}${titleColumn}`;
}
|
|
@@ -1473,6 +2783,9 @@ function formatShow(value) {
|
|
|
1473
2783
|
/**
 * Reports whether a value looks like a surface file: a non-null object with a
 * `content` property (own or inherited) whose value is a string.
 *
 * @param {*} value - Value to test.
 * @returns {boolean} `true` when the value has string `content`.
 */
function isSurfaceFile(value) {
    if (!value || typeof value !== "object") {
        return false;
    }
    return "content" in value && typeof value.content === "string";
}
|
|
2786
|
+
/**
 * Collapses every run of whitespace (including newlines and tabs) into a
 * single space and strips leading and trailing whitespace.
 *
 * @param {string} value - Text to flatten.
 * @returns {string} The text on one line.
 */
function singleLine(value) {
    const collapsed = value.replace(/\s+/gu, " ");
    return collapsed.trim();
}
|
|
1476
2789
|
function asRecord(value) {
|
|
1477
2790
|
return value && typeof value === "object" && !Array.isArray(value)
|
|
1478
2791
|
? value
|