@workbench-ai/workbench 0.0.74 → 0.0.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/fanout.js CHANGED
@@ -34,7 +34,7 @@ export async function fanOutSkill(name, options) {
34
34
  "--global",
35
35
  "--yes",
36
36
  ];
37
- const child = await runNode(args, scrubAgentRuntimeEnv(process.env), FANOUT_TIMEOUT_MS);
37
+ const child = await runNode(args, minimalChildEnv(process.env), FANOUT_TIMEOUT_MS);
38
38
  const { linkedAgents, additionalAgents } = parseFanOutAgents(`${child.stdout}\n${child.stderr}`);
39
39
  if (child.timedOut) {
40
40
  return {
@@ -143,29 +143,40 @@ function runNode(args, env, timeoutMs) {
143
143
  });
144
144
  });
145
145
  }
146
- function scrubAgentRuntimeEnv(env) {
147
- const next = { ...env };
148
- for (const key of [
149
- "AI_AGENT",
150
- "CODEX_SANDBOX",
151
- "CODEX_CI",
152
- "CODEX_THREAD_ID",
153
- "CURSOR_TRACE_ID",
154
- "CURSOR_AGENT",
155
- "CURSOR_EXTENSION_HOST_ROLE",
156
- "GEMINI_CLI",
157
- "ANTIGRAVITY_AGENT",
158
- "AUGMENT_AGENT",
159
- "OPENCODE_CLIENT",
160
- "CLAUDECODE",
161
- "CLAUDE_CODE",
162
- "CLAUDE_CODE_IS_COWORK",
163
- "REPL_ID",
164
- "COPILOT_MODEL",
165
- "COPILOT_ALLOW_ALL",
166
- "COPILOT_GITHUB_TOKEN",
167
- ]) {
168
- delete next[key];
146
+ // The skills CLI changes behavior when it detects it is running inside an
147
+ // agent (via @vercel/detect-agent reading agent-specific environment
148
+ // variables). Fan-out must always run in machine mode, so the child gets a
149
+ // minimal allowlisted environment instead of a hand-maintained blacklist of
150
+ // every agent's marker variables.
151
+ function minimalChildEnv(env) {
152
+ const allowed = [
153
+ "PATH",
154
+ "HOME",
155
+ "USER",
156
+ "SHELL",
157
+ "TMPDIR",
158
+ "TMP",
159
+ "TEMP",
160
+ "LANG",
161
+ "NODE_EXTRA_CA_CERTS",
162
+ "HTTP_PROXY",
163
+ "HTTPS_PROXY",
164
+ "NO_PROXY",
165
+ "http_proxy",
166
+ "https_proxy",
167
+ "no_proxy",
168
+ "SYSTEMROOT",
169
+ "SystemRoot",
170
+ "COMSPEC",
171
+ "APPDATA",
172
+ "LOCALAPPDATA",
173
+ "USERPROFILE",
174
+ ];
175
+ const next = {};
176
+ for (const key of Object.keys(env)) {
177
+ if (allowed.includes(key) || key.startsWith("XDG_") || key.startsWith("LC_")) {
178
+ next[key] = env[key];
179
+ }
169
180
  }
170
181
  return next;
171
182
  }
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAiEA,MAAM,WAAW,KAAK;IACpB,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;CAC/B;AAuTD,wBAAsB,MAAM,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,EAAE,EAAE,GAAE,KAGzD,GAAG,OAAO,CAAC,MAAM,CAAC,CAsMlB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAgEA,MAAM,WAAW,KAAK;IACpB,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;CAC/B;AAuTD,wBAAsB,MAAM,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,EAAE,EAAE,GAAE,KAGzD,GAAG,OAAO,CAAC,MAAM,CAAC,CAoMlB"}
package/dist/index.js CHANGED
@@ -4,7 +4,7 @@ import { createRequire } from "node:module";
4
4
  import os from "node:os";
5
5
  import path from "node:path";
6
6
  import { gzipSync } from "node:zlib";
7
- import { addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchInspectionSnapshot, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, prepareWorkbenchCloudEvalRequest, prepareWorkbenchCloudImproveRequest, publishWorkbenchVersion, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchSkillImproveCanUseQueuedAdapter, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
7
+ import { addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchInspectionSnapshot, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, prepareWorkbenchCloudEvalRequest, prepareWorkbenchCloudImproveRequest, publishWorkbenchVersion, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
8
8
  import { normalizeWorkbenchSkillName } from "@workbench-ai/workbench-contract";
9
9
  import { emitError, emitResult } from "./output.js";
10
10
  import { fanOutSkill, manualFanOutCommand } from "./fanout.js";
@@ -375,12 +375,10 @@ export async function runCli(argv, io = {
375
375
  if (parsed.flags.cloud === true) {
376
376
  return await handleCloudImprove(parsed, io);
377
377
  }
378
- const improverAgent = await resolveLocalImproverAgent(parsed, core);
379
378
  const result = await improveWorkbenchSkill({
380
379
  ...core,
381
380
  skill: stringFlag(parsed, "skills"),
382
381
  agent: stringFlag(parsed, "agents"),
383
- ...(improverAgent ? { improverAgent } : {}),
384
382
  budget: intFlag(parsed, "budget"),
385
383
  samples: intFlag(parsed, "samples"),
386
384
  });
@@ -916,10 +914,7 @@ async function handleInstall(parsed, io) {
916
914
  next: next,
917
915
  ...(parsed.flags["dry-run"] === true ? { dryRun: true } : {}),
918
916
  }, parsed, io, () => [
919
- parsed.flags["dry-run"] === true
920
- ? `Would install ${result.directoryName} to ${result.destination}: filesCopied=${result.filesCopied}`
921
- : `Installed ${result.directoryName}: ${result.result}`,
922
- ` machine\t${result.previous}\t${result.destination}`,
917
+ formatInstallOutcome(result, parsed.flags["dry-run"] === true),
923
918
  formatFanOut(fanout),
924
919
  ...(next ? [`next: ${next}`] : []),
925
920
  ].join("\n"));
@@ -1015,6 +1010,21 @@ function installNextCommand(fanout) {
1015
1010
  ? fanout.command
1016
1011
  : null;
1017
1012
  }
1013
+ function formatInstallOutcome(result, dryRun) {
1014
+ if (dryRun) {
1015
+ return `Would install ${result.directoryName} to ${result.destination} (${formatFileCount(result.filesCopied)}).`;
1016
+ }
1017
+ if (result.result === "unchanged") {
1018
+ return `Already installed ${result.directoryName} at ${result.destination} (unchanged).`;
1019
+ }
1020
+ const detail = result.previous === "overwritten"
1021
+ ? `overwrote existing copy, ${formatFileCount(result.filesCopied)}`
1022
+ : formatFileCount(result.filesCopied);
1023
+ return `Installed ${result.directoryName} to ${result.destination} (${detail}).`;
1024
+ }
1025
+ function formatFileCount(count) {
1026
+ return `${count} ${count === 1 ? "file" : "files"}`;
1027
+ }
1018
1028
  function fanOutToJson(fanout) {
1019
1029
  return {
1020
1030
  status: fanout.status,
@@ -1067,83 +1077,139 @@ function withTimeout(promise, timeoutMs) {
1067
1077
  async function startCloudExecution(command, parsed, io) {
1068
1078
  const root = dirFlag(parsed) ?? process.cwd();
1069
1079
  const showProgress = parsed.flags.json !== true;
1070
- const remote = await ensureCloudRemoteForExecution(root, parsed);
1071
- const source = parseWorkbenchInstallSource(remote.url);
1072
- if (!source) {
1073
- throw new WorkbenchCodedError("remote_invalid_url", `Workbench remote is not a Cloud skill URL: ${remote.url}`, {
1074
- remediation: "Run workbench publish to recreate the Workbench Cloud link.",
1075
- subject: { remote: remote.name, url: remote.url },
1076
- exitCode: 2,
1077
- });
1078
- }
1079
- const token = await workbenchCloudToken({ baseUrl: source.baseUrl });
1080
- if (!token) {
1081
- throw new WorkbenchCodedError("auth_required", `workbench ${command} --cloud requires Workbench Cloud auth.`, {
1082
- remediation: `Run workbench login --base-url ${source.baseUrl}.`,
1083
- exitCode: 1,
1084
- });
1085
- }
1086
- const core = { dir: root, authToken: token };
1087
- const request = command === "eval"
1088
- ? await prepareWorkbenchCloudEvalRequest({
1089
- ...core,
1090
- skill: stringFlag(parsed, "skills"),
1091
- agent: stringFlag(parsed, "agents"),
1092
- samples: intFlag(parsed, "samples"),
1093
- })
1094
- : await prepareWorkbenchCloudImproveRequest({
1095
- ...core,
1096
- skill: stringFlag(parsed, "skills"),
1097
- agent: stringFlag(parsed, "agents"),
1098
- samples: intFlag(parsed, "samples"),
1099
- budget: intFlag(parsed, "budget"),
1100
- });
1101
- const syncBefore = await syncWorkbenchRemote({ ...core, remote: remote.name });
1102
- const skillId = await resolveCloudSkillId(source);
1103
- const response = await apiRequest(`/api/workbench/skills/${encodeURIComponent(skillId)}${command === "improve" ? "/improve" : "/runs"}`, { method: "POST", body: cloudExecutionRequestBody(command, request) }, source.baseUrl);
1104
- const runs = response.runs ?? [];
1105
- if (runs.length === 0) {
1106
- throw new WorkbenchCodedError("cloud_run_missing", `Workbench Cloud did not return a run for ${command}.`, {
1107
- retryable: true,
1108
- remediation: "Run workbench log --runs.",
1109
- subject: { remote: remote.name, skillId },
1110
- exitCode: 1,
1080
+ const interrupt = createCloudInterruptController(command, io, showProgress);
1081
+ try {
1082
+ writeCloudProgress(io, `workbench cloud: preparing hosted ${command}.`, showProgress);
1083
+ const remote = await cloudPreScheduleStep(command, interrupt, ensureCloudRemoteForExecution(root, parsed));
1084
+ const source = parseWorkbenchInstallSource(remote.url);
1085
+ if (!source) {
1086
+ throw new WorkbenchCodedError("remote_invalid_url", `Workbench remote is not a Cloud skill URL: ${remote.url}`, {
1087
+ remediation: "Run workbench publish to recreate the Workbench Cloud link.",
1088
+ subject: { remote: remote.name, url: remote.url },
1089
+ exitCode: 2,
1090
+ });
1091
+ }
1092
+ const token = await workbenchCloudToken({ baseUrl: source.baseUrl });
1093
+ if (!token) {
1094
+ throw new WorkbenchCodedError("auth_required", `workbench ${command} --cloud requires Workbench Cloud auth.`, {
1095
+ remediation: `Run workbench login --base-url ${source.baseUrl}.`,
1096
+ exitCode: 1,
1097
+ });
1098
+ }
1099
+ const core = { dir: root, authToken: token };
1100
+ writeCloudProgress(io, "workbench cloud: preparing current source.", showProgress);
1101
+ const request = command === "eval"
1102
+ ? await cloudPreScheduleStep(command, interrupt, prepareWorkbenchCloudEvalRequest({
1103
+ ...core,
1104
+ skill: stringFlag(parsed, "skills"),
1105
+ agent: stringFlag(parsed, "agents"),
1106
+ samples: intFlag(parsed, "samples"),
1107
+ }))
1108
+ : await cloudPreScheduleStep(command, interrupt, prepareWorkbenchCloudImproveRequest({
1109
+ ...core,
1110
+ skill: stringFlag(parsed, "skills"),
1111
+ agent: stringFlag(parsed, "agents"),
1112
+ samples: intFlag(parsed, "samples"),
1113
+ budget: intFlag(parsed, "budget"),
1114
+ }));
1115
+ writeCloudProgress(io, "workbench cloud: syncing source to cloud.", showProgress);
1116
+ const syncBefore = await cloudPreScheduleStep(command, interrupt, syncWorkbenchRemote({ ...core, remote: remote.name }));
1117
+ writeCloudProgress(io, `workbench cloud: scheduling hosted ${command}.`, showProgress);
1118
+ const skillId = await cloudPreScheduleStep(command, interrupt, resolveCloudSkillId(source));
1119
+ const response = await cloudPreScheduleStep(command, interrupt, apiRequest(`/api/workbench/skills/${encodeURIComponent(skillId)}${command === "improve" ? "/improve" : "/runs"}`, { method: "POST", body: cloudExecutionRequestBody(command, request) }, source.baseUrl));
1120
+ const runs = response.runs ?? [];
1121
+ if (runs.length === 0) {
1122
+ throw new WorkbenchCodedError("cloud_run_missing", `Workbench Cloud did not return a run for ${command}.`, {
1123
+ retryable: true,
1124
+ remediation: "Run workbench log --runs.",
1125
+ subject: { remote: remote.name, skillId },
1126
+ exitCode: 1,
1127
+ });
1128
+ }
1129
+ const initialRunIds = runs.map((run) => run.id);
1130
+ interrupt.setRunIds(initialRunIds);
1131
+ const syncAfterSchedule = await syncWorkbenchRemote({ ...core, remote: remote.name });
1132
+ writeCloudProgress(io, `workbench cloud: scheduled hosted ${command} on ${remote.url} (${formatCloudRunStatuses(runs)}).`, showProgress);
1133
+ writeCloudProgress(io, `workbench cloud: waiting for terminal status; press Ctrl-C to detach and resume with workbench show ${displayRef(initialRunIds[0] ?? "run")}.`, showProgress);
1134
+ const completed = await waitForCloudRuns({
1135
+ command,
1136
+ core,
1137
+ interrupt,
1138
+ io,
1139
+ progress: showProgress,
1140
+ remote,
1141
+ runs,
1142
+ source,
1143
+ skillId,
1144
+ initialSync: syncAfterSchedule,
1111
1145
  });
1146
+ return {
1147
+ core,
1148
+ remote,
1149
+ skillId,
1150
+ initialRunIds,
1151
+ runs: completed.runs,
1152
+ ...(completed.detached ? { detached: true } : {}),
1153
+ startVersionId: request.versionId,
1154
+ source,
1155
+ sync: {
1156
+ before: { pushed: syncBefore.pushed, pulled: syncBefore.pulled, upToDate: syncBefore.upToDate },
1157
+ after: { pushed: completed.sync.pushed, pulled: completed.sync.pulled, upToDate: completed.sync.upToDate },
1158
+ },
1159
+ };
1112
1160
  }
1113
- const initialRunIds = runs.map((run) => run.id);
1114
- writeCloudProgress(io, `workbench cloud: scheduled hosted ${command} on ${remote.url} (${formatCloudRunStatuses(runs)}).`, showProgress);
1115
- let initialSyncAfter = syncBefore;
1116
- try {
1117
- initialSyncAfter = await syncWorkbenchRemote({ ...core, remote: remote.name });
1161
+ finally {
1162
+ interrupt.dispose();
1118
1163
  }
1119
- catch (error) {
1120
- writeCloudProgress(io, `workbench cloud: sync while waiting failed; retrying (${oneLineExcerpt(errorMessage(error)) ?? "unknown error"}).`, showProgress);
1121
- }
1122
- writeCloudProgress(io, `workbench cloud: waiting for terminal status; press Ctrl-C to detach and resume with workbench show ${displayRef(initialRunIds[0] ?? "run")}.`, showProgress);
1123
- const completed = await waitForCloudRuns({
1124
- command,
1125
- core,
1126
- io,
1127
- progress: showProgress,
1128
- remote,
1129
- runs,
1130
- initialSync: initialSyncAfter,
1164
+ }
1165
+ function createCloudInterruptController(command, io, progress) {
1166
+ let interrupted = false;
1167
+ let runIds = [];
1168
+ let resolveSignal = () => undefined;
1169
+ const signal = new Promise((resolve) => {
1170
+ resolveSignal = resolve;
1131
1171
  });
1172
+ const onSigint = () => {
1173
+ interrupted = true;
1174
+ if (runIds.length > 0) {
1175
+ writeCloudProgress(io, `workbench cloud: detaching from hosted ${command} (${runIds.map(displayRef).join(", ")}).`, progress);
1176
+ }
1177
+ resolveSignal();
1178
+ };
1179
+ process.once("SIGINT", onSigint);
1132
1180
  return {
1133
- core,
1134
- remote,
1135
- skillId,
1136
- initialRunIds,
1137
- runs: completed.runs,
1138
- ...(completed.detached ? { detached: true } : {}),
1139
- startVersionId: request.versionId,
1140
- source,
1141
- sync: {
1142
- before: { pushed: syncBefore.pushed, pulled: syncBefore.pulled, upToDate: syncBefore.upToDate },
1143
- after: { pushed: completed.sync.pushed, pulled: completed.sync.pulled, upToDate: completed.sync.upToDate },
1181
+ signal,
1182
+ get interrupted() {
1183
+ return interrupted;
1184
+ },
1185
+ get runIds() {
1186
+ return runIds;
1187
+ },
1188
+ setRunIds(nextRunIds) {
1189
+ runIds = [...nextRunIds];
1190
+ },
1191
+ dispose() {
1192
+ process.off("SIGINT", onSigint);
1144
1193
  },
1145
1194
  };
1146
1195
  }
1196
+ async function cloudPreScheduleStep(command, interrupt, step) {
1197
+ if (interrupt.interrupted) {
1198
+ throw cloudCanceledBeforeRunIdError(command);
1199
+ }
1200
+ return await Promise.race([
1201
+ step,
1202
+ interrupt.signal.then(() => {
1203
+ throw cloudCanceledBeforeRunIdError(command);
1204
+ }),
1205
+ ]);
1206
+ }
1207
+ function cloudCanceledBeforeRunIdError(command) {
1208
+ return new WorkbenchCodedError("cloud_canceled", `Hosted ${command} was canceled before Workbench Cloud returned a run id.`, {
1209
+ remediation: `Run workbench ${command} --cloud.`,
1210
+ exitCode: 130,
1211
+ });
1212
+ }
1147
1213
  async function waitForCloudRuns(input) {
1148
1214
  const runIds = input.runs
1149
1215
  .map((run) => run.id)
@@ -1160,83 +1226,57 @@ async function waitForCloudRuns(input) {
1160
1226
  const pollIntervalMs = positiveIntEnv("WORKBENCH_CLOUD_RUN_POLL_INTERVAL_MS") ?? CLOUD_RUN_POLL_INTERVAL_MS;
1161
1227
  const deadline = Date.now() + timeoutMs;
1162
1228
  let runs = [...input.runs];
1163
- let interrupted = false;
1164
1229
  const startedAtMs = Date.now();
1165
1230
  let lastProgressAtMs = startedAtMs;
1166
- let lastSyncErrorMessage;
1167
- const onSigint = () => {
1168
- interrupted = true;
1169
- writeCloudProgress(input.io, `workbench cloud: detaching from hosted ${input.command} (${runIds.map(displayRef).join(", ")}).`, input.progress);
1170
- };
1171
- process.once("SIGINT", onSigint);
1172
1231
  const seenStatuses = new Map();
1173
- try {
1174
- while (true) {
1175
- const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(input.core);
1176
- const snapshotRuns = runIds
1177
- .map((id) => snapshot.runs.find((entry) => entry.id === id))
1178
- .filter((run) => Boolean(run));
1179
- if (snapshotRuns.length > 0) {
1180
- runs = runIds.map((id) => snapshotRuns.find((entry) => entry.id === id) ?? runs.find((entry) => entry.id === id))
1181
- .filter((run) => Boolean(run));
1182
- }
1183
- let wroteProgress = false;
1184
- const nowMs = Date.now();
1185
- for (const run of runs) {
1186
- const previous = seenStatuses.get(run.id);
1187
- if (previous !== run.status) {
1188
- seenStatuses.set(run.id, run.status);
1189
- writeCloudProgress(input.io, `workbench cloud: ${formatCloudRunState(run, startedAtMs, nowMs)}.`, input.progress);
1190
- wroteProgress = input.progress || wroteProgress;
1191
- }
1192
- }
1193
- if (runs.length === runIds.length && runs.every(isTerminalRun)) {
1194
- return { runs, sync };
1195
- }
1196
- if (wroteProgress) {
1197
- lastProgressAtMs = nowMs;
1198
- }
1199
- else if (input.progress && nowMs - lastProgressAtMs >= 60_000) {
1200
- writeCloudProgress(input.io, `workbench cloud: still waiting (${formatCloudRunStates(runs, startedAtMs, nowMs)}).`);
1201
- lastProgressAtMs = nowMs;
1202
- }
1203
- if (interrupted) {
1204
- return { runs, sync, detached: true };
1205
- }
1206
- if (Date.now() >= deadline) {
1207
- throw new WorkbenchCodedError("cloud_run_pending", "Hosted Workbench run is still running.", {
1208
- retryable: true,
1209
- remediation: runIds[0] ? `Run workbench show ${runIds[0]}.` : "Run workbench log --runs.",
1210
- subject: {
1211
- runIds,
1212
- statuses: Object.fromEntries(runs.map((run) => [run.id, run.status])),
1213
- },
1214
- exitCode: 1,
1215
- });
1216
- }
1217
- await sleep(pollIntervalMs);
1218
- if (interrupted) {
1219
- return { runs, sync, detached: true };
1220
- }
1221
- try {
1222
- sync = await syncWorkbenchRemote({ ...input.core, remote: input.remote.name });
1223
- if (lastSyncErrorMessage) {
1224
- writeCloudProgress(input.io, "workbench cloud: sync while waiting recovered.", input.progress);
1225
- lastSyncErrorMessage = undefined;
1226
- }
1227
- }
1228
- catch (error) {
1229
- const message = oneLineExcerpt(errorMessage(error)) ?? "unknown error";
1230
- if (message !== lastSyncErrorMessage) {
1231
- writeCloudProgress(input.io, `workbench cloud: sync while waiting failed; retrying (${message}).`, input.progress);
1232
- lastSyncErrorMessage = message;
1233
- }
1232
+ while (true) {
1233
+ runs = await fetchCloudRuns(input.source.baseUrl, input.skillId, runIds, runs);
1234
+ let wroteProgress = false;
1235
+ const nowMs = Date.now();
1236
+ for (const run of runs) {
1237
+ const previous = seenStatuses.get(run.id);
1238
+ if (previous !== run.status) {
1239
+ seenStatuses.set(run.id, run.status);
1240
+ writeCloudProgress(input.io, `workbench cloud: ${formatCloudRunState(run, startedAtMs, nowMs)}.`, input.progress);
1241
+ wroteProgress = input.progress || wroteProgress;
1234
1242
  }
1235
1243
  }
1244
+ if (runs.length === runIds.length && runs.every(isTerminalRun)) {
1245
+ sync = await syncWorkbenchRemote({ ...input.core, remote: input.remote.name });
1246
+ return { runs, sync };
1247
+ }
1248
+ if (wroteProgress) {
1249
+ lastProgressAtMs = nowMs;
1250
+ }
1251
+ else if (input.progress && nowMs - lastProgressAtMs >= 60_000) {
1252
+ writeCloudProgress(input.io, `workbench cloud: still waiting (${formatCloudRunStates(runs, startedAtMs, nowMs)}).`);
1253
+ lastProgressAtMs = nowMs;
1254
+ }
1255
+ if (input.interrupt.interrupted) {
1256
+ return { runs, sync, detached: true };
1257
+ }
1258
+ if (Date.now() >= deadline) {
1259
+ throw new WorkbenchCodedError("cloud_run_pending", "Hosted Workbench run is still running.", {
1260
+ retryable: true,
1261
+ remediation: runIds[0] ? `Run workbench show ${runIds[0]}.` : "Run workbench log --runs.",
1262
+ subject: {
1263
+ runIds,
1264
+ statuses: Object.fromEntries(runs.map((run) => [run.id, run.status])),
1265
+ },
1266
+ exitCode: 1,
1267
+ });
1268
+ }
1269
+ await Promise.race([sleep(pollIntervalMs), input.interrupt.signal]);
1270
+ if (input.interrupt.interrupted) {
1271
+ return { runs, sync, detached: true };
1272
+ }
1236
1273
  }
1237
- finally {
1238
- process.off("SIGINT", onSigint);
1239
- }
1274
+ }
1275
+ async function fetchCloudRuns(baseUrl, skillId, runIds, fallback) {
1276
+ const responses = await Promise.all(runIds.map((runId) => apiRequest(`/api/workbench/skills/${encodeURIComponent(skillId)}/runs/${encodeURIComponent(runId)}`, {}, baseUrl)));
1277
+ return runIds
1278
+ .map((runId, index) => responses[index]?.run ?? fallback.find((run) => run.id === runId))
1279
+ .filter((run) => Boolean(run));
1240
1280
  }
1241
1281
  function isTerminalRun(run) {
1242
1282
  return run.status === "succeeded" || run.status === "failed" || run.status === "canceled";
@@ -1353,15 +1393,11 @@ async function resolveCloudSkillId(source) {
1353
1393
  function cloudExecutionRequestBody(command, request) {
1354
1394
  return {
1355
1395
  versionId: request.versionId,
1356
- evalHash: request.evalHash,
1357
1396
  skill: request.skill,
1358
- skillBundleHash: request.skillBundleHash,
1359
1397
  agent: request.agent,
1360
- agentHash: request.agentHash,
1361
1398
  samples: request.samples,
1362
1399
  ...(command === "improve" ? {
1363
1400
  budget: request.budget,
1364
- evidenceTraceIds: request.evidenceTraceIds,
1365
1401
  } : {}),
1366
1402
  };
1367
1403
  }
@@ -2701,10 +2737,10 @@ async function workbenchMachineStatus(auth) {
2701
2737
  };
2702
2738
  }
2703
2739
  function scoredRunValue(run) {
2704
- return run.status === "succeeded" && typeof run.score === "number" ? run.score : undefined;
2740
+ return typeof run.score === "number" ? run.score : undefined;
2705
2741
  }
2706
2742
  function scoredJobValue(job) {
2707
- return job.status === "succeeded" && typeof job.score === "number" ? job.score : undefined;
2743
+ return typeof job.score === "number" ? job.score : undefined;
2708
2744
  }
2709
2745
  function snapshotHasWorkflowCase(snapshot) {
2710
2746
  const currentVersion = snapshotVersionByRef(snapshot, snapshot.status.currentVersionId ?? snapshot.refs.current ?? "");
@@ -2760,6 +2796,15 @@ async function statusWithCausalNext(status, auth, core, machine) {
2760
2796
  if (unpublishedCloudRemote) {
2761
2797
  return { ...status, next: "workbench publish" };
2762
2798
  }
2799
+ const currentVersionId = status.project.currentVersionId ?? snapshot?.status.currentVersionId ?? snapshot?.refs.current;
2800
+ const stalePublishedCloudRemote = status.remotes.find((remote) => remote.kind === "workbench-cloud" &&
2801
+ remote.publication.status === "published" &&
2802
+ remote.sync.status === "up_to_date" &&
2803
+ currentVersionId !== undefined &&
2804
+ remote.publication.versionId !== currentVersionId);
2805
+ if (canPublish && stalePublishedCloudRemote) {
2806
+ return { ...status, next: "workbench publish" };
2807
+ }
2763
2808
  const publishedCloudRemote = status.remotes.find((remote) => remote.kind === "workbench-cloud" &&
2764
2809
  remote.publication.status === "published" &&
2765
2810
  Boolean(remote.publication.installUrl));
@@ -2981,41 +3026,6 @@ function manifestOnly(value) {
2981
3026
  }
2982
3027
  return out;
2983
3028
  }
2984
- async function resolveLocalImproverAgent(parsed, core) {
2985
- if (stringFlag(parsed, "agents")) {
2986
- return undefined;
2987
- }
2988
- const agents = await listWorkbenchAgents(core).catch(() => []);
2989
- const status = await workbenchStatusSnapshot(core).catch(() => undefined);
2990
- const defaultAgentName = status?.project.defaultAgent ?? agents[0]?.name;
2991
- const defaultAgent = agents.find((agent) => agent.name === defaultAgentName);
2992
- if (defaultAgent && workbenchSkillImproveCanUseQueuedAdapter(defaultAgent)) {
2993
- return undefined;
2994
- }
2995
- const connected = await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).listStatus().catch(() => []);
2996
- const candidates = connected
2997
- .filter((entry) => entry.status === "connected" &&
2998
- (entry.adapterId === "claude" || entry.adapterId === "codex"))
2999
- .sort((left, right) => {
3000
- const adapterRank = (adapter) => adapter === "claude" ? 0 : adapter === "codex" ? 1 : 2;
3001
- return adapterRank(left.adapterId) - adapterRank(right.adapterId) ||
3002
- (Date.parse(right.updatedAt ?? "") || 0) - (Date.parse(left.updatedAt ?? "") || 0);
3003
- });
3004
- const selected = candidates[0];
3005
- if (!selected) {
3006
- throw new WorkbenchCodedError("auth_required", "workbench improve needs a connected improver.", {
3007
- remediation: "Run workbench login claude (or codex) to connect an improver.",
3008
- exitCode: 1,
3009
- });
3010
- }
3011
- return {
3012
- name: selected.adapterId,
3013
- adapter: selected.adapterId,
3014
- config: {
3015
- auth: selected.slot ? { [selected.slot]: selected.profile } : selected.profile,
3016
- },
3017
- };
3018
- }
3019
3029
  function formatLogEntry(entry) {
3020
3030
  if (entry.kind === "version") {
3021
3031
  return `${entry.createdAt}\tversion\t${displayRef(entry.id)}\tfiles=${entry.fileCount}\t${entry.message}`;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@workbench-ai/workbench",
3
- "version": "0.0.74",
3
+ "version": "0.0.76",
4
4
  "repository": {
5
5
  "type": "git",
6
6
  "url": "git+https://github.com/workbench-ai/workbench.git",
@@ -22,10 +22,10 @@
22
22
  "dependencies": {
23
23
  "skills": "1.5.11",
24
24
  "yaml": "^2.8.2",
25
- "@workbench-ai/workbench-built-in-adapters": "0.0.74",
26
- "@workbench-ai/workbench-contract": "0.0.74",
27
- "@workbench-ai/workbench-protocol": "0.0.74",
28
- "@workbench-ai/workbench-core": "0.0.74"
25
+ "@workbench-ai/workbench-built-in-adapters": "0.0.76",
26
+ "@workbench-ai/workbench-core": "0.0.76",
27
+ "@workbench-ai/workbench-contract": "0.0.76",
28
+ "@workbench-ai/workbench-protocol": "0.0.76"
29
29
  },
30
30
  "devDependencies": {
31
31
  "@tailwindcss/postcss": "^4.2.2",
@@ -36,7 +36,7 @@
36
36
  "react-dom": "^19.2.0",
37
37
  "typescript": "^5.9.2",
38
38
  "vitest": "^3.2.4",
39
- "@workbench-ai/workbench-ui": "0.0.74"
39
+ "@workbench-ai/workbench-ui": "0.0.76"
40
40
  },
41
41
  "scripts": {
42
42
  "build": "rm -rf dist && tsc -p tsconfig.json && chmod 755 dist/workbench.js && node ./scripts/build-dev-open-assets.mjs",