@workbench-ai/workbench 0.0.78 → 0.0.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +164 -29
- package/package.json +6 -6
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAkEA,MAAM,WAAW,KAAK;IACpB,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;CAC/B;AAuTD,wBAAsB,MAAM,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,EAAE,EAAE,GAAE,KAGzD,GAAG,OAAO,CAAC,MAAM,CAAC,CAoMlB"}
|
package/dist/index.js
CHANGED
|
@@ -4,7 +4,7 @@ import { createRequire } from "node:module";
|
|
|
4
4
|
import os from "node:os";
|
|
5
5
|
import path from "node:path";
|
|
6
6
|
import { gzipSync } from "node:zlib";
|
|
7
|
-
import { addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchInspectionSnapshot, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, prepareWorkbenchCloudEvalRequest, prepareWorkbenchCloudImproveRequest, publishWorkbenchVersion, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
|
|
7
|
+
import { addWorkbenchRemote, addWorkbenchAgent, compareWorkbench, createWorkbenchVersionRuntimeSnapshot, createWorkbenchInspectionSnapshot, createWorkbenchAdapterAuthBundle, createWorkbenchReadOnlyInspectionSnapshot, diffWorkbenchVersions, evalWorkbenchSkill, improveWorkbenchSkill, initWorkbenchSkill, listWorkbenchAgents, listWorkbenchVersions, localWorkbenchAdapterAuthStore, parseWorkbenchAdapterAuthTarget, prepareWorkbenchCloudEvalRequest, prepareWorkbenchCloudImproveRequest, publishWorkbenchVersion, removeWorkbenchAgent, showWorkbenchRef, switchWorkbenchVersion, syncWorkbenchRemote, workbenchJobEvidenceForSnapshot, workbenchStatusSnapshot, WorkbenchCodedError, WorkbenchUserError, } from "@workbench-ai/workbench-core";
|
|
8
8
|
import { normalizeWorkbenchSkillName } from "@workbench-ai/workbench-contract";
|
|
9
9
|
import { emitError, emitResult } from "./output.js";
|
|
10
10
|
import { fanOutSkill, manualFanOutCommand } from "./fanout.js";
|
|
@@ -582,13 +582,15 @@ async function handleShow(parsed, io) {
|
|
|
582
582
|
if (artifact) {
|
|
583
583
|
return output(fileListing("artifact", artifact.id, artifact.files), parsed, io, () => formatFileListing("artifact", artifact.id, artifact.files));
|
|
584
584
|
}
|
|
585
|
-
const
|
|
586
|
-
const
|
|
587
|
-
|
|
585
|
+
const selection = runOrJobEvidenceSelection(snapshot, objectRef);
|
|
586
|
+
const details = evidenceDetailsForSelection(snapshot, selection);
|
|
587
|
+
const evidenceFiles = evidenceFilesForSelection(snapshot, selection);
|
|
588
|
+
if (selection.run || selection.jobs.length > 0 || details.length > 0 || evidenceFiles.length > 0) {
|
|
588
589
|
return output({
|
|
590
|
+
jobs: selection.jobs.map(jobEvidenceSummary),
|
|
589
591
|
details: details.map(evidenceDetailSummary),
|
|
590
592
|
files: evidenceFiles.map(fileSummary),
|
|
591
|
-
}, parsed, io, () => formatRunOrJobEvidence(details, evidenceFiles));
|
|
593
|
+
}, parsed, io, () => formatRunOrJobEvidence(selection.jobs, details, evidenceFiles));
|
|
592
594
|
}
|
|
593
595
|
const value = await showWorkbenchRef(ref, core);
|
|
594
596
|
return output(value, parsed, io, () => formatShow(value));
|
|
@@ -723,6 +725,7 @@ const API_REQUEST_MAX_ATTEMPTS = 3;
|
|
|
723
725
|
const API_REQUEST_GZIP_THRESHOLD_BYTES = 1024 * 1024;
|
|
724
726
|
const CLOUD_RUN_TIMEOUT_MS = 30 * 60 * 1000;
|
|
725
727
|
const CLOUD_RUN_POLL_INTERVAL_MS = 3000;
|
|
728
|
+
const LOGIN_WAIT_TIMEOUT_SECONDS = 120;
|
|
726
729
|
async function handleLogin(parsed, io) {
|
|
727
730
|
const provider = optionalPositional(parsed, 1);
|
|
728
731
|
if (provider) {
|
|
@@ -739,16 +742,23 @@ async function handleLogin(parsed, io) {
|
|
|
739
742
|
}
|
|
740
743
|
if (parsed.flags["start-only"] === true && parsed.flags.wait === true) {
|
|
741
744
|
throw new WorkbenchCodedError("usage", "workbench login accepts only one of --start-only or --wait.", {
|
|
742
|
-
remediation:
|
|
745
|
+
remediation: `Run workbench login --start-only or workbench login --wait --timeout ${LOGIN_WAIT_TIMEOUT_SECONDS}.`,
|
|
743
746
|
exitCode: 2,
|
|
744
747
|
});
|
|
745
748
|
}
|
|
746
|
-
const startOnly = parsed.flags["start-only"] === true
|
|
749
|
+
const startOnly = parsed.flags["start-only"] === true ||
|
|
750
|
+
(parsed.flags["no-open"] === true && parsed.flags.wait !== true && parsed.flags.timeout === undefined);
|
|
747
751
|
const waitOnly = parsed.flags.wait === true;
|
|
748
752
|
const timeoutSeconds = intFlag(parsed, "timeout");
|
|
749
753
|
if (startOnly && timeoutSeconds !== undefined) {
|
|
750
754
|
throw new WorkbenchCodedError("usage", "workbench login --timeout only applies with --wait.", {
|
|
751
|
-
remediation:
|
|
755
|
+
remediation: `Run workbench login --start-only, then workbench login --wait --timeout ${LOGIN_WAIT_TIMEOUT_SECONDS}.`,
|
|
756
|
+
exitCode: 2,
|
|
757
|
+
});
|
|
758
|
+
}
|
|
759
|
+
if (waitOnly && timeoutSeconds === undefined) {
|
|
760
|
+
throw new WorkbenchCodedError("usage", "workbench login --wait requires --timeout N.", {
|
|
761
|
+
remediation: `Run workbench login --wait --timeout ${LOGIN_WAIT_TIMEOUT_SECONDS}.`,
|
|
752
762
|
exitCode: 2,
|
|
753
763
|
});
|
|
754
764
|
}
|
|
@@ -773,8 +783,8 @@ async function handleLogin(parsed, io) {
|
|
|
773
783
|
verificationUriComplete: record.verification_uri_complete,
|
|
774
784
|
userCode: record.user_code,
|
|
775
785
|
expiresAt: record.expiresAt,
|
|
776
|
-
resume:
|
|
777
|
-
}, parsed, io, () => `Open ${record.verification_uri_complete}\nCode: ${record.user_code}\nResume: workbench login --wait`);
|
|
786
|
+
resume: `workbench login --wait --timeout ${LOGIN_WAIT_TIMEOUT_SECONDS}`,
|
|
787
|
+
}, parsed, io, () => `Open ${record.verification_uri_complete}\nCode: ${record.user_code}\nResume: workbench login --wait --timeout ${LOGIN_WAIT_TIMEOUT_SECONDS}`);
|
|
778
788
|
}
|
|
779
789
|
await writePendingDeviceAuthorization(record);
|
|
780
790
|
if (freshAuthorization && !parsed.flags.json) {
|
|
@@ -1012,6 +1022,9 @@ function installNextCommand(fanout) {
|
|
|
1012
1022
|
}
|
|
1013
1023
|
function formatInstallOutcome(result, dryRun) {
|
|
1014
1024
|
if (dryRun) {
|
|
1025
|
+
if (result.result === "unchanged") {
|
|
1026
|
+
return `Already installed ${result.directoryName} at ${result.destination} (unchanged; dry run made no changes).`;
|
|
1027
|
+
}
|
|
1015
1028
|
return `Would install ${result.directoryName} to ${result.destination} (${formatFileCount(result.filesCopied)}).`;
|
|
1016
1029
|
}
|
|
1017
1030
|
if (result.result === "unchanged") {
|
|
@@ -1075,7 +1088,7 @@ function withTimeout(promise, timeoutMs) {
|
|
|
1075
1088
|
}
|
|
1076
1089
|
async function startCloudExecution(command, parsed, io) {
|
|
1077
1090
|
const root = dirFlag(parsed) ?? process.cwd();
|
|
1078
|
-
const showProgress =
|
|
1091
|
+
const showProgress = true;
|
|
1079
1092
|
const interrupt = createCloudInterruptController(command, io, showProgress);
|
|
1080
1093
|
try {
|
|
1081
1094
|
writeCloudProgress(io, `workbench cloud: preparing hosted ${command}.`, showProgress);
|
|
@@ -1111,6 +1124,14 @@ async function startCloudExecution(command, parsed, io) {
|
|
|
1111
1124
|
samples: intFlag(parsed, "samples"),
|
|
1112
1125
|
budget: intFlag(parsed, "budget"),
|
|
1113
1126
|
}));
|
|
1127
|
+
writeCloudProgress(io, "workbench cloud: checking provider auth.", showProgress);
|
|
1128
|
+
await cloudPreScheduleStep(command, interrupt, preflightCloudAdapterAuth({
|
|
1129
|
+
root,
|
|
1130
|
+
versionId: request.versionId,
|
|
1131
|
+
parsed,
|
|
1132
|
+
baseUrl: source.baseUrl,
|
|
1133
|
+
authToken: token,
|
|
1134
|
+
}));
|
|
1114
1135
|
writeCloudProgress(io, "workbench cloud: syncing source to cloud.", showProgress);
|
|
1115
1136
|
const syncBefore = await cloudPreScheduleStep(command, interrupt, syncWorkbenchRemote({ ...core, remote: remote.name }));
|
|
1116
1137
|
writeCloudProgress(io, `workbench cloud: scheduling hosted ${command}.`, showProgress);
|
|
@@ -1209,6 +1230,79 @@ function cloudCanceledBeforeRunIdError(command) {
|
|
|
1209
1230
|
exitCode: 130,
|
|
1210
1231
|
});
|
|
1211
1232
|
}
|
|
1233
|
+
/**
 * Checks, before a hosted run is scheduled, that every adapter auth target
 * required by the selected agents is reported as connected by the cloud API.
 *
 * @param {object} input
 * @param {string} input.root - Directory passed as `dir` when building the read-only snapshot.
 * @param {string} input.versionId - Version ref resolved against the snapshot.
 * @param {object} input.parsed - Parsed CLI arguments; the `skills` and `agents` flags are read.
 * @param {string} input.baseUrl - Base URL handed to the adapter-auth status request.
 * @param {string} input.authToken - Token forwarded to the snapshot helpers.
 * @returns {Promise<void>} Resolves when no target is required or all are connected.
 * @throws {WorkbenchCodedError} `version_not_found` when the ref does not resolve,
 *   `adapter_auth_required` for the first required target without a connected status.
 */
async function preflightCloudAdapterAuth(input) {
    const { root, versionId, parsed, baseUrl, authToken } = input;
    const snapshot = await createWorkbenchReadOnlyInspectionSnapshot({ dir: root, authToken });
    const version = snapshotVersionByRef(snapshot, versionId);
    if (!version) {
        throw new WorkbenchCodedError("version_not_found", `Version not found: ${versionId}`, {
            remediation: "Run workbench status.",
            subject: { versionId },
            exitCode: 1,
        });
    }
    const runtime = await createWorkbenchVersionRuntimeSnapshot(version, {
        skill: stringFlag(parsed, "skills"),
        agent: stringFlag(parsed, "agents"),
        authToken,
    });
    const required = uniqueAdapterAuthTargets(runtime.selectedAgents.flatMap(cloudAdapterAuthTargetsForAgent));
    if (required.length === 0) {
        // Nothing to verify, so the status request is skipped entirely.
        return;
    }
    const statuses = await fetchCloudAdapterAuthStatuses(baseUrl);
    const isConnected = (target) => statuses.some((status) => adapterAuthStatusMatchesTarget(status, target));
    const missing = required.find((target) => !isConnected(target));
    if (!missing) {
        return;
    }
    const loginHint = `Run workbench login ${missing.adapterId}.`;
    const subject = { adapterId: missing.adapterId, profile: missing.profile };
    if (missing.slot) {
        subject.slot = missing.slot;
    }
    throw new WorkbenchCodedError("adapter_auth_required", `${formatCloudAdapterAuthTarget(missing)} disconnected. ${loginHint}`, {
        remediation: loginHint,
        subject,
        exitCode: 1,
    });
}
|
|
1267
|
+
/**
 * Derives the adapter auth targets an agent needs for a hosted run.
 *
 * Only the "codex" and "claude" adapters (compared after trim + lowercase)
 * produce targets. `agent.config.auth` may be:
 *   - a non-blank string: one target with that (trimmed) profile;
 *   - a plain object: one target per entry whose value is a non-blank string,
 *     keyed by slot;
 *   - anything else: a single target with profile "default".
 *
 * @param {{ adapter: string, config: { auth?: unknown } }} agent
 * @returns {Array<{ adapterId: string, profile: string, slot?: string }>}
 */
function cloudAdapterAuthTargetsForAgent(agent) {
    const adapterId = agent.adapter.trim().toLowerCase();
    if (!["codex", "claude"].includes(adapterId)) {
        return [];
    }
    const auth = agent.config.auth;
    if (typeof auth === "string" && auth.trim()) {
        return [{ adapterId, profile: auth.trim() }];
    }
    const isSlotMap = Boolean(auth) && typeof auth === "object" && !Array.isArray(auth);
    if (!isSlotMap) {
        return [{ adapterId, profile: "default" }];
    }
    const targets = [];
    for (const [slot, profile] of Object.entries(auth)) {
        if (typeof profile === "string" && profile.trim().length > 0) {
            targets.push({ adapterId, slot, profile: profile.trim() });
        }
    }
    return targets;
}
|
|
1283
|
+
/**
 * Collapses targets that share the same key (as computed by
 * adapterAuthTargetKey) and returns the survivors sorted by that key.
 * When several targets share a key, the last one encountered is kept.
 *
 * @param {Iterable<object>} targets
 * @returns {object[]} De-duplicated targets ordered by key via localeCompare.
 */
function uniqueAdapterAuthTargets(targets) {
    const keyed = new Map(Array.from(targets, (target) => [adapterAuthTargetKey(target), target]));
    const compareByKey = (left, right) => adapterAuthTargetKey(left).localeCompare(adapterAuthTargetKey(right));
    return Array.from(keyed.values()).sort(compareByKey);
}
|
|
1290
|
+
/**
 * Requests "/api/workbench/auth/adapters" through apiRequest and returns the
 * `adapters` field of the response, or an empty array when it is nullish.
 *
 * @param {string} baseUrl - Forwarded as the third argument of apiRequest.
 * @returns {Promise<Array>} Adapter auth status records.
 */
async function fetchCloudAdapterAuthStatuses(baseUrl) {
    const { adapters } = await apiRequest("/api/workbench/auth/adapters", {}, baseUrl);
    return adapters ?? [];
}
|
|
1294
|
+
/**
 * Tells whether a status record satisfies an auth target: the status must be
 * "connected" and agree with the target on adapterId, profile, and slot.
 * A null slot and a missing slot are treated as the same value.
 *
 * @param {{ status: string, adapterId: string, profile: string, slot?: string | null }} status
 * @param {{ adapterId: string, profile: string, slot?: string | null }} target
 * @returns {boolean}
 */
function adapterAuthStatusMatchesTarget(status, target) {
    if (status.status !== "connected") {
        return false;
    }
    if (status.adapterId !== target.adapterId) {
        return false;
    }
    if (status.profile !== target.profile) {
        return false;
    }
    // Normalize null to undefined on both sides before comparing slots.
    const statusSlot = status.slot ?? undefined;
    const targetSlot = target.slot ?? undefined;
    return statusSlot === targetSlot;
}
|
|
1300
|
+
/**
 * Builds the "adapterId/slot/profile" string used to identify a target.
 * A nullish slot is rendered as "_".
 *
 * @param {{ adapterId: string, profile: string, slot?: string | null }} target
 * @returns {string}
 */
function adapterAuthTargetKey(target) {
    const { adapterId, profile } = target;
    const slotPart = target.slot ?? "_";
    return `${adapterId}/${slotPart}/${profile}`;
}
|
|
1303
|
+
/**
 * Renders a target for user-facing messages as "adapterId" or, when the
 * target has a truthy slot, "adapterId/slot". The profile is not included.
 *
 * @param {{ adapterId: string, slot?: string | null }} target
 * @returns {string}
 */
function formatCloudAdapterAuthTarget(target) {
    if (!target.slot) {
        return `${target.adapterId}`;
    }
    return `${target.adapterId}/${target.slot}`;
}
|
|
1212
1306
|
async function waitForCloudRuns(input) {
|
|
1213
1307
|
const runIds = input.runs
|
|
1214
1308
|
.map((run) => run.id)
|
|
@@ -1726,7 +1820,7 @@ async function pollDeviceToken(baseUrl, authorization, timeoutSeconds) {
|
|
|
1726
1820
|
}
|
|
1727
1821
|
throw new WorkbenchCodedError("login_pending", "Device login is still waiting for browser authorization.", {
|
|
1728
1822
|
retryable: true,
|
|
1729
|
-
remediation:
|
|
1823
|
+
remediation: `Authorize the device in the browser, then run workbench login --wait --timeout ${LOGIN_WAIT_TIMEOUT_SECONDS}.`,
|
|
1730
1824
|
subject: {
|
|
1731
1825
|
retryAfterSeconds: Math.max(1, Math.ceil(intervalMs / 1000)),
|
|
1732
1826
|
verificationUri: authorization.verification_uri,
|
|
@@ -2750,11 +2844,6 @@ function snapshotHasWorkflowCase(snapshot) {
|
|
|
2750
2844
|
/^\.workbench\/cases\/[^/]+\/case\.ya?ml$/u.test(file.path)) ?? [];
|
|
2751
2845
|
return caseFiles.some((file) => file.kind === "text" && !/\n\s*smoke:\s*true(?:\s|$)/u.test(`\n${file.content}`));
|
|
2752
2846
|
}
|
|
2753
|
-
function installHandleFromStatusRemote(remote) {
|
|
2754
|
-
const publicationUrl = remote.publication.status === "published" ? remote.publication.installUrl : undefined;
|
|
2755
|
-
const source = parseWorkbenchInstallSource(publicationUrl ?? remote.url);
|
|
2756
|
-
return source ? `${source.owner}/${source.skill}` : publicationUrl ?? remote.url;
|
|
2757
|
-
}
|
|
2758
2847
|
async function statusWithCausalNext(status, auth, core, machine) {
|
|
2759
2848
|
if (!status.project.initialized) {
|
|
2760
2849
|
return {
|
|
@@ -2816,12 +2905,6 @@ async function statusWithCausalNext(status, auth, core, machine) {
|
|
|
2816
2905
|
}
|
|
2817
2906
|
return { ...status, next: "workbench publish" };
|
|
2818
2907
|
}
|
|
2819
|
-
const publishedCloudRemote = status.remotes.find((remote) => remote.kind === "workbench-cloud" &&
|
|
2820
|
-
remote.publication.status === "published" &&
|
|
2821
|
-
Boolean(remote.publication.installUrl));
|
|
2822
|
-
if (publishedCloudRemote) {
|
|
2823
|
-
return { ...status, next: `workbench install ${installHandleFromStatusRemote(publishedCloudRemote)}` };
|
|
2824
|
-
}
|
|
2825
2908
|
return {
|
|
2826
2909
|
...status,
|
|
2827
2910
|
next: null,
|
|
@@ -2880,6 +2963,16 @@ function displayRef(id) {
|
|
|
2880
2963
|
function shortenCommandRefs(command) {
|
|
2881
2964
|
return command.replace(/\b(?:v_[0-9a-f]{8,}|(?:run|job|trace|artifact)_[a-z0-9_-]+)/giu, (match) => displayRef(match));
|
|
2882
2965
|
}
|
|
2966
|
+
/**
 * Shortens a list of ids for display, using the smallest prefix length
 * between 8 and 32 characters at which all shortened refs are distinct.
 * Ids no longer than the chosen length are left untouched. If no length in
 * that range separates the ids, the full ids are returned.
 *
 * Repeated ids are collapsed first. Without that, a duplicate makes every
 * prefix length look like a collision, so the function would always fall
 * through to the full-length ids and list the same candidate twice.
 *
 * @param {Iterable<string>} ids - Candidate ids, possibly with repeats.
 * @returns {string[]} Distinct display refs, in first-seen order.
 */
function displayCandidateRefs(ids) {
    const uniqueIds = [...new Set(ids)];
    for (let length = 8; length <= 32; length += 1) {
        const refs = uniqueIds.map((id) => (id.length > length ? id.slice(0, length) : id));
        if (new Set(refs).size === refs.length) {
            return refs;
        }
    }
    return uniqueIds;
}
|
|
2883
2976
|
function snapshotVersionByRef(snapshot, ref) {
|
|
2884
2977
|
const requested = ref.trim();
|
|
2885
2978
|
const normalized = requested === "current" ? snapshot.refs.current ?? "" : requested;
|
|
@@ -2888,7 +2981,7 @@ function snapshotVersionByRef(snapshot, ref) {
|
|
|
2888
2981
|
}
|
|
2889
2982
|
const candidates = snapshot.versions.filter((version) => snapshotVersionRefMatches(version, normalized));
|
|
2890
2983
|
if (candidates.length > 1) {
|
|
2891
|
-
throw new WorkbenchCodedError("ref_ambiguous", `Version ref is ambiguous: ${ref}. Candidates: ${candidates.map((version) =>
|
|
2984
|
+
throw new WorkbenchCodedError("ref_ambiguous", `Version ref is ambiguous: ${ref}. Candidates: ${displayCandidateRefs(candidates.map((version) => version.id)).join(", ")}.`, {
|
|
2892
2985
|
subject: { ref, candidates: candidates.map((version) => version.id) },
|
|
2893
2986
|
exitCode: 2,
|
|
2894
2987
|
});
|
|
@@ -2911,7 +3004,7 @@ function snapshotObjectByRef(entries, ref, kind) {
|
|
|
2911
3004
|
}
|
|
2912
3005
|
const candidates = entries.filter((entry) => objectRefMatches(entry.id, normalized));
|
|
2913
3006
|
if (candidates.length > 1) {
|
|
2914
|
-
throw new WorkbenchCodedError("ref_ambiguous", `${capitalize(kind)} ref is ambiguous: ${ref}. Candidates: ${candidates.map((entry) =>
|
|
3007
|
+
throw new WorkbenchCodedError("ref_ambiguous", `${capitalize(kind)} ref is ambiguous: ${ref}. Candidates: ${displayCandidateRefs(candidates.map((entry) => entry.id)).slice(0, 8).join(", ")}.`, {
|
|
2915
3008
|
subject: { ref, candidates: candidates.map((entry) => entry.id).slice(0, 20) },
|
|
2916
3009
|
exitCode: 2,
|
|
2917
3010
|
});
|
|
@@ -2932,7 +3025,7 @@ function runOrJobEvidenceSelection(snapshot, ref) {
|
|
|
2932
3025
|
const run = snapshotObjectByRef(snapshot.runs, ref, "run");
|
|
2933
3026
|
const job = snapshotObjectByRef(snapshot.jobs, ref, "job");
|
|
2934
3027
|
if (run && job) {
|
|
2935
|
-
throw new WorkbenchCodedError("ref_ambiguous", `Run/job ref is ambiguous: ${ref}. Candidates: ${
|
|
3028
|
+
throw new WorkbenchCodedError("ref_ambiguous", `Run/job ref is ambiguous: ${ref}. Candidates: ${displayCandidateRefs([run.id, job.id]).join(", ")}.`, {
|
|
2936
3029
|
subject: { ref, candidates: [run.id, job.id] },
|
|
2937
3030
|
exitCode: 2,
|
|
2938
3031
|
});
|
|
@@ -2947,6 +3040,9 @@ function runOrJobEvidenceSelection(snapshot, ref) {
|
|
|
2947
3040
|
}
|
|
2948
3041
|
/**
 * Resolves `ref` to a run/job selection and returns the evidence files for
 * that selection.
 *
 * @param {object} snapshot - Inspection snapshot passed through to both helpers.
 * @param {string} ref - Run or job ref.
 * @returns {Array} Result of evidenceFilesForSelection for the resolved selection.
 */
function evidenceFilesForRunOrJob(snapshot, ref) {
    return evidenceFilesForSelection(snapshot, runOrJobEvidenceSelection(snapshot, ref));
}
|
|
3045
|
+
function evidenceFilesForSelection(snapshot, selection) {
|
|
2950
3046
|
if (!selection.run && selection.jobs.length === 0) {
|
|
2951
3047
|
return [];
|
|
2952
3048
|
}
|
|
@@ -3028,10 +3124,32 @@ function isUserFacingTraceEvidenceFile(file) {
|
|
|
3028
3124
|
function evidencePathSegment(value) {
|
|
3029
3125
|
return value.replace(/[^A-Za-z0-9._-]+/gu, "-") || "_";
|
|
3030
3126
|
}
|
|
3031
|
-
function formatRunOrJobEvidence(details, files) {
|
|
3127
|
+
function formatRunOrJobEvidence(jobs, details, files) {
|
|
3128
|
+
const jobLines = jobs.length > 0 ? ["Jobs:", ...jobs.map(formatJobEvidenceSummary)] : [];
|
|
3032
3129
|
const detailLines = details.map(formatTraceDetail).filter(Boolean);
|
|
3033
3130
|
const fileLines = files.length > 0 ? ["Files:", ...files.map((file) => file.path)] : [];
|
|
3034
|
-
return [...detailLines, ...fileLines].join("\n") || "No evidence.";
|
|
3131
|
+
return [...jobLines, ...detailLines, ...fileLines].join("\n") || "No evidence.";
|
|
3132
|
+
}
|
|
3133
|
+
/**
 * Projects a job onto the fields reported in structured output:
 * id, runId, caseId, sample, status, plus `score` when it is not undefined
 * and `error` when it is truthy.
 *
 * @param {object} job
 * @returns {object} A new summary object; the job itself is not modified.
 */
function jobEvidenceSummary(job) {
    const { id, runId, caseId, sample, status } = job;
    const summary = { id, runId, caseId, sample, status };
    if (job.score !== undefined) {
        summary.score = job.score;
    }
    if (job.error) {
        summary.error = job.error;
    }
    return summary;
}
|
|
3144
|
+
/**
 * Formats one job as a tab-separated line for human-readable output:
 * display ref, `case=…`, `sample=…`, status, then `score=…` (three decimals)
 * and `error=…` (collapsed with singleLine) when present. Empty parts are
 * dropped before joining.
 *
 * The score is only rendered when it is a number. Checking just for
 * `undefined` let a `null` or otherwise non-numeric score reach `.toFixed`,
 * which throws a TypeError and aborts the whole listing.
 *
 * @param {object} job - Job record with id, caseId, sample, status and optional score/error.
 * @returns {string} Tab-separated summary line.
 */
function formatJobEvidenceSummary(job) {
    return [
        displayRef(job.id),
        `case=${job.caseId}`,
        `sample=${job.sample}`,
        job.status,
        typeof job.score === "number" ? `score=${job.score.toFixed(3)}` : undefined,
        job.error ? `error=${singleLine(job.error)}` : undefined,
    ].filter(Boolean).join("\t");
}
|
|
3036
3154
|
function evidenceDetailSummary(detail) {
|
|
3037
3155
|
return {
|
|
@@ -3107,6 +3225,9 @@ async function fileForRunOrJobRef(core, objectRef, requestedPath) {
|
|
|
3107
3225
|
}
|
|
3108
3226
|
/**
 * Resolves `ref` to a run/job selection and returns the evidence details for
 * that selection.
 *
 * @param {object} snapshot - Inspection snapshot passed through to both helpers.
 * @param {string} ref - Run or job ref.
 * @returns {Array} Result of evidenceDetailsForSelection for the resolved selection.
 */
function evidenceDetailsForRunOrJob(snapshot, ref) {
    return evidenceDetailsForSelection(snapshot, runOrJobEvidenceSelection(snapshot, ref));
}
|
|
3230
|
+
function evidenceDetailsForSelection(snapshot, selection) {
|
|
3110
3231
|
return selection.jobs.flatMap((entry) => {
|
|
3111
3232
|
const detail = workbenchJobEvidenceForSnapshot(snapshot, {
|
|
3112
3233
|
runId: entry.runId,
|
|
@@ -3254,7 +3375,21 @@ async function evalSuccessNextCommand(core, runs) {
|
|
|
3254
3375
|
return "edit .workbench/cases, then run workbench eval";
|
|
3255
3376
|
}
|
|
3256
3377
|
const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core);
|
|
3257
|
-
|
|
3378
|
+
if (!snapshotHasWorkflowCase(snapshot)) {
|
|
3379
|
+
return "edit .workbench/cases, then run workbench eval";
|
|
3380
|
+
}
|
|
3381
|
+
const auth = await workbenchCliAuthStatus();
|
|
3382
|
+
if (auth.workbenchCloud.status !== "authenticated") {
|
|
3383
|
+
return "workbench login";
|
|
3384
|
+
}
|
|
3385
|
+
const status = await workbenchStatusSnapshot(core);
|
|
3386
|
+
return statusHasPublishedCurrentCloudSource(status) ? null : "workbench publish";
|
|
3387
|
+
}
|
|
3388
|
+
/**
 * Reports whether the project's current version is already published on a
 * "workbench-cloud" remote, i.e. some such remote has a publication with
 * status "published" whose versionId equals the current version id.
 *
 * @param {{ project: { currentVersionId?: string }, remotes: Array<object> }} status
 * @returns {boolean} False when there is no current version id or no matching remote.
 */
function statusHasPublishedCurrentCloudSource(status) {
    const currentVersionId = status.project.currentVersionId;
    if (!currentVersionId) {
        return false;
    }
    return status.remotes.some((remote) => {
        if (remote.kind !== "workbench-cloud") {
            return false;
        }
        const { publication } = remote;
        return publication.status === "published" && publication.versionId === currentVersionId;
    });
}
|
|
3259
3394
|
function formatStatusSnapshot(status) {
|
|
3260
3395
|
const lines = [
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@workbench-ai/workbench",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.80",
|
|
4
4
|
"repository": {
|
|
5
5
|
"type": "git",
|
|
6
6
|
"url": "git+https://github.com/workbench-ai/workbench.git",
|
|
@@ -22,10 +22,10 @@
|
|
|
22
22
|
"dependencies": {
|
|
23
23
|
"skills": "1.5.11",
|
|
24
24
|
"yaml": "^2.8.2",
|
|
25
|
-
"@workbench-ai/workbench-built-in-adapters": "0.0.
|
|
26
|
-
"@workbench-ai/workbench-protocol": "0.0.
|
|
27
|
-
"@workbench-ai/workbench-contract": "0.0.
|
|
28
|
-
"@workbench-ai/workbench-core": "0.0.
|
|
25
|
+
"@workbench-ai/workbench-built-in-adapters": "0.0.80",
|
|
26
|
+
"@workbench-ai/workbench-protocol": "0.0.80",
|
|
27
|
+
"@workbench-ai/workbench-contract": "0.0.80",
|
|
28
|
+
"@workbench-ai/workbench-core": "0.0.80"
|
|
29
29
|
},
|
|
30
30
|
"devDependencies": {
|
|
31
31
|
"@tailwindcss/postcss": "^4.2.2",
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
"react-dom": "^19.2.0",
|
|
37
37
|
"typescript": "^5.9.2",
|
|
38
38
|
"vitest": "^3.2.4",
|
|
39
|
-
"@workbench-ai/workbench-ui": "0.0.
|
|
39
|
+
"@workbench-ai/workbench-ui": "0.0.80"
|
|
40
40
|
},
|
|
41
41
|
"scripts": {
|
|
42
42
|
"build": "rm -rf dist && tsc -p tsconfig.json && chmod 755 dist/workbench.js && node ./scripts/build-dev-open-assets.mjs",
|