@workbench-ai/workbench 0.0.53 → 0.0.55
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,4 +1,6 @@
|
|
|
1
|
+
import { type WorkbenchProjectStateSource } from "@workbench-ai/workbench-core";
|
|
1
2
|
import { type LocalProjectSource } from "./project-source.js";
|
|
2
3
|
export declare function localBenchmarkFingerprint(project: LocalProjectSource): string;
|
|
4
|
+
export declare function projectStateBenchmarkFingerprint(source: WorkbenchProjectStateSource): string;
|
|
3
5
|
export declare function localCandidateFingerprint(project: LocalProjectSource): string;
|
|
4
6
|
//# sourceMappingURL=benchmark-fingerprint.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"benchmark-fingerprint.d.ts","sourceRoot":"","sources":["../src/benchmark-fingerprint.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"benchmark-fingerprint.d.ts","sourceRoot":"","sources":["../src/benchmark-fingerprint.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,KAAK,2BAA2B,EACjC,MAAM,8BAA8B,CAAC;AAEtC,OAAO,EAGL,KAAK,kBAAkB,EACxB,MAAM,qBAAqB,CAAC;AAE7B,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,CAW7E;AAED,wBAAgB,gCAAgC,CAAC,MAAM,EAAE,2BAA2B,GAAG,MAAM,CAU5F;AAED,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,CAO7E"}
|
|
@@ -12,6 +12,17 @@ export function localBenchmarkFingerprint(project) {
|
|
|
12
12
|
network: project.spec.environment.network?.egress === "open" ? "on" : "off",
|
|
13
13
|
});
|
|
14
14
|
}
|
|
15
|
+
export function projectStateBenchmarkFingerprint(source) {
|
|
16
|
+
return workbenchBenchmarkContentFingerprint({
|
|
17
|
+
sourceYaml: source.source,
|
|
18
|
+
engineResolveFiles: source.engineResolveFiles,
|
|
19
|
+
engineResolveBinding: source.engineResolveBinding,
|
|
20
|
+
adapterFiles: source.adapterFiles,
|
|
21
|
+
runtimeFiles: source.runtimeFiles,
|
|
22
|
+
resources: source.resources,
|
|
23
|
+
network: source.network,
|
|
24
|
+
});
|
|
25
|
+
}
|
|
15
26
|
export function localCandidateFingerprint(project) {
|
|
16
27
|
return workbenchCandidateContentFingerprint({
|
|
17
28
|
sourceYaml: project.specSource,
|
package/dist/command-model.js
CHANGED
|
@@ -140,7 +140,7 @@ const commandHelp = Object.fromEntries(Object.entries({
|
|
|
140
140
|
" workbench improve [SOURCE] [--dir DIR] [--from CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--json]",
|
|
141
141
|
" workbench improve --hosted [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK] [--base CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
|
|
142
142
|
"",
|
|
143
|
-
"Ensure a candidate improvement exists for the selected base, run, budget, and samples. Without --hosted, execution writes local records. With --hosted, Workbench starts or reuses hosted work against the configured remote or --benchmark target. Completed work is reused only when base, run configuration, source, adapters, benchmark, budget, and samples match; use --rerun to intentionally spend again.",
|
|
143
|
+
"Ensure a candidate improvement exists for the selected base, run, budget, and samples. Improve defaults to the evaluated active candidate when it belongs to the current benchmark fingerprint; otherwise it evaluates and uses the authored current candidate. Without --hosted, execution writes local records. With --hosted, Workbench starts or reuses hosted work against the configured remote or --benchmark target. Completed work is reused only when base, run configuration, source, adapters, benchmark, budget, and samples match; use --rerun to intentionally spend again.",
|
|
144
144
|
"",
|
|
145
145
|
"Examples:",
|
|
146
146
|
" workbench improve --budget 1 --samples 1",
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AA4IA,UAAU,KAAK;IACb,KAAK,EAAE,MAAM,CAAC,cAAc,CAAC;IAC7B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;CAC/B;AA4BD,UAAU,iBAAiB;CAAG;AA4K9B,wBAAsB,MAAM,CAC1B,IAAI,EAAE,SAAS,MAAM,EAAE,EACvB,EAAE,GAAE,KAIH,EACD,cAAc,GAAE,iBAAsB,GACrC,OAAO,CAAC,MAAM,CAAC,CAmHjB"}
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ import { createRequire } from "node:module";
|
|
|
5
5
|
import os from "node:os";
|
|
6
6
|
import path from "node:path";
|
|
7
7
|
import { Writable } from "node:stream";
|
|
8
|
-
import { createCandidateFilePreview, createBaselineCandidateJob as createRuntimeBaselineCandidateJob, evaluationScorecardId, evaluationMeanMetrics, executeWorkbenchExecutionJob, engineResolveBindingForSpec, filterOptimizerTraceJobsForCaseIds, filterCandidateSourceFiles, formatWorkbenchCaseSelector, formatWorkbenchSelectionPolicy, workbenchCaseSelectorUsesAllCases, workbenchExecutionPurpose, workbenchRunExecutionFingerprint, createWorkbenchAdapterAuthBundle, createOptimizerTraceInputFiles, DOCKER_SANDBOX_BACKEND, localWorkbenchAdapterAuthStore, materializeWorkbenchRunResult, normalizeSurfaceFiles, planWorkbenchExecutionJobsForPurpose, runWorkbenchExecutionDag, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, summarizeCandidateFiles, validateWorkbenchRunEnvelope, parseWorkbenchAdapterAuthTarget, workbenchEngineCaseIdsForImproveEvaluation, workbenchEngineCaseIdsForSelector, workbenchImproveOptimizeSelector, workbenchImproveSelectionPolicy, workbenchProjectSourceFingerprint, workbenchRuntimeBundleFingerprint, } from "@workbench-ai/workbench-core";
|
|
8
|
+
import { createCandidateFilePreview, createBaselineCandidateJob as createRuntimeBaselineCandidateJob, evaluationScorecardId, evaluationMeanMetrics, executeWorkbenchExecutionJob, engineResolveBindingForSpec, filterOptimizerTraceJobsForCaseIds, filterCandidateSourceFiles, formatWorkbenchCaseSelector, formatWorkbenchSelectionPolicy, workbenchCaseSelectorUsesAllCases, workbenchExecutionPurpose, workbenchRunExecutionFingerprint, createWorkbenchAdapterAuthBundle, createOptimizerTraceInputFiles, DOCKER_SANDBOX_BACKEND, localWorkbenchAdapterAuthStore, materializeWorkbenchRunResult, normalizeSurfaceFiles, planWorkbenchExecutionJobsForPurpose, runWorkbenchExecutionDag, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, summarizeCandidateFiles, validateWorkbenchRunEnvelope, parseWorkbenchAdapterAuthTarget, workbenchEngineCaseIdsForImproveEvaluation, workbenchEngineCaseIdsForSelector, workbenchImproveOptimizeSelector, workbenchImproveSelectionPolicy, workbenchProjectSourceFingerprint, workbenchRuntimeBundleFingerprint, workbenchRuntimeExplicitActiveId, } from "@workbench-ai/workbench-core";
|
|
9
9
|
import { assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterAuthRequirements, normalizeWorkbenchAdapterOperationRequest, readWorkbenchAdapterOperationResult, workbenchAdapterOperationCommand, workbenchAdapterOperationResultPath, withDefaultWorkbenchAdapterAuthProfiles as applyDefaultWorkbenchAdapterAuthProfiles, } from "@workbench-ai/workbench-protocol";
|
|
10
10
|
import { builtinLocalTraceAdapter, builtinLocalTraceAdapters, sortLocalTraceRefs, } from "@workbench-ai/workbench-built-in-adapters/local-traces";
|
|
11
11
|
import { commandUsage, HOSTED_WATCH_LIFECYCLE_NOTE, LOCAL_DEV_OPEN_LIFECYCLE_NOTE, rootUsage, } from "./command-model.js";
|
|
@@ -16,7 +16,7 @@ import { createAdapterCommandEnv } from "./adapter-command-env.js";
|
|
|
16
16
|
import { loadLocalArchive, loadLocalArchiveIndex, exportLocalRuntimeBundle, importLocalRuntimeBundle, runtimeBundleStats, materializeCandidateRoot, readLocalCandidate, readLocalCandidateFiles, readLocalJobs, saveLocalArchive, saveLocalJobs, setLocalActive, upsertLocalRun, upsertLocalCandidate, upsertLocalEvaluation, } from "./local-archive.js";
|
|
17
17
|
import { WorkspaceSnapshotError, } from "./workspace-snapshot.js";
|
|
18
18
|
import { hostedEngineResolveFiles, readLocalProjectSource, WORKBENCH_BENCHMARK_FILE, } from "./project-source.js";
|
|
19
|
-
import { localBenchmarkFingerprint, localCandidateFingerprint, } from "./benchmark-fingerprint.js";
|
|
19
|
+
import { localBenchmarkFingerprint, localCandidateFingerprint, projectStateBenchmarkFingerprint, } from "./benchmark-fingerprint.js";
|
|
20
20
|
const require = createRequire(import.meta.url);
|
|
21
21
|
function getCliVersion() {
|
|
22
22
|
const manifest = require("../package.json");
|
|
@@ -993,35 +993,30 @@ async function localRun(argv, io, runtimeOptions) {
|
|
|
993
993
|
async function ensureLocalImproveBaseCandidate(args) {
|
|
994
994
|
let snapshot = await loadLocalArchive(args.workspace);
|
|
995
995
|
const explicitBase = asOptionalString(args.parsed.flags.from);
|
|
996
|
-
const benchmarkFingerprint =
|
|
996
|
+
const benchmarkFingerprint = localBenchmarkFingerprint(args.projectSource);
|
|
997
|
+
const baseCandidateArgs = {
|
|
998
|
+
workspace: args.workspace,
|
|
999
|
+
benchmarkFingerprint,
|
|
1000
|
+
projectSource: args.projectSource,
|
|
1001
|
+
samples: args.samples,
|
|
1002
|
+
rerun: args.parsed.flags.rerun === true,
|
|
1003
|
+
io: args.io,
|
|
1004
|
+
runtimeOptions: args.runtimeOptions,
|
|
1005
|
+
};
|
|
997
1006
|
if (explicitBase) {
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
}
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
if (
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
explicitBase,
|
|
1011
|
-
"--runs",
|
|
1012
|
-
args.projectSource.spec.candidate.selectedRunId,
|
|
1013
|
-
"--samples",
|
|
1014
|
-
String(args.samples),
|
|
1015
|
-
...(args.parsed.flags.rerun === true ? ["--rerun"] : []),
|
|
1016
|
-
"--json",
|
|
1017
|
-
], createSilentIo(args.io), args.runtimeOptions);
|
|
1018
|
-
if (code !== 0) {
|
|
1019
|
-
throw new UsageError(`Base candidate ${explicitBase} eval failed; improve was not started.`);
|
|
1020
|
-
}
|
|
1021
|
-
snapshot = await loadLocalArchive(args.workspace);
|
|
1022
|
-
candidate = readLocalCandidate(snapshot, explicitBase);
|
|
1007
|
+
return await ensureEvaluatedLocalImproveBaseCandidate({
|
|
1008
|
+
...baseCandidateArgs,
|
|
1009
|
+
candidateId: explicitBase,
|
|
1010
|
+
});
|
|
1011
|
+
}
|
|
1012
|
+
if (snapshot.activeId) {
|
|
1013
|
+
const activeCandidate = readLocalCandidate(snapshot, snapshot.activeId);
|
|
1014
|
+
if (activeCandidate.benchmarkFingerprint === benchmarkFingerprint) {
|
|
1015
|
+
return await ensureEvaluatedLocalImproveBaseCandidate({
|
|
1016
|
+
...baseCandidateArgs,
|
|
1017
|
+
candidateId: activeCandidate.id,
|
|
1018
|
+
});
|
|
1023
1019
|
}
|
|
1024
|
-
return candidate;
|
|
1025
1020
|
}
|
|
1026
1021
|
const candidateFingerprint = localCandidateFingerprint(args.projectSource);
|
|
1027
1022
|
const existing = snapshot.candidates.find((candidate) => candidate.benchmarkFingerprint === benchmarkFingerprint &&
|
|
@@ -1063,6 +1058,36 @@ async function ensureLocalImproveBaseCandidate(args) {
|
|
|
1063
1058
|
}
|
|
1064
1059
|
return evaluated;
|
|
1065
1060
|
}
|
|
1061
|
+
async function ensureEvaluatedLocalImproveBaseCandidate(args) {
|
|
1062
|
+
let snapshot = await loadLocalArchive(args.workspace);
|
|
1063
|
+
let candidate = readLocalCandidate(snapshot, args.candidateId);
|
|
1064
|
+
if (candidate.benchmarkFingerprint !== args.benchmarkFingerprint) {
|
|
1065
|
+
throw new UsageError(`Base candidate ${args.candidateId} belongs to benchmark ${candidate.benchmarkFingerprint}, not ${args.benchmarkFingerprint}.`);
|
|
1066
|
+
}
|
|
1067
|
+
if (!candidate.candidateFingerprint) {
|
|
1068
|
+
throw new UsageError(`Base candidate ${args.candidateId} is missing a candidate fingerprint.`);
|
|
1069
|
+
}
|
|
1070
|
+
if (candidate.status === "evaluated" || candidate.eval) {
|
|
1071
|
+
return candidate;
|
|
1072
|
+
}
|
|
1073
|
+
const code = await localEvaluateCandidate([
|
|
1074
|
+
"--dir",
|
|
1075
|
+
args.workspace,
|
|
1076
|
+
"--candidate",
|
|
1077
|
+
args.candidateId,
|
|
1078
|
+
"--runs",
|
|
1079
|
+
args.projectSource.spec.candidate.selectedRunId,
|
|
1080
|
+
"--samples",
|
|
1081
|
+
String(args.samples),
|
|
1082
|
+
...(args.rerun ? ["--rerun"] : []),
|
|
1083
|
+
"--json",
|
|
1084
|
+
], createSilentIo(args.io), args.runtimeOptions);
|
|
1085
|
+
if (code !== 0) {
|
|
1086
|
+
throw new UsageError(`Base candidate ${args.candidateId} eval failed; improve was not started.`);
|
|
1087
|
+
}
|
|
1088
|
+
snapshot = await loadLocalArchive(args.workspace);
|
|
1089
|
+
return readLocalCandidate(snapshot, args.candidateId);
|
|
1090
|
+
}
|
|
1066
1091
|
function createSilentIo(io) {
|
|
1067
1092
|
const sink = new class extends Writable {
|
|
1068
1093
|
_write(_chunk, _encoding, callback) {
|
|
@@ -2922,6 +2947,7 @@ async function pushBenchmark(argv, io) {
|
|
|
2922
2947
|
const runtime = await exportLocalRuntimeBundle(dir, {
|
|
2923
2948
|
currentBenchmarkFingerprint: localBenchmarkFingerprint(source),
|
|
2924
2949
|
});
|
|
2950
|
+
const localRuntimeFingerprint = workbenchRuntimeBundleFingerprint(runtime);
|
|
2925
2951
|
const state = localProjectState({
|
|
2926
2952
|
source,
|
|
2927
2953
|
runtime,
|
|
@@ -2941,7 +2967,7 @@ async function pushBenchmark(argv, io) {
|
|
|
2941
2967
|
sourceFileCount: sourceFileCount(source),
|
|
2942
2968
|
runtime: runtimeBundleStats(runtime),
|
|
2943
2969
|
sourceFingerprint: state.source.fingerprint,
|
|
2944
|
-
runtimeFingerprint:
|
|
2970
|
+
runtimeFingerprint: localRuntimeFingerprint,
|
|
2945
2971
|
}, parsed, io, () => `Would push benchmark ${source.spec.name}.`);
|
|
2946
2972
|
return 0;
|
|
2947
2973
|
}
|
|
@@ -2990,7 +3016,7 @@ async function pushBenchmark(argv, io) {
|
|
|
2990
3016
|
sourceFileCount: sourceFileCount(source),
|
|
2991
3017
|
runtime: runtimeBundleStats(runtime),
|
|
2992
3018
|
sourceFingerprint: state.source.fingerprint,
|
|
2993
|
-
runtimeFingerprint:
|
|
3019
|
+
runtimeFingerprint: localRuntimeFingerprint,
|
|
2994
3020
|
}, parsed, io, () => `Would push ${sourceFileCount(source)} source file(s) and runtime history to ${origin.remote}.`);
|
|
2995
3021
|
return 0;
|
|
2996
3022
|
}
|
|
@@ -3081,7 +3107,7 @@ async function cloneProject(argv, io) {
|
|
|
3081
3107
|
ref,
|
|
3082
3108
|
outputDir,
|
|
3083
3109
|
fileCount: state.source.files.length,
|
|
3084
|
-
runtime:
|
|
3110
|
+
runtime: projectStateRuntimeStats(state),
|
|
3085
3111
|
sourceFingerprint: state.source.fingerprint ?? state.base.sourceFingerprint ?? null,
|
|
3086
3112
|
runtimeFingerprint: state.base.runtimeFingerprint ?? null,
|
|
3087
3113
|
}, parsed, io, () => `Would clone ${formatBenchmarkRef(ref)} to ${outputDir}.`);
|
|
@@ -3121,7 +3147,7 @@ async function pullProject(argv, io) {
|
|
|
3121
3147
|
dryRun: true,
|
|
3122
3148
|
dir,
|
|
3123
3149
|
fileCount: state.source.files.length,
|
|
3124
|
-
runtime:
|
|
3150
|
+
runtime: projectStateRuntimeStats(state),
|
|
3125
3151
|
sourceFingerprint: state.source.fingerprint ?? state.base.sourceFingerprint ?? null,
|
|
3126
3152
|
runtimeFingerprint: state.base.runtimeFingerprint ?? null,
|
|
3127
3153
|
}, parsed, io, () => `Would pull ${state.source.files.length} source file(s) and runtime history into ${dir}.`);
|
|
@@ -3902,6 +3928,18 @@ function localProjectState(args) {
|
|
|
3902
3928
|
runtime,
|
|
3903
3929
|
};
|
|
3904
3930
|
}
|
|
3931
|
+
function projectStateRuntimeStats(state) {
|
|
3932
|
+
const activeId = workbenchRuntimeExplicitActiveId({
|
|
3933
|
+
candidates: state.runtime.candidates,
|
|
3934
|
+
runs: state.runtime.runs,
|
|
3935
|
+
preferredActiveId: state.runtime.activeId ?? null,
|
|
3936
|
+
benchmarkFingerprint: projectStateBenchmarkFingerprint(state.source),
|
|
3937
|
+
});
|
|
3938
|
+
return runtimeBundleStats({
|
|
3939
|
+
...state.runtime,
|
|
3940
|
+
activeId,
|
|
3941
|
+
});
|
|
3942
|
+
}
|
|
3905
3943
|
function localCandidateRecord(candidate) {
|
|
3906
3944
|
return {
|
|
3907
3945
|
...candidate,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@workbench-ai/workbench",
|
|
3
|
-
"version": "0.0.53",
|
|
3
|
+
"version": "0.0.55",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -21,9 +21,9 @@
|
|
|
21
21
|
],
|
|
22
22
|
"dependencies": {
|
|
23
23
|
"yaml": "^2.8.2",
|
|
24
|
-
"@workbench-ai/workbench-
|
|
25
|
-
"@workbench-ai/workbench-core": "0.0.
|
|
26
|
-
"@workbench-ai/workbench-
|
|
24
|
+
"@workbench-ai/workbench-protocol": "0.0.55",
|
|
25
|
+
"@workbench-ai/workbench-core": "0.0.55",
|
|
26
|
+
"@workbench-ai/workbench-built-in-adapters": "0.0.55"
|
|
27
27
|
},
|
|
28
28
|
"devDependencies": {
|
|
29
29
|
"@tailwindcss/postcss": "^4.2.2",
|