npm - @workbench-ai/workbench - Versions diffs - 0.0.53 → 0.0.55 - Mend

@workbench-ai/workbench 0.0.53 → 0.0.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/benchmark-fingerprint.d.ts +2 -0
package/dist/benchmark-fingerprint.d.ts.map +1 -1
package/dist/benchmark-fingerprint.js +11 -0
package/dist/command-model.js +1 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +71 -33
package/package.json +4 -4

package/dist/benchmark-fingerprint.d.ts CHANGED Viewed

@@ -1,4 +1,6 @@
+import { type WorkbenchProjectStateSource } from "@workbench-ai/workbench-core";
 import { type LocalProjectSource } from "./project-source.js";
 export declare function localBenchmarkFingerprint(project: LocalProjectSource): string;
+export declare function projectStateBenchmarkFingerprint(source: WorkbenchProjectStateSource): string;
 export declare function localCandidateFingerprint(project: LocalProjectSource): string;
 //# sourceMappingURL=benchmark-fingerprint.d.ts.map

package/dist/benchmark-fingerprint.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"benchmark-fingerprint.d.ts","sourceRoot":"","sources":["../src/benchmark-fingerprint.ts"],"names":[],"mappings":"~~AAOA~~,OAAO,EAGL,KAAK,kBAAkB,EACxB,MAAM,qBAAqB,CAAC;AAE7B,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,CAW7E;AAED,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,CAO7E"}
1	+ {"version":3,"file":"benchmark-fingerprint.d.ts","sourceRoot":"","sources":["../src/benchmark-fingerprint.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,KAAK,2BAA2B,EACjC,MAAM,8BAA8B,CAAC;AAEtC,OAAO,EAGL,KAAK,kBAAkB,EACxB,MAAM,qBAAqB,CAAC;AAE7B,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,CAW7E;AAED,wBAAgB,gCAAgC,CAAC,MAAM,EAAE,2BAA2B,GAAG,MAAM,CAU5F;AAED,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,CAO7E"}

package/dist/benchmark-fingerprint.js CHANGED Viewed

@@ -12,6 +12,17 @@ export function localBenchmarkFingerprint(project) {
         network: project.spec.environment.network?.egress === "open" ? "on" : "off",
     });
 }
+export function projectStateBenchmarkFingerprint(source) {
+    return workbenchBenchmarkContentFingerprint({
+        sourceYaml: source.source,
+        engineResolveFiles: source.engineResolveFiles,
+        engineResolveBinding: source.engineResolveBinding,
+        adapterFiles: source.adapterFiles,
+        runtimeFiles: source.runtimeFiles,
+        resources: source.resources,
+        network: source.network,
+    });
+}
 export function localCandidateFingerprint(project) {
     return workbenchCandidateContentFingerprint({
         sourceYaml: project.specSource,

package/dist/command-model.js CHANGED Viewed

@@ -140,7 +140,7 @@ const commandHelp = Object.fromEntries(Object.entries({
         "  workbench improve [SOURCE] [--dir DIR] [--from CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--json]",
         "  workbench improve --hosted [SOURCE] [--dir DIR] [--benchmark OWNER/BENCHMARK] [--base CANDIDATE_ID] [--runs RUN] [--budget N] [--samples N] [--rerun] [--watch] [--dry-run] [--json]",
         "",
-        "Ensure a candidate improvement exists for the selected base, run, budget, and samples. Without --hosted, execution writes local records. With --hosted, Workbench starts or reuses hosted work against the configured remote or --benchmark target. Completed work is reused only when base, run configuration, source, adapters, benchmark, budget, and samples match; use --rerun to intentionally spend again.",
+        "Ensure a candidate improvement exists for the selected base, run, budget, and samples. Improve defaults to the evaluated active candidate when it belongs to the current benchmark fingerprint; otherwise it evaluates and uses the authored current candidate. Without --hosted, execution writes local records. With --hosted, Workbench starts or reuses hosted work against the configured remote or --benchmark target. Completed work is reused only when base, run configuration, source, adapters, benchmark, budget, and samples match; use --rerun to intentionally spend again.",
         "",
         "Examples:",
         "  workbench improve --budget 1 --samples 1",

package/dist/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"~~AA0IA~~,UAAU,KAAK;IACb,KAAK,EAAE,MAAM,CAAC,cAAc,CAAC;IAC7B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;CAC/B;AA4BD,UAAU,iBAAiB;CAAG;AA4K9B,wBAAsB,MAAM,CAC1B,IAAI,EAAE,SAAS,MAAM,EAAE,EACvB,EAAE,GAAE,KAIH,EACD,cAAc,GAAE,iBAAsB,GACrC,OAAO,CAAC,MAAM,CAAC,CAmHjB"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AA4IA,UAAU,KAAK;IACb,KAAK,EAAE,MAAM,CAAC,cAAc,CAAC;IAC7B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;CAC/B;AA4BD,UAAU,iBAAiB;CAAG;AA4K9B,wBAAsB,MAAM,CAC1B,IAAI,EAAE,SAAS,MAAM,EAAE,EACvB,EAAE,GAAE,KAIH,EACD,cAAc,GAAE,iBAAsB,GACrC,OAAO,CAAC,MAAM,CAAC,CAmHjB"}

package/dist/index.js CHANGED Viewed

@@ -5,7 +5,7 @@ import { createRequire } from "node:module";
 import os from "node:os";
 import path from "node:path";
 import { Writable } from "node:stream";
-import { createCandidateFilePreview, createBaselineCandidateJob as createRuntimeBaselineCandidateJob, evaluationScorecardId, evaluationMeanMetrics, executeWorkbenchExecutionJob, engineResolveBindingForSpec, filterOptimizerTraceJobsForCaseIds, filterCandidateSourceFiles, formatWorkbenchCaseSelector, formatWorkbenchSelectionPolicy, workbenchCaseSelectorUsesAllCases, workbenchExecutionPurpose, workbenchRunExecutionFingerprint, createWorkbenchAdapterAuthBundle, createOptimizerTraceInputFiles, DOCKER_SANDBOX_BACKEND, localWorkbenchAdapterAuthStore, materializeWorkbenchRunResult, normalizeSurfaceFiles, planWorkbenchExecutionJobsForPurpose, runWorkbenchExecutionDag, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, summarizeCandidateFiles, validateWorkbenchRunEnvelope, parseWorkbenchAdapterAuthTarget, workbenchEngineCaseIdsForImproveEvaluation, workbenchEngineCaseIdsForSelector, workbenchImproveOptimizeSelector, workbenchImproveSelectionPolicy, workbenchProjectSourceFingerprint, workbenchRuntimeBundleFingerprint, } from "@workbench-ai/workbench-core";
+import { createCandidateFilePreview, createBaselineCandidateJob as createRuntimeBaselineCandidateJob, evaluationScorecardId, evaluationMeanMetrics, executeWorkbenchExecutionJob, engineResolveBindingForSpec, filterOptimizerTraceJobsForCaseIds, filterCandidateSourceFiles, formatWorkbenchCaseSelector, formatWorkbenchSelectionPolicy, workbenchCaseSelectorUsesAllCases, workbenchExecutionPurpose, workbenchRunExecutionFingerprint, createWorkbenchAdapterAuthBundle, createOptimizerTraceInputFiles, DOCKER_SANDBOX_BACKEND, localWorkbenchAdapterAuthStore, materializeWorkbenchRunResult, normalizeSurfaceFiles, planWorkbenchExecutionJobsForPurpose, runWorkbenchExecutionDag, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, summarizeCandidateFiles, validateWorkbenchRunEnvelope, parseWorkbenchAdapterAuthTarget, workbenchEngineCaseIdsForImproveEvaluation, workbenchEngineCaseIdsForSelector, workbenchImproveOptimizeSelector, workbenchImproveSelectionPolicy, workbenchProjectSourceFingerprint, workbenchRuntimeBundleFingerprint, workbenchRuntimeExplicitActiveId, } from "@workbench-ai/workbench-core";
 import { assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterAuthRequirements, normalizeWorkbenchAdapterOperationRequest, readWorkbenchAdapterOperationResult, workbenchAdapterOperationCommand, workbenchAdapterOperationResultPath, withDefaultWorkbenchAdapterAuthProfiles as applyDefaultWorkbenchAdapterAuthProfiles, } from "@workbench-ai/workbench-protocol";
 import { builtinLocalTraceAdapter, builtinLocalTraceAdapters, sortLocalTraceRefs, } from "@workbench-ai/workbench-built-in-adapters/local-traces";
 import { commandUsage, HOSTED_WATCH_LIFECYCLE_NOTE, LOCAL_DEV_OPEN_LIFECYCLE_NOTE, rootUsage, } from "./command-model.js";
@@ -16,7 +16,7 @@ import { createAdapterCommandEnv } from "./adapter-command-env.js";
 import { loadLocalArchive, loadLocalArchiveIndex, exportLocalRuntimeBundle, importLocalRuntimeBundle, runtimeBundleStats, materializeCandidateRoot, readLocalCandidate, readLocalCandidateFiles, readLocalJobs, saveLocalArchive, saveLocalJobs, setLocalActive, upsertLocalRun, upsertLocalCandidate, upsertLocalEvaluation, } from "./local-archive.js";
 import { WorkspaceSnapshotError, } from "./workspace-snapshot.js";
 import { hostedEngineResolveFiles, readLocalProjectSource, WORKBENCH_BENCHMARK_FILE, } from "./project-source.js";
-import { localBenchmarkFingerprint, localCandidateFingerprint, } from "./benchmark-fingerprint.js";
+import { localBenchmarkFingerprint, localCandidateFingerprint, projectStateBenchmarkFingerprint, } from "./benchmark-fingerprint.js";
 const require = createRequire(import.meta.url);
 function getCliVersion() {
     const manifest = require("../package.json");
@@ -993,35 +993,30 @@ async function localRun(argv, io, runtimeOptions) {
 async function ensureLocalImproveBaseCandidate(args) {
     let snapshot = await loadLocalArchive(args.workspace);
     const explicitBase = asOptionalString(args.parsed.flags.from);
-    const benchmarkFingerprint = await readLocalBenchmarkFingerprint(args.workspace);
+    const benchmarkFingerprint = localBenchmarkFingerprint(args.projectSource);
+    const baseCandidateArgs = {
+        workspace: args.workspace,
+        benchmarkFingerprint,
+        projectSource: args.projectSource,
+        samples: args.samples,
+        rerun: args.parsed.flags.rerun === true,
+        io: args.io,
+        runtimeOptions: args.runtimeOptions,
+    };
     if (explicitBase) {
-        let candidate = readLocalCandidate(snapshot, explicitBase);
-        if (candidate.benchmarkFingerprint !== benchmarkFingerprint) {
-            throw new UsageError(`Base candidate ${explicitBase} belongs to benchmark ${candidate.benchmarkFingerprint}, not ${benchmarkFingerprint}.`);
-        }
-        if (!candidate.candidateFingerprint) {
-            throw new UsageError(`Base candidate ${explicitBase} is missing a candidate fingerprint.`);
-        }
-        if (candidate.status !== "evaluated" && !candidate.eval) {
-            const code = await localEvaluateCandidate([
-                "--dir",
-                args.workspace,
-                "--candidate",
-                explicitBase,
-                "--runs",
-                args.projectSource.spec.candidate.selectedRunId,
-                "--samples",
-                String(args.samples),
-                ...(args.parsed.flags.rerun === true ? ["--rerun"] : []),
-                "--json",
-            ], createSilentIo(args.io), args.runtimeOptions);
-            if (code !== 0) {
-                throw new UsageError(`Base candidate ${explicitBase} eval failed; improve was not started.`);
-            }
-            snapshot = await loadLocalArchive(args.workspace);
-            candidate = readLocalCandidate(snapshot, explicitBase);
+        return await ensureEvaluatedLocalImproveBaseCandidate({
+            ...baseCandidateArgs,
+            candidateId: explicitBase,
+        });
+    }
+    if (snapshot.activeId) {
+        const activeCandidate = readLocalCandidate(snapshot, snapshot.activeId);
+        if (activeCandidate.benchmarkFingerprint === benchmarkFingerprint) {
+            return await ensureEvaluatedLocalImproveBaseCandidate({
+                ...baseCandidateArgs,
+                candidateId: activeCandidate.id,
+            });
         }
-        return candidate;
     }
     const candidateFingerprint = localCandidateFingerprint(args.projectSource);
     const existing = snapshot.candidates.find((candidate) => candidate.benchmarkFingerprint === benchmarkFingerprint &&
@@ -1063,6 +1058,36 @@ async function ensureLocalImproveBaseCandidate(args) {
     }
     return evaluated;
 }
+async function ensureEvaluatedLocalImproveBaseCandidate(args) {
+    let snapshot = await loadLocalArchive(args.workspace);
+    let candidate = readLocalCandidate(snapshot, args.candidateId);
+    if (candidate.benchmarkFingerprint !== args.benchmarkFingerprint) {
+        throw new UsageError(`Base candidate ${args.candidateId} belongs to benchmark ${candidate.benchmarkFingerprint}, not ${args.benchmarkFingerprint}.`);
+    }
+    if (!candidate.candidateFingerprint) {
+        throw new UsageError(`Base candidate ${args.candidateId} is missing a candidate fingerprint.`);
+    }
+    if (candidate.status === "evaluated" || candidate.eval) {
+        return candidate;
+    }
+    const code = await localEvaluateCandidate([
+        "--dir",
+        args.workspace,
+        "--candidate",
+        args.candidateId,
+        "--runs",
+        args.projectSource.spec.candidate.selectedRunId,
+        "--samples",
+        String(args.samples),
+        ...(args.rerun ? ["--rerun"] : []),
+        "--json",
+    ], createSilentIo(args.io), args.runtimeOptions);
+    if (code !== 0) {
+        throw new UsageError(`Base candidate ${args.candidateId} eval failed; improve was not started.`);
+    }
+    snapshot = await loadLocalArchive(args.workspace);
+    return readLocalCandidate(snapshot, args.candidateId);
+}
 function createSilentIo(io) {
     const sink = new class extends Writable {
         _write(_chunk, _encoding, callback) {
@@ -2922,6 +2947,7 @@ async function pushBenchmark(argv, io) {
     const runtime = await exportLocalRuntimeBundle(dir, {
         currentBenchmarkFingerprint: localBenchmarkFingerprint(source),
     });
+    const localRuntimeFingerprint = workbenchRuntimeBundleFingerprint(runtime);
     const state = localProjectState({
         source,
         runtime,
@@ -2941,7 +2967,7 @@ async function pushBenchmark(argv, io) {
                 sourceFileCount: sourceFileCount(source),
                 runtime: runtimeBundleStats(runtime),
                 sourceFingerprint: state.source.fingerprint,
-                runtimeFingerprint: state.base.runtimeFingerprint,
+                runtimeFingerprint: localRuntimeFingerprint,
             }, parsed, io, () => `Would push benchmark ${source.spec.name}.`);
             return 0;
         }
@@ -2990,7 +3016,7 @@ async function pushBenchmark(argv, io) {
             sourceFileCount: sourceFileCount(source),
             runtime: runtimeBundleStats(runtime),
             sourceFingerprint: state.source.fingerprint,
-            runtimeFingerprint: state.base.runtimeFingerprint,
+            runtimeFingerprint: localRuntimeFingerprint,
         }, parsed, io, () => `Would push ${sourceFileCount(source)} source file(s) and runtime history to ${origin.remote}.`);
         return 0;
     }
@@ -3081,7 +3107,7 @@ async function cloneProject(argv, io) {
             ref,
             outputDir,
             fileCount: state.source.files.length,
-            runtime: runtimeBundleStats(state.runtime),
+            runtime: projectStateRuntimeStats(state),
             sourceFingerprint: state.source.fingerprint ?? state.base.sourceFingerprint ?? null,
             runtimeFingerprint: state.base.runtimeFingerprint ?? null,
         }, parsed, io, () => `Would clone ${formatBenchmarkRef(ref)} to ${outputDir}.`);
@@ -3121,7 +3147,7 @@ async function pullProject(argv, io) {
             dryRun: true,
             dir,
             fileCount: state.source.files.length,
-            runtime: runtimeBundleStats(state.runtime),
+            runtime: projectStateRuntimeStats(state),
             sourceFingerprint: state.source.fingerprint ?? state.base.sourceFingerprint ?? null,
             runtimeFingerprint: state.base.runtimeFingerprint ?? null,
         }, parsed, io, () => `Would pull ${state.source.files.length} source file(s) and runtime history into ${dir}.`);
@@ -3902,6 +3928,18 @@ function localProjectState(args) {
         runtime,
     };
 }
+function projectStateRuntimeStats(state) {
+    const activeId = workbenchRuntimeExplicitActiveId({
+        candidates: state.runtime.candidates,
+        runs: state.runtime.runs,
+        preferredActiveId: state.runtime.activeId ?? null,
+        benchmarkFingerprint: projectStateBenchmarkFingerprint(state.source),
+    });
+    return runtimeBundleStats({
+        ...state.runtime,
+        activeId,
+    });
+}
 function localCandidateRecord(candidate) {
     return {
         ...candidate,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@workbench-ai/workbench",
-  "version": "0.0.53",
+  "version": "0.0.55",
   "type": "module",
   "repository": {
     "type": "git",
@@ -21,9 +21,9 @@
   ],
   "dependencies": {
     "yaml": "^2.8.2",
-    "@workbench-ai/workbench-built-in-adapters": "0.0.53",
-    "@workbench-ai/workbench-core": "0.0.53",
-    "@workbench-ai/workbench-protocol": "0.0.53"
+    "@workbench-ai/workbench-protocol": "0.0.55",
+    "@workbench-ai/workbench-core": "0.0.55",
+    "@workbench-ai/workbench-built-in-adapters": "0.0.55"
   },
   "devDependencies": {
     "@tailwindcss/postcss": "^4.2.2",